def test_reizman_emulator(show_plots=False):
    b = get_pretrained_reizman_suzuki_emulator(case=1)
    b.parity_plot(include_test=True)
    if show_plots:
        plt.show()
    columns = [v.name for v in b.domain.variables]
    values = {
        "catalyst": ["P1-L3"],
        "t_res": [600],
        "temperature": [30],
        "catalyst_loading": [0.498],
    }
    conditions = pd.DataFrame(values)
    conditions = DataSet.from_df(conditions)
    results = b.run_experiments(conditions, return_std=True)

    for name, value in values.items():
        if isinstance(value[0], str):
            assert str(results[name].iloc[0]) == value[0]
        else:
            assert float(results[name].iloc[0]) == value[0]
    assert np.isclose(float(results["yld"]), 0.6, atol=15)
    assert np.isclose(float(results["ton"]), 1.1, atol=15)

    # Test serialization
    d = b.to_dict()
    exp = ReizmanSuzukiEmulator.from_dict(d)
    return results

@pytest.mark.parametrize("use_descriptors", [True, False])  # parametrization assumed; the two flags have no defaults
@pytest.mark.parametrize("include_cost", [True, False])
def test_baumgartner_CC_emulator(use_descriptors,
                                 include_cost,
                                 show_plots=False):
    """ Test the Baumgartner Cross Coupling emulator"""
    b = get_pretrained_baumgartner_cc_emulator(use_descriptors=use_descriptors,
                                               include_cost=include_cost)
    b.parity_plot(include_test=True)
    if show_plots:
        plt.show()
    columns = [v.name for v in b.domain.variables]
    values = {
        "catalyst": ["tBuXPhos"],
        "base": ["DBU"],
        "t_res": [328.717801570892],
        "temperature": [30],
        "base_equivalents": [2.18301549894049],
    }
    conditions = pd.DataFrame(values)
    conditions = DataSet.from_df(conditions)
    results = b.run_experiments(conditions, return_std=True)

    assert str(results["catalyst"].iloc[0]) == values["catalyst"][0]
    assert str(results["base"].iloc[0]) == values["base"][0]
    assert float(results["t_res"]) == values["t_res"][0]
    assert float(results["temperature"]) == values["temperature"][0]
    assert np.isclose(float(results["yld"]), 0.042832638, atol=0.15)

    # Test serialization
    d = b.to_dict()
    exp = BaumgartnerCrossCouplingEmulator.from_dict(d)
    return results
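For reference, a minimal set of imports these test snippets appear to assume; the module paths follow the Summit library's layout, but treat this as a sketch rather than the test file's actual header:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytest

from summit.benchmarks import (
    BaumgartnerCrossCouplingEmulator,
    ReizmanSuzukiEmulator,
    get_pretrained_baumgartner_cc_emulator,
    get_pretrained_reizman_suzuki_emulator,
)
from summit.utils.dataset import DataSet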
Example #3
    def to_dataset(self) -> DataSet:
        """Get design as a pandas dataframe
        Returns
        -------
        ds: summit.utils.dataset.Dataset
        """
        df = pd.DataFrame([])
        for i, variable in enumerate(self._domain.input_variables):
            if isinstance(variable, ContinuousVariable):
                values = self.get_values(variable.name)[:, 0]
            elif isinstance(variable, CategoricalVariable):
                values = [
                    variable.levels[i]
                    for i in self.get_indices(variable.name)[:, 0]
                ]
            df.insert(i, variable.name, values)

        return DataSet.from_df(df)
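As an aside, DataSet.from_df wraps a plain DataFrame in Summit's two-level column index, which is what lets later snippets attach metadata columns next to data columns. A minimal sketch (column names illustrative):

import pandas as pd
from summit.utils.dataset import DataSet

df = pd.DataFrame({"temperature": [30.0, 60.0]})
ds = DataSet.from_df(df)
# Columns become a MultiIndex of (name, type) pairs such as ("temperature", "DATA"),
# so metadata can live alongside data:
ds[("strategy", "METADATA")] = "LHS"
print(ds.columns)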
Example #4
    def to_dataset(self) -> DataSet:
        """Get design as a pandas dataframe
        Returns
        -------
        ds: summit.utils.dataset.Dataset
        """
        df = pd.DataFrame([])
        i = 0
        for variable in self._domain.variables:
            if variable.is_objective or variable.name in self.exclude:
                continue
            elif isinstance(variable, ContinuousVariable):
                values = self.get_values(variable.name)[:, 0]
            elif isinstance(variable, CategoricalVariable):
                values = [
                    variable.levels[i]
                    for i in self.get_indices(variable.name)[:, 0]
                ]
            df.insert(i, variable.name, values)
            i += 1

        return DataSet.from_df(df)
Example #5
    def suggest_experiments(self,
                            num_experiments,
                            criterion="center",
                            exclude=[],
                            **kwargs) -> DataSet:
        """Generate latin hypercube intial design

        Parameters
        ----------
        num_experiments: int
            The number of experiments (i.e., samples) to generate
        criterion: str, optional
            The criterion used for the LHS. Allowable values are "center" or "c", "maximin" or "m",
            "centermaximin" or "cm", and "correlation" or "corr". Default is "center".
        exclude: array-like, optional
            List of variable names to exclude from the design. Default is an empty list.

        Returns
        -------
        next_experiments : :class:`~summit.utils.data.DataSet`
            A Dataset object with the suggested experiments
        """
        # design = Design(self.domain, num_experiments, "Latin design", exclude=exclude)
        design = pd.DataFrame()

        # Instantiate the random design class to be used with categorical variables with no descriptors
        rdesigner = Random(self.domain, random_state=self._rstate)

        # Get categorical variables without descriptors
        categoricals = []
        for v in self.domain.input_variables:
            if isinstance(v, CategoricalVariable):
                if v.ds is None:
                    categoricals.append(v.name)

        # Sampling
        n = self.domain.num_continuous_dimensions(include_descriptors=True)
        if len(categoricals) < n:
            samples = lhs(
                n,
                samples=num_experiments,
                criterion=criterion,
                random_state=self._rstate,
            )
        else:
            raise ValueError("Need sufficient number of variables")

        k = 0
        columns = []
        for variable in self.domain.input_variables:
            if variable.name in exclude:
                continue

            # For continuous variables, use samples directly
            if isinstance(variable, ContinuousVariable):
                b = variable.lower_bound * np.ones(num_experiments)
                values = b + samples[:, k] * (variable.upper_bound -
                                              variable.lower_bound)
                design.insert(design.shape[1], variable.name, values)
                k += 1

            # For categorical variable with no descriptors, randomly choose
            elif (isinstance(variable, CategoricalVariable)
                  and variable.name in categoricals):
                indices, values = rdesigner._random_categorical(
                    variable, num_experiments)
                design.insert(design.shape[1], variable.name, values)

            # For categorical variable with descriptors, look in descriptors space
            # The untransform method at the end should find the closest point by euclidean distance.
            elif isinstance(variable,
                            CategoricalVariable) and variable.ds is not None:
                num_descriptors = variable.num_descriptors
                values = samples[:, k:k + num_descriptors]

                # Scaling
                var_min = (variable.ds.loc[:, variable.ds.data_columns].min(
                    axis=0).to_numpy())
                var_min = np.atleast_2d(var_min)
                var_max = (variable.ds.loc[:, variable.ds.data_columns].max(
                    axis=0).to_numpy())
                var_max = np.atleast_2d(var_max)
                var_range = var_max - var_min

                # Rescale into the descriptor ranges
                values_scaled = var_min + values * var_range
                values_scaled = values_scaled.reshape(num_experiments, num_descriptors)
                k += num_descriptors

                # Add each descriptor as its own column
                names = variable.ds.columns.levels[0].to_list()
                for i in range(num_descriptors):
                    design.insert(design.shape[1], names[i], values_scaled[:, i])
            else:
                raise DomainError(
                    f"Variable {variable} is not one of the possible variable types (continuous or categorical)."
                )

            # design.add_variable(variable.name, values, indices=indices)
        design = DataSet.from_df(design)
        design[("strategy", "METADATA")] = "LHS"
        return self.transform.un_transform(design, transform_descriptors=True)
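A minimal usage sketch for this LHS strategy; the domain construction follows Summit's Domain API, and the variable names and bounds are illustrative:

from summit.domain import Domain, ContinuousVariable
from summit.strategies import LHS

domain = Domain()
domain += ContinuousVariable(name="temperature", description="reaction temperature", bounds=[30, 110])
domain += ContinuousVariable(name="t_res", description="residence time", bounds=[60, 600])

lhs = LHS(domain)
# Five samples with the "maximin" criterion; the result is a DataSet that also
# carries the ("strategy", "METADATA") column set above
suggestions = lhs.suggest_experiments(5, criterion="maximin")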
Example #6
    def main(self, num_input=3, prev_res=None, prev_param=None):
        import chemopt
        from chemopt.logger import get_handlers

        x0, y0 = prev_res[0], prev_res[1]
        module_path = os.path.dirname(chemopt.__file__)
        if self._pretrained_model_config_path:
            path = self._pretrained_model_config_path
        else:
            path = osp.join(
                module_path,
                "config_" + str(num_input) + "_inputs_" +
                str(self._model_size) + ".json",
            )
        with open(path) as config_file:
            config = json.load(
                config_file,
                object_hook=lambda d: namedtuple("x", d.keys())(*d.values()),
            )
        saved_model_path = osp.join(os.path.dirname(os.path.realpath(path)),
                                    str(config.save_path))
        if prev_param:
            if prev_param["iteration"] > config.unroll_length:
                raise ValueError(
                    "Number of iterations exceeds unroll length of the pretrained model!"
                )

        logging.basicConfig(level=logging.WARNING, handlers=get_handlers())
        logger = logging.getLogger()

        cell = chemopt.rnn.StochasticRNNCell(
            cell=chemopt.rnn.LSTM,
            kwargs={"hidden_size": config.hidden_size},
            nlayers=config.num_layers,
            reuse=config.reuse,
        )
        optimizer = self.StepOptimizer(
            cell=cell,
            ndim=config.num_params,
            nsteps=config.num_steps,
            ckpt_path=saved_model_path,
            infer_model_path=self._infer_model_path,
            logger=logger,
            constraints=True,
            x=x0,
            y=y0,
        )
        x, state = optimizer.run(prev_res=y0, prev_param=prev_param)

        real_x = self.x_convert(x)
        next_experiments = {}
        i_inp = 0
        for v in self.domain.variables:
            if not v.is_objective:
                next_experiments[v.name] = [real_x[i_inp]]
                i_inp += 1
        next_experiments = DataSet.from_df(pd.DataFrame(data=next_experiments))
        next_experiments[("strategy", "METADATA")] = ["DRO"]

        param = {}
        if not y0:
            y0 = np.array([[float("inf")]])
            param["iteration"] = 0
        else:
            param["iteration"] = prev_param["iteration"] + 1
        if not prev_param:
            self.fbest = y0[0]
            self.xbest = real_x
        elif y0 < prev_param["fbest"]:
            self.fbest = y0[0]
            self.xbest = real_x
        else:
            self.fbest = prev_param["fbest"]
            self.xbest = prev_param["xbest"]

        param.update({
            "state": state,
            "last_requested_point": x,
            "xbest": self.xbest,
            "fbest": self.fbest,
        })

        tf.reset_default_graph()

        return next_experiments, param
Example #7
    def _run(self, conditions, **kwargs):
        condition = DataSet.from_df(conditions.to_frame().T)
        infer_dict = self.emulator.infer_model(dataset=condition)
        for k, v in infer_dict.items():
            conditions[(k, "DATA")] = v
        return conditions, None
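Since _run rebuilds a one-row frame with .to_frame().T, it evidently receives a single row (a Series) at a time; a hypothetical driver loop, not the library's actual one, might look like:

# Hypothetical: feed each row of a conditions DataSet through _run
for _, row in conditions.iterrows():
    updated_row, extras = experiment._run(row)  # extras is None here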
Example #8
    def _inner_suggest_experiments(self,
                                   prev_res: DataSet = None,
                                   prev_param=None):
        """Inner loop for suggestion of experiments using Nelder-Mead Simplex method

        Parameters
        ----------
        prev_res: summit.utils.data.DataSet, optional
            Dataset with data from previous experiments.
            If no data is passed, the Nelder-Mead optimization algorithm
            will be initialized and suggest initial experiments.
        prev_param:
            Parameters of the Nelder-Mead algorithm from previous
            iterations of an optimization problem.
            If no data is passed, the Nelder-Mead optimization algorithm
            will be initialized.

        """

        # internal flag
        stay_inner = False

        # Get bounds of input variables
        bounds = []
        input_var_names = []
        output_var_names = []
        for v in self.domain.variables:
            if not v.is_objective:
                if isinstance(v, ContinuousVariable):
                    bounds.append(v.bounds)
                    input_var_names.append(v.name)
                elif isinstance(v, CategoricalVariable):
                    if v.ds is not None:
                        descriptor_names = v.ds.data_columns
                        descriptors = np.asarray([
                            v.ds.loc[:, [l]].values.tolist()
                            for l in v.ds.data_columns
                        ])
                    else:
                        raise ValueError("No descriptors given for {}".format(
                            v.name))
                    for d in descriptors:
                        bounds.append(
                            [np.min(np.asarray(d)),
                             np.max(np.asarray(d))])
                    input_var_names.extend(descriptor_names)
                else:
                    raise TypeError(
                        "Nelder-Mead cannot handle variable type: {}".format(
                            v.type))
            else:
                output_var_names.append(v.name)
        bounds = np.asarray(bounds, dtype=float)

        # Extract dimension of input domain
        dim = len(bounds[:, 0])

        # Initialization
        initial_run = True
        x0 = [self._x_start]
        y0 = []

        # Get previous results
        if prev_res is not None:
            initial_run = False
            inputs, outputs = self.transform.transform_inputs_outputs(
                prev_res, transform_descriptors=True)

            # Set up maximization and minimization
            for v in self.domain.variables:
                if v.is_objective and v.maximize:
                    outputs[v.name] = -1 * outputs[v.name]

            x0 = inputs.data_to_numpy()
            y0 = outputs.data_to_numpy()

        elif prev_param is not None:
            raise ValueError(
                "Parameters from the previous optimization iteration were given, but previous results are "
                "missing!")

        # if no previous results are given, initialize the center point as the geometric midpoint of the bounds
        if len(x0[0]) == 0 and not self.random_start:
            x0 = np.ones(
                (1, len(bounds))) * 0.5 * ((bounds[:, 1] + bounds[:, 0]).T)
        elif len(x0[0]) == 0 and self.random_start:
            weight = np.random.rand()
            x0 = np.ones((1, len(bounds))) * (weight * bounds[:, 1] +
                                              (1 - weight) * bounds[:, 0]).T
        """ Set Nelder-Mead parameters, i.e., initialize or include data from previous iterations
            --------
            prev_sim: array-like
                variable coordinates (points) of simplex from previous run
            prev_fsim: array-like
                function values corresponding to points of simplex from previous run
            x_iter: array-like
                variable coordinates and corresponding function values of potential new 
                simplex points determined in one iteration of the NMS algorithm; note that 
                within one iteration multiple points need to be evaluated; that's why we have
                to store the points of an unfinished iteration (start iteration -> request point
                -> run experiment -> restart same iteration with results of experiment 
                -> request point -> run experiment ... -> finish iteration)
            red_dim: boolean
                True if dimension was reduced in one of the previous iterations and has not been recovered yet
            red_sim: array-like
                variable coordinates (points) of simplex before dimension was reduced
            red_fsim: array-like
                function values of points corresponding to simplex before dimension was reduced
            rec_dim: boolean
                True if dimension was recovered in the last iteration
            memory: array-like
                list of all points for which the function was evaluated
        """

        prev_sim, prev_fsim, x_iter, red_dim, red_sim, red_fsim, rec_dim, memory = (
            None,
            None,
            None,
            None,
            None,
            None,
            None,
            [np.ones(dim) * float("inf")],
        )

        # if this is not the first iteration of the Nelder-Mead algorithm, get parameters from previous iteration
        if prev_param:
            prev_sim = prev_param["sim"]
            red_dim = prev_param["red_dim"]
            red_sim = prev_param["red_sim"]
            red_fsim = prev_param["red_fsim"]
            rec_dim = prev_param["rec_dim"]
            memory = prev_param["memory"]

            # if dimension was recovered in last iteration, N function evaluations were requested
            # that need to be assigned to the respective points in the simplex
            if rec_dim:
                prev_fsim = prev_param["fsim"]
                for k in range(len(x0)):
                    for s in range(len(prev_sim)):
                        if np.array_equal(prev_sim[s], x0[k]):
                            prev_fsim[s] = y0[k]
                rec_dim = False
            # assign function values to respective points
            elif prev_param["fsim"] is not None:
                prev_fsim = prev_param["fsim"]
                x_iter = prev_param["x_iter"]
                for key, value in x_iter.items():
                    if value is not None:
                        if key == "x_shrink":
                            for k in range(len(x0)):
                                for j in range(len(value)):
                                    if np.array_equal(value[j][0],
                                                      np.asarray(x0[k])):
                                        x_iter[key][j][1] = y0[k]
                        else:
                            for k in range(len(x0)):
                                if np.array_equal(value[0], np.asarray(x0[k])):
                                    x_iter[key][1] = y0[k]
                                    break
            else:
                prev_fsim = y0
        # otherwise, initialize with the given simplex points (including function evaluations)
        elif prev_res is not None:
            prev_sim = x0
            prev_fsim = y0
            for p in x0.astype(float).tolist():
                memory.append(p)

        # Run Nelder-Mead Simplex algorithm for one iteration
        overfull_simplex = False
        if not red_dim:
            request, sim, fsim, x_iter = self._minimize_neldermead(
                x0=x0[0],
                bounds=bounds,
                x_iter=x_iter,
                f=prev_fsim,
                sim=prev_sim,
                adaptive=self._adaptive,
            )
            if not initial_run:
                (
                    overfull_simplex,
                    prev_sim,
                    prev_fsim,
                    red_sim,
                    red_fsim,
                    overfull_dim,
                ) = self.check_overfull(request, sim, fsim, bounds)

        ## Reduce dimension if n+1 points are located in n-1 dimensions (if either red_dim = True, i.e.,
        # optimization in the reduced dimension space was not finished in the last iteration, or overfull_simplex, i.e.,
        # the last Nelder-Mead call (with red_dim = False) led to an overfull simplex).
        ## Note that in order not to lose any information, the simplex without dimension reduction is returned even
        # if the optimization in the reduced dimension space is not finished.
        ## If the optimization in the reduced dimension space was not finished in the last iteration (red_dim = True),
        # the simplex will automatically be reduced again.
        if red_dim or overfull_simplex:
            # prepare dimension reduction
            if red_dim:
                x_iter, overfull_dim = self.upstream_simplex_dim_red(
                    prev_sim, x_iter)
            else:
                x_iter = None

            # save value of dimension reduced
            save_dim = prev_sim[0][overfull_dim]
            # delete overfull dimension
            new_prev_sim = np.delete(prev_sim, overfull_dim, 1)
            # delete bounds for overfull dimension
            new_bounds = np.delete(bounds, overfull_dim, 0)

            # Run one iteration of Nelder-Mead Simplex algorithm for reduced simplex
            request, sim, fsim, x_iter = self._minimize_neldermead(
                x0=new_prev_sim[0],
                x_iter=x_iter,
                bounds=new_bounds,
                f=prev_fsim,
                sim=new_prev_sim,
                adaptive=self._adaptive,
            )

            overfull_simplex, _, _, _, _, _ = self.check_overfull(
                request, sim, fsim, bounds)
            if overfull_simplex:
                raise NotImplementedError(
                    "Recursive dimension reduction not implemented yet.")

            # recover dimension after Nelder-Mead Simplex run (to return full request for experiment)
            request = np.insert(request, overfull_dim, save_dim, 1)
            sim = np.insert(sim, overfull_dim, save_dim, 1)

            # follow-up of dimension reduction
            x_iter = self.downstream_simplex_dim_red(x_iter, overfull_dim,
                                                     save_dim)

            red_dim = True

        # if not overfull and no reduced dimension from previous iteration
        else:
            red_dim = False

        # Circle (suggested point has already been investigated)
        if any(np.array([np.array(memory == x).all(1).any()
                         for x in request])):
            ## if dimension is reduced and requested point has already been evaluated, recover dimension with
            # reflected and translated simplex before dimension reduction
            if red_dim:
                sim, fsim, request = self.recover_simplex_dim(
                    sim, red_sim, red_fsim, overfull_dim, bounds, memory,
                    self._dx)
                red_dim = False
                rec_dim = True
            # otherwise stay inner (the raise below is disabled)
            else:
                stay_inner = True
                # raise NotImplementedError("Circle - point has already been investigated.")

        ## Only little changes in requested points, xatol = tolerance for changes in x,
        # or in function values, fatol = tolerance for changes in f
        ## TODO: add extra threshold to stop reduced dimension problem and recover dimension
        if not initial_run:
            xatol = (bounds[:, 1] - bounds[:, 0]) * self._dx
            fatol = self._df
            if (np.max(np.abs(sim[1:] - sim[0]), 0) <= xatol).all() or (np.max(
                    np.abs(fsim[0] - fsim[1:])) <= fatol).any():
                if red_dim:
                    sim, fsim, request = self.recover_simplex_dim(
                        sim, red_sim, red_fsim, overfull_dim, bounds, memory,
                        self._dx)
                    red_dim = False
                    rec_dim = True
                else:
                    print(
                        "Warning, internal stopping criterion is reached. "
                        "Either points of simplex or function values of points of simplex are very close to each other."
                    )

        # add requested points to memory
        for p in request.astype(float).tolist():
            memory.append(p)

        # store parameters of this iteration as a dictionary
        param = dict(
            sim=sim,
            fsim=fsim,
            x_iter=x_iter,
            red_dim=red_dim,
            red_sim=red_sim,
            red_fsim=red_fsim,
            rec_dim=rec_dim,
            memory=memory,
        )

        # Generate DataSet object with variable values of next experiments
        next_experiments = {}
        for i, v in enumerate(input_var_names):
            next_experiments[v] = request[:, i]
        next_experiments = DataSet.from_df(pd.DataFrame(data=next_experiments))

        # Check constraint violations
        mask_valid_next_experiments = self.check_constraints(next_experiments)
        if initial_run and not all(mask_valid_next_experiments):
            raise ValueError(
                "Default initialization failed due to constraints. Please enter an initial simplex with feasible points"
            )
        if not any(mask_valid_next_experiments):
            stay_inner = True

        if stay_inner:
            # mark the suggested points as violating the constraints
            next_experiments[("constraint", "DATA")] = False
        else:
            # add optimization strategy
            next_experiments[("constraint",
                              "DATA")] = mask_valid_next_experiments
            next_experiments[(
                "strategy",
                "METADATA")] = ["Nelder-Mead Simplex"] * len(request)
        x_best = None
        f_best = float("inf")
        # fbest corresponds to the transformed function values
        if not initial_run:
            x_best = sim[0]
            f_best = fsim[0]
            x_best = self.round(x_best, bounds, self._dx)
            f_best = int(f_best * 10**int(np.log10(1 / self._df))) / 10**int(
                np.log10(1 / self._df))
        # next_experiments = np.around(next_experiments, decimals=self._dx)

        # Do any necessary transformation back
        next_experiments = self.transform.un_transform(
            next_experiments, transform_descriptors=True)

        return next_experiments, x_best, f_best, param
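A hedged closed-loop sketch for the Nelder-Mead strategy above, using Summit's Himmelblau test function as the experiment (class names per summit.strategies and summit.benchmarks; the iteration budget is arbitrary):

from summit.benchmarks import Himmelblau
from summit.strategies import NelderMead

exp = Himmelblau()
strategy = NelderMead(exp.domain)

prev_res = None
for _ in range(20):
    # one (inner) simplex iteration per call; results are fed back via prev_res
    next_experiments = strategy.suggest_experiments(prev_res=prev_res)
    prev_res = exp.run_experiments(next_experiments)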
Example #9
    def suggest_experiments(self, prev_res: DataSet = None, **kwargs):
        """Suggest experiments using Gryffin optimization strategy

        Parameters
        ----------
        prev_res: :class:`~summit.utils.data.DataSet`, optional
            Dataset with data from previous experiments of previous iteration.
            If no data is passed, then random sampling will
            be used to suggest an initial design.

        Returns
        -------
        next_experiments : :class:`~summit.utils.data.DataSet`
            A Dataset object with the suggested experiments

        """

        param = None
        xbest = np.zeros(self.domain.num_continuous_dimensions())
        obj = self.domain.output_variables[0]
        fbest = float("inf")

        # Suggest random initial design
        if prev_res is None:
            request = self.gryffin.recommend(observations=[])
        else:
            # Get inputs and outputs
            inputs, outputs = self.transform.transform_inputs_outputs(
                prev_res, transform_descriptors=False)

            # Set up maximization and minimization by converting maximization to minimization problem
            for v in self.domain.variables:
                if v.is_objective and v.maximize:
                    outputs[v.name] = -1 * outputs[v.name]

            inputs_dict = inputs.to_dict(orient="records")
            outputs_dict = outputs.to_dict(orient="records")
            prev_samples = [{
                **{k1[0]: [v1]
                   for k1, v1 in inputs_dict[i].items()},
                **{k2[0]: v2
                   for k2, v2 in outputs_dict[i].items()},
            } for i in range(len(inputs_dict))]

            observations = []
            if self.prev_param is not None:
                observations = self.prev_param
            observations.extend(prev_samples)
            param = observations

            request = self.gryffin.recommend(observations=observations)

            for obs in observations:
                if obs[obj.name] < fbest:
                    fbest = obs[obj.name]
                    xbest = np.asarray(
                        [v[0] for k, v in obs.items() if k != obj.name])

        # Generate DataSet object with variable values of next experiments
        next_experiments = None
        if request is not None and len(request) != 0:
            next_experiments = {}
            for k in request[0].keys():
                next_experiments[k] = [r[k][0] for r in request]
            next_experiments = DataSet.from_df(
                pd.DataFrame(data=next_experiments))
            next_experiments[("strategy", "METADATA")] = "GRYFFIN"

        obj = self.domain.output_variables[0]
        objective_dir = -1.0 if obj.maximize else 1.0
        fbest = objective_dir * fbest
        self.fbest = fbest
        self.xbest = xbest
        self.prev_param = param

        # Do any necessary transformation back
        next_experiments = self.transform.un_transform(
            next_experiments, transform_descriptors=False)

        return next_experiments
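The observations list consumed by gryffin.recommend is built above by merging the inputs records (values wrapped in one-element lists) with the outputs records (scalars, negated for maximization). A small sketch of that shape, with made-up variable names and numbers:

# Illustrative shape of `observations` (names and values invented)
observations = [
    {"temperature": [60.0], "t_res": [120.0], "yld": -0.45},
    {"temperature": [90.0], "t_res": [300.0], "yld": -0.72},
]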
Example #10
    def _inner_suggest_experiments(
        self, num_experiments, prev_res: DataSet = None, prev_param=None
    ):
        """Inner loop for generation of suggested experiments using the SNOBFIT method
        Parameters
        ----------
        num_experiments: int
            The number of experiments (i.e., samples) to generate
        prev_res: summit.utils.data.DataSet, optional
            Dataset with data from previous experiments.
            If no data is passed, the SNOBFIT optimization algorithm
            will be initialized and will suggest initial experiments.
        prev_param: file.txt TODO: how to handle this?
            File with parameters of SNOBFIT algorithm from previous
            iterations of an optimization problem.
            If no data is passed, the SNOBFIT optimization algorithm
            will be initialized.
        Returns
        -------
        next_experiments: DataSet
            A `Dataset` object with the suggested experiments by SNOBFIT algorithm
        xbest: list
            List with variable settings of experiment with best outcome
        fbest: float
            Objective value at xbest
        param: list
            List with parameters and prev_param of SNOBFIT algorithm (required for next iteration)
        """

        # Extract dimension of input domain
        dim = self.domain.num_continuous_dimensions()

        # internal flag
        stay_inner = False

        # Get bounds of input variables
        bounds = []
        input_var_names = []
        output_var_names = []
        for v in self.domain.variables:
            if not v.is_objective:
                if isinstance(v, ContinuousVariable):
                    bounds.append(v.bounds)
                    input_var_names.append(v.name)
                elif isinstance(v, CategoricalVariable):
                    if v.ds is not None:
                        descriptor_names = v.ds.data_columns
                        descriptors = np.asarray(
                            [
                                v.ds.loc[:, [l]].values.tolist()
                                for l in v.ds.data_columns
                            ]
                        )
                    else:
                        raise ValueError("No descriptors given for {}".format(v.name))
                    for d in descriptors:
                        bounds.append([np.min(np.asarray(d)), np.max(np.asarray(d))])
                    input_var_names.extend(descriptor_names)
                else:
                    raise TypeError(
                        "SNOBFIT cannot handle variable type: {}".format(v.type)
                    )
            else:
                output_var_names.append(v.name)
        bounds = np.asarray(bounds, dtype=float)

        # Initialization
        x0 = []
        y0 = []

        # Get previous results
        if prev_res is not None:
            # get always the same order according to the ordering in the domain -> this is actually done within transform
            # ordered_var_names = input_var_names + output_var_names
            # prev_res = prev_res[ordered_var_names]
            # transform
            inputs, outputs = self.transform.transform_inputs_outputs(
                prev_res, transform_descriptors=True
            )

            # Set up maximization and minimization
            for v in self.domain.variables:
                if v.is_objective and v.maximize:
                    outputs[v.name] = -1 * outputs[v.name]

            x0 = inputs.data_to_numpy()
            y0 = outputs.data_to_numpy()

            # Add uncertainties to measurements TODO: include uncertainties in input
            y = []
            for i in range(y0.shape[0]):
                y.append([y0[i].tolist()[0], math.sqrt(np.spacing(1))])
            y0 = np.asarray(y, dtype=float)
        # If no prev_res are given but prev_param -> raise error
        elif prev_param is not None:
            raise ValueError(
                "Parameters from the previous optimization iteration were given, but previous results are "
                "missing!"
            )

        # if no previous results are given initialize with empty lists
        if not len(x0):
            x0 = np.array(x0).reshape(0, len(bounds))
            y0 = np.array(y0).reshape(0, 2)

        """ Determine SNOBFIT parameters
          config       structure variable defining the box [u,v] in which the
                       points are to be generated, the number nreq of
                       points to be generated and the probability p that a
                       point of type 4 is generated
                       config = struct('bounds',{u,v},'nreq',nreq,'p',p)
          dx           only used for the definition of a new problem (when
                       the program should continue from the values stored in
                       file.mat, the call should have only 4 input parameters!)
                       n-vector (n = dimension of the problem) of minimal
                       steps, i.e., two points are considered to be different
                       if they differ by at least dx(i) in at least one
                       coordinate i
        """
        config = {"bounds": bounds, "p": self._p, "nreq": num_experiments}
        dx = (bounds[:, 1] - bounds[:, 0]) * self._dx_dim

        # Run SNOBFIT for one iteration
        request, xbest, fbest, param = self.snobfit(x0, y0, config, dx, prev_param)

        # Generate DataSet object with variable values of next experiments
        next_experiments = {}
        for i, v in enumerate(input_var_names):
            next_experiments[v] = request[:, i]
        next_experiments = DataSet.from_df(pd.DataFrame(data=next_experiments))

        # Check constraint violations
        mask_valid_next_experiments = self.check_constraints(next_experiments)
        if not any(mask_valid_next_experiments):
            stay_inner = True

        if stay_inner:
            # mark the suggested points as violating the constraints
            next_experiments[("constraint", "DATA")] = False
        else:
            # add optimization strategy
            next_experiments[("constraint", "DATA")] = mask_valid_next_experiments
            next_experiments[("strategy", "METADATA")] = ["SNOBFIT"] * len(request)

        # Do any necessary transformation back
        next_experiments = self.transform.un_transform(
            next_experiments, transform_descriptors=True
        )

        return next_experiments, xbest, fbest, param
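To make the SNOBFIT parameter setup concrete, a small numeric sketch of the config dictionary and the dx resolution computed above (bounds and constants are illustrative):

import numpy as np

bounds = np.array([[30.0, 110.0], [60.0, 600.0]])  # two continuous inputs
dx_dim = 1e-5                                      # fractional resolution per dimension
dx = (bounds[:, 1] - bounds[:, 0]) * dx_dim        # -> array([0.0008, 0.0054])
config = {"bounds": bounds, "p": 0.5, "nreq": 4}   # p: probability of a type-4 point; nreq: points per call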
Example #11
    def suggest_experiments(
        self, num_experiments=1, prev_res: DataSet = None, **kwargs
    ):
        """Suggest experiments using GPyOpt single-objective Bayesian Optimization

        Parameters
        ----------
        num_experiments: int, optional
            The number of experiments (i.e., samples) to generate. Default is 1.
        prev_res: :class:`~summit.utils.data.DataSet`, optional
            Dataset with data from previous experiments of previous iteration.
            If no data is passed, then random sampling will
            be used to suggest an initial design.

        Returns
        -------
        next_experiments : :class:`~summit.utils.data.DataSet`
            A Dataset object with the suggested experiments

        """

        param = None
        xbest = np.zeros(self.domain.num_continuous_dimensions())
        obj = self.domain.output_variables[0]
        objective_dir = -1.0 if obj.maximize else 1.0
        fbest = float("inf")

        # Suggest random initial design
        if prev_res is None:
            """lhs design does not consider constraints
            lhs = LHS(self.domain)
            next_experiments = lhs.suggest_experiments((num_experiments))
            return next_experiments, None, float("inf"), None
            """
            feasible_region = GPyOpt.Design_space(
                space=self.input_domain, constraints=self.constraints
            )
            request = GPyOpt.experiment_design.initial_design(
                "random", feasible_region, num_experiments
            )
        else:
            # Get inputs and outputs
            inputs, outputs = self.transform.transform_inputs_outputs(
                prev_res, transform_descriptors=self.use_descriptors
            )

            # Set up maximization and minimization by converting maximization to minimization problem
            for v in self.domain.variables:
                if v.is_objective and v.maximize:
                    outputs[v.name] = -1 * outputs[v.name]
                if isinstance(v, CategoricalVariable):
                    if not self.use_descriptors:
                        inputs[v.name] = self.categorical_wrapper(
                            inputs[v.name], v.levels
                        )

            inputs = inputs.to_numpy()
            outputs = outputs.to_numpy()

            if self.prev_param is not None:
                X_step = self.prev_param[0]
                Y_step = self.prev_param[1]

                X_step = np.vstack((X_step, inputs))
                Y_step = np.vstack((Y_step, outputs))

            else:
                X_step = inputs
                Y_step = outputs

            sobo_model = GPyOpt.methods.BayesianOptimization(
                f=None,
                domain=self.input_domain,
                constraints=self.constraints,
                model_type=self.gp_model_type,
                kernel=self.kernel,
                acquisition_type=self.acquisition_type,
                acquisition_optimizer_type=self.optimizer_type,
                normalize_Y=self.standardize_outputs,
                batch_size=num_experiments,
                evaluator_type=self.evaluator_type,
                maximize=False,
                ARD=self.ARD,
                exact_feval=self.exact_feval,
                X=X_step,
                Y=Y_step,
            )
            request = sobo_model.suggest_next_locations()

            # Store parameters (history of suggested points and function evaluations)
            param = [X_step, Y_step]

            fbest = np.min(Y_step)
            xbest = X_step[np.argmin(Y_step)]

        # Generate DataSet object with variable values of next experiments
        next_experiments = None
        transform_descriptors = False
        if request is not None and len(request) != 0:
            next_experiments = {}
            i_inp = 0
            for v in self.domain.variables:
                if not v.is_objective:
                    if isinstance(v, CategoricalVariable):
                        if v.ds is None or not self.use_descriptors:
                            cat_list = []
                            for j, entry in enumerate(request[:, i_inp]):
                                cat_list.append(
                                    self.categorical_unwrap(entry, v.levels)
                                )
                            next_experiments[v.name] = np.asarray(cat_list)
                            i_inp += 1
                        else:
                            descriptor_names = v.ds.data_columns
                            for d in descriptor_names:
                                next_experiments[d] = request[:, i_inp]
                                i_inp += 1
                            transform_descriptors = True
                    else:
                        next_experiments[v.name] = request[:, i_inp]
                        i_inp += 1
            next_experiments = DataSet.from_df(pd.DataFrame(data=next_experiments))
            next_experiments[("strategy", "METADATA")] = "Single-objective BayOpt"

        self.fbest = objective_dir * fbest
        self.xbest = xbest
        self.prev_param = param

        # Do any necessary transformation back
        next_experiments = self.transform.un_transform(
            next_experiments, transform_descriptors=self.use_descriptors
        )

        return next_experiments
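A hedged closed-loop sketch for this SOBO strategy (class names per summit.strategies and summit.benchmarks; the benchmark and iteration budget are arbitrary):

from summit.benchmarks import Himmelblau
from summit.strategies import SOBO

exp = Himmelblau()
strategy = SOBO(exp.domain)

prev_res = None
for _ in range(10):
    next_experiments = strategy.suggest_experiments(num_experiments=1, prev_res=prev_res)
    prev_res = exp.run_experiments(next_experiments)
# best (transformed) objective value and corresponding inputs seen so far
print(strategy.fbest, strategy.xbest)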
Example #12
def fit_and_test(n_training_matlab, num_restarts=100, max_iters=2000, n_spectral_points=4000, 
                use_spectral_sample=True, plot=True):
    # Read in data from one Matlab experiment
    X = pd.read_csv('data/matlab/experiment_1/X.csv', names=[f"x_{i}" for i in range(6)])
    y = pd.read_csv('data/matlab/experiment_1/Y.csv', names=['y_0', 'y_1'])
    X = DataSet.from_df(X)
    y = DataSet.from_df(y)

    # Train-test split
    X_train = X.iloc[:n_training_matlab, :]
    X_test =  X.iloc[n_training_matlab:, :]
    y_train = y.iloc[:n_training_matlab, :]
    y_test = y.iloc[n_training_matlab:, :]
    print("Number of training data:", X_train.shape[0])
    print("Number of test data:", X_test.shape[0])

    # Scale decision variables between 0 and 1
    X_min = X_train.min()
    X_max = X_train.max()
    X_train_scaled = (X_train-X_min)/(X_max-X_min)
    X_test_scaled = (X_test-X_min)/(X_max-X_min)

    # Scale objectives to 0 mean and unit variance
    y_mean = y_train.mean()
    y_std = y_train.std()
    y_train_scaled = (y_train-y_mean)/y_std

    # Train model
    print(f"Fitting models (number of optimization restarts={num_restarts})")
    kerns = [GPy.kern.Exponential(input_dim=6,ARD=True) for _ in range(2)]
    models = ModelGroup({'y_0': GPyModel(kernel=kerns[0]),
                        'y_1': GPyModel(kernel=kerns[1])})
    models.fit(X_train_scaled, y_train_scaled, 
            num_restarts=num_restarts,
            max_iters=max_iters,
            parallel=True,
            n_spectral_points=n_spectral_points, 
            spectral_sample=False)  # spectral sampling done below
    for name, model in models.models.items():
        hyp = model.hyperparameters
        print(f"Model {name} lengthscales: {hyp[0]}")
        print(f"Model {name} variance: {hyp[1]}")
        print(f"Model {name} noise: {hyp[2]}")

    # Model validation
    rmse = lambda pred, actual: np.sqrt(np.mean((pred-actual)**2, axis=0))

    y_pred_train_scaled = models.predict(X_train_scaled, 
                            use_spectral_sample=False)
    y_pred_train_scaled = DataSet(y_pred_train_scaled, columns=['y_0', 'y_1'])
    y_pred_train = y_pred_train_scaled*y_std+y_mean
    rmse_train = rmse(y_pred_train.to_numpy(), y_train.to_numpy())
    print(f"RMSE train y0 ={rmse_train[0].round(2)}, RMSE train y1={rmse_train[1].round(2)}")

    y_pred_test_scaled = models.predict(X_test_scaled, 
                                        use_spectral_sample=False)
    y_pred_test_scaled = DataSet(y_pred_test_scaled, columns=['y_0', 'y_1'])
    y_pred_test = y_pred_test_scaled*y_std+y_mean
    rmse_test = rmse(y_pred_test.to_numpy(), y_test.to_numpy())
    print(f"RMSE test y0 ={rmse_test[0].round(2)}, RMSE test y1={rmse_test[1].round(2)}")

    # Spectral sampling
    rmse_train_spectral = rmse_test_spectral = np.full(2, np.nan)  # keep defined if spectral sampling is skipped
    if use_spectral_sample:
        print(f"Spectral sampling with {n_spectral_points} spectral points.")
        for name, model in models.models.items():
            model.spectral_sample(X_train_scaled, y_train_scaled[[name]],
                                  n_spectral_points=n_spectral_points)

    # Model validation on spectral sampling
    if use_spectral_sample:
        y_pred_train_scaled = models.predict(X_train_scaled, 
                                use_spectral_sample=True)
        y_pred_train_scaled = DataSet(y_pred_train_scaled, columns=['y_0', 'y_1'])
        y_pred_train = y_pred_train_scaled*y_std+y_mean
        rmse_train_spectral = rmse(y_pred_train.to_numpy(), y_train.to_numpy())
        print(f"RMSE train spectral y0 ={rmse_train_spectral[0].round(2)}, RMSE train spectral y1={rmse_train_spectral[1].round(2)}")

        y_pred_test_scaled = models.predict(X_test_scaled, 
                                            use_spectral_sample=True)
        y_pred_test_scaled = DataSet(y_pred_test_scaled, columns=['y_0', 'y_1'])
        y_pred_test = y_pred_test_scaled*y_std+y_mean
        rmse_test_spectral = rmse(y_pred_test.to_numpy(), y_test.to_numpy())
        print(f"RMSE test spectral y0 ={rmse_test_spectral[0].round(2)}, RMSE test spectral y1={rmse_test_spectral[1].round(2)}")

    # Make parity plots for both objectives
    if plot:
        fig, axes = plt.subplots(1,2)
        fig.suptitle("With Spectral Sampling" if use_spectral_sample else "Without Spectral Sampling")
        for i, name in enumerate(models.models.keys()):
            axes[i].scatter(y_train[name], y_pred_train[name], 
                            label=f"Training: RMSE = {rmse_train[i].round(2)}")
            axes[i].scatter(y_test[name], y_pred_test[name],
                            label=f"Test: RMSE = {rmse_test[i].round(2)}")
            axes[i].plot([0,2], [0,2])
            axes[i].legend()
            axes[i].set_xlabel('Actual')
            axes[i].set_ylabel('Predicted')
            axes[i].set_title(name)
        plt.savefig('20200710_train_gp_matlab_data.png',dpi=300)
        plt.show()
    
    objectives = [m._model.objective_function() for m in models.models.values()]
    print("---------------------------------------------------------------")
    return dict(rmse_train_y0=rmse_train[0],
                rmse_train_y1=rmse_train[1],
                rmse_test_y0=rmse_test[0],
                rmse_test_y1=rmse_test[1],
                rmse_train_spectral_y0=rmse_train_spectral[0],
                rmse_train_spectral_y1=rmse_train_spectral[1],
                rmse_test_spectral_y0=rmse_test_spectral[0],
                rmse_test_spectral_y1=rmse_test_spectral[1],
                objective_y0=objectives[0],
                objective_y1=objectives[1])
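A possible invocation of fit_and_test; the split size is an assumption, and the data paths come from the function body:

# Sketch: train on the first 50 Matlab points, test on the remainder
results = fit_and_test(
    n_training_matlab=50,
    use_spectral_sample=True,
    plot=False,
)
print(results["rmse_test_y0"], results["rmse_test_y1"])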
Example #13
    def suggest_experiments(
        self, num_experiments=1, prev_res: DataSet = None, **kwargs
    ):
        """Suggest experiments using ENTMOOT tree-based Bayesian Optimization

        Parameters
        ----------
        num_experiments: int, optional
            The number of experiments (i.e., samples) to generate. Default is 1.
        prev_res: :class:`~summit.utils.data.DataSet`, optional
            Dataset with data from previous experiments of previous iteration.
            If no data is passed, then random sampling will
            be used to suggest an initial design.

        Returns
        -------
        next_experiments : :class:`~summit.utils.data.DataSet`
            A Dataset object with the suggested experiments

        """

        param = None
        xbest = np.zeros(self.domain.num_continuous_dimensions())
        obj = self.domain.output_variables[0]
        objective_dir = -1.0 if obj.maximize else 1.0
        fbest = float("inf")

        bounds = [k["domain"] for k in self.input_domain]

        space = Space(bounds)
        core_model = get_core_gurobi_model(space)
        gvars = core_model.getVars()

        for c in self.constraints:
            left = LinExpr()
            left.addTerms(c[0], gvars)
            left.addConstant(c[1])
            core_model.addLConstr(left, c[2], 0)

        core_model.update()

        entmoot_model = Optimizer(
            dimensions=bounds,
            base_estimator=self.estimator_type,
            std_estimator=self.std_estimator_type,
            n_initial_points=self.initial_points,
            initial_point_generator=self.generator_type,
            acq_func=self.acquisition_type,
            acq_optimizer=self.optimizer_type,
            random_state=None,
            acq_func_kwargs=None,
            acq_optimizer_kwargs={"add_model_core": core_model},
            base_estimator_kwargs={"min_child_samples": self.min_child_samples},
            std_estimator_kwargs=None,
            model_queue_size=None,
            verbose=False,
        )

        # If we have previous results:
        if prev_res is not None:
            # Get inputs and outputs
            inputs, outputs = self.transform.transform_inputs_outputs(
                prev_res, transform_descriptors=self.use_descriptors
            )

            # Set up maximization and minimization by converting maximization to minimization problem
            for v in self.domain.variables:
                if v.is_objective and v.maximize:
                    outputs[v.name] = -1 * outputs[v.name]
                if isinstance(v, CategoricalVariable):
                    if not self.use_descriptors:
                        inputs[v.name] = self.categorical_wrapper(
                            inputs[v.name], v.levels
                        )

            inputs = inputs.to_numpy()
            outputs = outputs.to_numpy()

            if self.prev_param is not None:
                X_step = self.prev_param[0]
                Y_step = self.prev_param[1]

                X_step = np.vstack((X_step, inputs))
                Y_step = np.vstack((Y_step, outputs))

            else:
                X_step = inputs
                Y_step = outputs
            # Convert to list form to give to optimizer
            prev_X = [list(x) for x in X_step]
            prev_y = [y for x in Y_step for y in x]

            # Train entmoot model
            entmoot_model.tell(prev_X, prev_y, fit=True)

            # Store parameters (history of suggested points and function evaluations)
            param = [X_step, Y_step]
            fbest = np.min(Y_step)
            xbest = X_step[np.argmin(Y_step)]

        request = np.array(
            entmoot_model.ask(n_points=num_experiments, strategy="cl_mean")
        )
        # Generate DataSet object with variable values of next experiments
        next_experiments = None
        transform_descriptors = False
        if request is not None and len(request) != 0:
            next_experiments = {}
            i_inp = 0
            for v in self.domain.variables:
                if not v.is_objective:
                    if isinstance(v, CategoricalVariable):
                        if v.ds is None or not self.use_descriptors:
                            cat_list = []
                            for j, entry in enumerate(request[:, i_inp]):
                                cat_list.append(
                                    self.categorical_unwrap(entry, v.levels)
                                )
                            next_experiments[v.name] = np.asarray(cat_list)
                            i_inp += 1
                        else:
                            descriptor_names = v.ds.data_columns
                            for d in descriptor_names:
                                next_experiments[d] = request[:, i_inp]
                                i_inp += 1
                            transform_descriptors = True
                    else:
                        next_experiments[v.name] = request[:, i_inp]
                        i_inp += 1
            next_experiments = DataSet.from_df(pd.DataFrame(data=next_experiments))
            next_experiments[("strategy", "METADATA")] = "ENTMOOT"

        self.fbest = objective_dir * fbest
        self.xbest = xbest
        self.prev_param = param

        # Do any necessary transformation back
        next_experiments = self.transform.un_transform(
            next_experiments, transform_descriptors=self.use_descriptors
        )

        return next_experiments
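Finally, the constraints handed to the Gurobi core model above appear to be (coefficients, constant, sense) triples; a sketch of that encoding with gurobipy (constraint values invented, e.g. x0 + x1 <= 1):

import gurobipy as gp

model = gp.Model()
x = list(model.addVars(2, lb=0.0, ub=1.0).values())
c = ([1.0, 1.0], -1.0, gp.GRB.LESS_EQUAL)  # x0 + x1 - 1 <= 0
expr = gp.LinExpr()
expr.addTerms(c[0], x)
expr.addConstant(c[1])
model.addLConstr(expr, c[2], 0)
model.update()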