def test_reizman_emulator(show_plots=False):
    b = get_pretrained_reizman_suzuki_emulator(case=1)
    b.parity_plot(include_test=True)
    if show_plots:
        plt.show()
    columns = [v.name for v in b.domain.variables]
    values = {
        "catalyst": ["P1-L3"],
        "t_res": [600],
        "temperature": [30],
        "catalyst_loading": [0.498],
    }
    conditions = pd.DataFrame(values)
    conditions = DataSet.from_df(conditions)
    results = b.run_experiments(conditions, return_std=True)

    for name, value in values.items():
        if isinstance(value[0], str):
            assert str(results[name].iloc[0]) == value[0]
        else:
            assert float(results[name].iloc[0]) == value[0]
    assert np.isclose(float(results["yld"]), 0.6, atol=15)
    assert np.isclose(float(results["ton"]), 1.1, atol=15)

    # Test serialization
    d = b.to_dict()
    exp = ReizmanSuzukiEmulator.from_dict(d)
    return results
def test_baumgartner_CC_emulator(use_descriptors, include_cost, show_plots=False):
    """Test the Baumgartner Cross Coupling emulator"""
    b = get_pretrained_baumgartner_cc_emulator(
        use_descriptors=use_descriptors, include_cost=include_cost
    )
    b.parity_plot(include_test=True)
    if show_plots:
        plt.show()
    columns = [v.name for v in b.domain.variables]
    values = {
        "catalyst": ["tBuXPhos"],
        "base": ["DBU"],
        "t_res": [328.717801570892],
        "temperature": [30],
        "base_equivalents": [2.18301549894049],
    }
    conditions = pd.DataFrame(values)
    conditions = DataSet.from_df(conditions)
    results = b.run_experiments(conditions, return_std=True)

    assert str(results["catalyst"].iloc[0]) == values["catalyst"][0]
    assert str(results["base"].iloc[0]) == values["base"][0]
    assert float(results["t_res"]) == values["t_res"][0]
    assert float(results["temperature"]) == values["temperature"][0]
    assert np.isclose(float(results["yld"]), 0.042832638, atol=0.15)

    # Test serialization
    d = b.to_dict()
    exp = BaumgartnerCrossCouplingEmulator.from_dict(d)
    return results
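# Minimal sketch of driving one of these emulators outside the test harness.
# The conditions mirror the Reizman test case above; the import paths are an
# assumption based on recent summit releases (summit.benchmarks and
# summit.utils.dataset) and may differ in other versions.
import pandas as pd
from summit.benchmarks import get_pretrained_reizman_suzuki_emulator
from summit.utils.dataset import DataSet

emulator = get_pretrained_reizman_suzuki_emulator(case=1)
conditions = DataSet.from_df(
    pd.DataFrame(
        {
            "catalyst": ["P1-L3"],
            "t_res": [600],
            "temperature": [30],
            "catalyst_loading": [0.498],
        }
    )
)
# return_std=True also returns the emulator's predictive standard deviation
results = emulator.run_experiments(conditions, return_std=True)
print(results)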
def to_dataset(self) -> DataSet:
    """Get design as a pandas dataframe

    Returns
    -------
    ds: summit.utils.dataset.DataSet
    """
    df = pd.DataFrame([])
    for i, variable in enumerate(self._domain.input_variables):
        if isinstance(variable, ContinuousVariable):
            values = self.get_values(variable.name)[:, 0]
        elif isinstance(variable, CategoricalVariable):
            # Map level indices back to the categorical level names
            values = [
                variable.levels[j] for j in self.get_indices(variable.name)[:, 0]
            ]
        df.insert(i, variable.name, values)
    return DataSet.from_df(df)
def to_dataset(self) -> DataSet:
    """Get design as a pandas dataframe

    Returns
    -------
    ds: summit.utils.dataset.DataSet
    """
    df = pd.DataFrame([])
    i = 0
    for variable in self._domain.variables:
        if variable.is_objective or variable.name in self.exclude:
            continue
        elif isinstance(variable, ContinuousVariable):
            values = self.get_values(variable.name)[:, 0]
        elif isinstance(variable, CategoricalVariable):
            # Map level indices back to the categorical level names;
            # use j so the column counter i is not shadowed
            values = [
                variable.levels[j] for j in self.get_indices(variable.name)[:, 0]
            ]
        df.insert(i, variable.name, values)
        i += 1
    return DataSet.from_df(df)
def suggest_experiments(
    self, num_experiments, criterion="center", exclude=[], **kwargs
) -> DataSet:
    """Generate latin hypercube initial design

    Parameters
    ----------
    num_experiments: int
        The number of experiments (i.e., samples) to generate
    criterion: str, optional
        The criterion used for the LHS. Allowable values are "center" or "c",
        "maximin" or "m", "centermaximin" or "cm", and "correlation" or "corr".
        Default is center.
    exclude: array like, optional
        List of variable names that should be excluded from the design.
        Default is None.

    Returns
    -------
    next_experiments : :class:`~summit.utils.data.DataSet`
        A Dataset object with the suggested experiments
    """
    # design = Design(self.domain, num_experiments, "Latin design", exclude=exclude)
    design = pd.DataFrame()

    # Instantiate the random design class to be used with
    # categorical variables that have no descriptors
    rdesigner = Random(self.domain, random_state=self._rstate)

    # Get categorical variables without descriptors
    categoricals = []
    for v in self.domain.input_variables:
        if isinstance(v, CategoricalVariable) and v.ds is None:
            categoricals.append(v.name)

    # Sampling
    n = self.domain.num_continuous_dimensions(include_descriptors=True)
    if len(categoricals) < n:
        samples = lhs(
            n,
            samples=num_experiments,
            criterion=criterion,
            random_state=self._rstate,
        )
    else:
        raise ValueError("Need sufficient number of variables")

    k = 0
    columns = []
    for variable in self.domain.input_variables:
        if variable.name in exclude:
            continue
        # For continuous variables, use samples directly
        if isinstance(variable, ContinuousVariable):
            b = variable.lower_bound * np.ones(num_experiments)
            values = b + samples[:, k] * (
                variable.upper_bound - variable.lower_bound
            )
            design.insert(design.shape[1], variable.name, values)
            k += 1
        # For categorical variables with no descriptors, choose randomly
        elif (
            isinstance(variable, CategoricalVariable)
            and variable.name in categoricals
        ):
            indices, values = rdesigner._random_categorical(
                variable, num_experiments
            )
            design.insert(design.shape[1], variable.name, values)
        # For categorical variables with descriptors, sample in descriptor space.
        # The untransform method at the end should find the closest point
        # by euclidean distance.
        elif isinstance(variable, CategoricalVariable) and variable.ds is not None:
            num_descriptors = variable.num_descriptors
            values = samples[:, k : k + num_descriptors]

            # Rescale the [0, 1] samples to the range of each descriptor
            var_min = (
                variable.ds.loc[:, variable.ds.data_columns].min(axis=0).to_numpy()
            )
            var_min = np.atleast_2d(var_min)
            var_max = (
                variable.ds.loc[:, variable.ds.data_columns].max(axis=0).to_numpy()
            )
            var_max = np.atleast_2d(var_max)
            var_range = var_max - var_min
            values_scaled = var_min + values * var_range
            values = values_scaled
            values.shape = (num_experiments, num_descriptors)
            k += num_descriptors

            # Add a column per descriptor
            names = variable.ds.columns.levels[0].to_list()
            for i in range(num_descriptors):
                design.insert(design.shape[1], names[i], values_scaled[:, i])
        else:
            raise DomainError(
                f"Variable {variable} is not one of the possible variable types "
                "(continuous or categorical)."
            )
        # design.add_variable(variable.name, values, indices=indices)

    design = DataSet.from_df(design)
    design[("strategy", "METADATA")] = "LHS"
    return self.transform.un_transform(design, transform_descriptors=True)
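# Minimal usage sketch of this design generator, assuming the public summit
# API (Domain, ContinuousVariable, and the LHS strategy); the variable names
# and bounds below are illustrative.
from summit.domain import Domain, ContinuousVariable
from summit.strategies import LHS

domain = Domain()
domain += ContinuousVariable(
    name="temperature", description="reaction temperature in C", bounds=[30, 100]
)
domain += ContinuousVariable(
    name="t_res", description="residence time in s", bounds=[60, 600]
)
domain += ContinuousVariable(
    name="yld", description="yield", bounds=[0, 100], is_objective=True, maximize=True
)

lhs = LHS(domain)
# Five samples spread over the two inputs; "maximin" is one of the
# criteria listed in the docstring above
suggestions = lhs.suggest_experiments(5, criterion="maximin")
print(suggestions)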
def main(self, num_input=3, prev_res=None, prev_param=None):
    import chemopt
    from chemopt.logger import get_handlers

    x0, y0 = prev_res[0], prev_res[1]
    module_path = os.path.dirname(chemopt.__file__)
    if self._pretrained_model_config_path:
        path = self._pretrained_model_config_path
    else:
        path = osp.join(
            module_path,
            "config_" + str(num_input) + "_inputs_" + str(self._model_size) + ".json",
        )
    # Load the config as a namedtuple so fields are attribute-accessible
    with open(path) as config_file:
        config = json.load(
            config_file,
            object_hook=lambda d: namedtuple("x", d.keys())(*d.values()),
        )
    saved_model_path = osp.join(
        os.path.dirname(os.path.realpath(path)), str(config.save_path)
    )

    if prev_param:
        if prev_param["iteration"] > config.unroll_length:
            raise ValueError(
                "Number of iterations exceeds unroll length of the pretrained model!"
            )

    logging.basicConfig(level=logging.WARNING, handlers=get_handlers())
    logger = logging.getLogger()

    cell = chemopt.rnn.StochasticRNNCell(
        cell=chemopt.rnn.LSTM,
        kwargs={"hidden_size": config.hidden_size},
        nlayers=config.num_layers,
        reuse=config.reuse,
    )
    optimizer = self.StepOptimizer(
        cell=cell,
        ndim=config.num_params,
        nsteps=config.num_steps,
        ckpt_path=saved_model_path,
        infer_model_path=self._infer_model_path,
        logger=logger,
        constraints=True,
        x=x0,
        y=y0,
    )
    x, state = optimizer.run(prev_res=y0, prev_param=prev_param)
    real_x = self.x_convert(x)

    next_experiments = {}
    i_inp = 0
    for v in self.domain.variables:
        if not v.is_objective:
            next_experiments[v.name] = [real_x[i_inp]]
            i_inp += 1
    next_experiments = DataSet.from_df(pd.DataFrame(data=next_experiments))
    next_experiments[("strategy", "METADATA")] = ["DRO"]

    param = {}
    if not y0:
        y0 = np.array([[float("inf")]])
        param["iteration"] = 0
    else:
        param["iteration"] = prev_param["iteration"] + 1

    # Track the best point seen so far
    if not prev_param:
        self.fbest = y0[0]
        self.xbest = real_x
    elif y0 < prev_param["fbest"]:
        self.fbest = y0[0]
        self.xbest = real_x
    else:
        self.fbest = prev_param["fbest"]
        self.xbest = prev_param["xbest"]
    param.update(
        {
            "state": state,
            "last_requested_point": x,
            "xbest": self.xbest,
            "fbest": self.fbest,
        }
    )
    tf.reset_default_graph()
    return next_experiments, param
def _run(self, conditions, **kwargs):
    condition = DataSet.from_df(conditions.to_frame().T)
    infer_dict = self.emulator.infer_model(dataset=condition)
    for k, v in infer_dict.items():
        conditions[(k, "DATA")] = v
    return conditions, None
def _inner_suggest_experiments(self, prev_res: DataSet = None, prev_param=None):
    """Inner loop for suggestion of experiments using the Nelder-Mead Simplex method

    Parameters
    ----------
    prev_res: summit.utils.data.DataSet, optional
        Dataset with data from previous experiments.
        If no data is passed, the Nelder-Mead optimization algorithm
        will be initialized and suggest initial experiments.
    prev_param:
        Parameters of the Nelder-Mead algorithm from previous iterations of an
        optimization problem. If no data is passed, the Nelder-Mead
        optimization algorithm will be initialized.
    """
    # internal flag
    stay_inner = False

    # Get bounds of input variables
    bounds = []
    input_var_names = []
    output_var_names = []
    for v in self.domain.variables:
        if not v.is_objective:
            if isinstance(v, ContinuousVariable):
                bounds.append(v.bounds)
                input_var_names.append(v.name)
            elif isinstance(v, CategoricalVariable):
                if v.ds is not None:
                    descriptor_names = v.ds.data_columns
                    descriptors = np.asarray(
                        [v.ds.loc[:, [l]].values.tolist() for l in v.ds.data_columns]
                    )
                else:
                    raise ValueError("No descriptors given for {}".format(v.name))
                for d in descriptors:
                    bounds.append([np.min(np.asarray(d)), np.max(np.asarray(d))])
                input_var_names.extend(descriptor_names)
            else:
                raise TypeError(
                    "Nelder-Mead can not handle variable type: {}".format(v.type)
                )
        else:
            # append, not extend: v.name is a string and extend would
            # add its individual characters
            output_var_names.append(v.name)
    bounds = np.asarray(bounds, dtype=float)

    # Extract dimension of input domain
    dim = len(bounds[:, 0])

    # Initialization
    initial_run = True
    x0 = [self._x_start]
    y0 = []

    # Get previous results
    if prev_res is not None:
        initial_run = False
        inputs, outputs = self.transform.transform_inputs_outputs(
            prev_res, transform_descriptors=True
        )

        # Set up maximization and minimization
        for v in self.domain.variables:
            if v.is_objective and v.maximize:
                outputs[v.name] = -1 * outputs[v.name]

        x0 = inputs.data_to_numpy()
        y0 = outputs.data_to_numpy()
    elif prev_param is not None:
        raise ValueError(
            "Parameters from a previous optimization iteration are given but "
            "previous results are missing!"
        )

    # If no previous results are given, initialize the center point as the
    # geometrical middle point of the bounds
    if len(x0[0]) == 0 and not self.random_start:
        x0 = np.ones((1, len(bounds))) * 0.5 * ((bounds[:, 1] + bounds[:, 0]).T)
    elif len(x0[0]) == 0 and self.random_start:
        weight = np.random.rand()
        # Random convex combination of the bounds
        x0 = np.ones((1, len(bounds))) * (
            weight * bounds[:, 1] + (1 - weight) * bounds[:, 0]
        ).T

    """
    Set Nelder-Mead parameters, i.e., initialize or include data from previous iterations
    --------
    prev_sim: array-like
        variable coordinates (points) of simplex from previous run
    prev_fsim: array-like
        function values corresponding to points of simplex from previous run
    x_iter: array-like
        variable coordinates and corresponding function values of potential new
        simplex points determined in one iteration of the NMS algorithm; note
        that within one iteration multiple points need to be evaluated; that's
        why we have to store the points of an unfinished iteration (start
        iteration -> request point -> run experiment -> restart same iteration
        with results of experiment -> request point -> run experiment ...
        -> finish iteration)
    red_dim: boolean
        True if dimension was reduced in one of the previous iterations and
        has not been recovered yet
    red_sim: array-like
        variable coordinates (points) of simplex before dimension was reduced
    red_fsim: array-like
        function values of points corresponding to simplex before dimension
        was reduced
    rec_dim: boolean
        True if dimension was recovered in last iteration
    memory: array-like
        list of all points for which the function was evaluated
    """
    prev_sim, prev_fsim, x_iter, red_dim, red_sim, red_fsim, rec_dim, memory = (
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        [np.ones(dim) * float("inf")],
    )

    # If this is not the first iteration of the Nelder-Mead algorithm,
    # get parameters from the previous iteration
    if prev_param:
        prev_sim = prev_param["sim"]
        red_dim = prev_param["red_dim"]
        red_sim = prev_param["red_sim"]
        red_fsim = prev_param["red_fsim"]
        rec_dim = prev_param["rec_dim"]
        memory = prev_param["memory"]

        # If the dimension was recovered in the last iteration, N function
        # evaluations were requested that need to be assigned to the
        # respective points in the simplex
        if rec_dim:
            prev_fsim = prev_param["fsim"]
            for k in range(len(x0)):
                for s in range(len(prev_sim)):
                    if np.array_equal(prev_sim[s], x0[k]):
                        prev_fsim[s] = y0[k]
            rec_dim = False
        # Assign function values to the respective points
        elif prev_param["fsim"] is not None:
            prev_fsim = prev_param["fsim"]
            x_iter = prev_param["x_iter"]
            for key, value in x_iter.items():
                if value is not None:
                    if key == "x_shrink":
                        for k in range(len(x0)):
                            for j in range(len(value)):
                                if np.array_equal(value[j][0], np.asarray(x0[k])):
                                    x_iter[key][j][1] = y0[k]
                    else:
                        for k in range(len(x0)):
                            if np.array_equal(value[0], np.asarray(x0[k])):
                                x_iter[key][1] = y0[k]
                                break
        else:
            prev_fsim = y0
    # Initialize with given simplex points (including function evaluations)
    elif prev_res is not None:
        prev_sim = x0
        prev_fsim = y0
        for p in x0.astype(float).tolist():
            memory.append(p)

    # Run Nelder-Mead Simplex algorithm for one iteration
    overfull_simplex = False
    if not red_dim:
        request, sim, fsim, x_iter = self._minimize_neldermead(
            x0=x0[0],
            bounds=bounds,
            x_iter=x_iter,
            f=prev_fsim,
            sim=prev_sim,
            adaptive=self._adaptive,
        )
        if not initial_run:
            (
                overfull_simplex,
                prev_sim,
                prev_fsim,
                red_sim,
                red_fsim,
                overfull_dim,
            ) = self.check_overfull(request, sim, fsim, bounds)

    ## Reduce dimension if n+1 points are located in n-1 dimensions (if either
    # red_dim = True, i.e., optimization in the reduced dimension space was not
    # finished in the last iteration, or overfull_simplex, i.e., the last
    # Nelder-Mead call (with red_dim = False) led to an overfull simplex).
    ## Note that in order to not lose any information, the simplex without
    # dimension reduction is returned even if the optimization in the reduced
    # dimension space is not finished.
    ## If the optimization in the reduced dimension space was not finished in
    # the last iteration (red_dim = True), the simplex will automatically be
    # reduced again.
    if red_dim or overfull_simplex:
        # Prepare dimension reduction
        if red_dim:
            x_iter, overfull_dim = self.upstream_simplex_dim_red(prev_sim, x_iter)
        else:
            x_iter = None

        # Save value of the reduced dimension
        save_dim = prev_sim[0][overfull_dim]
        # Delete overfull dimension
        new_prev_sim = np.delete(prev_sim, overfull_dim, 1)
        # Delete bounds for overfull dimension
        new_bounds = np.delete(bounds, overfull_dim, 0)

        # Run one iteration of the Nelder-Mead Simplex algorithm for the
        # reduced simplex
        request, sim, fsim, x_iter = self._minimize_neldermead(
            x0=new_prev_sim[0],
            x_iter=x_iter,
            bounds=new_bounds,
            f=prev_fsim,
            sim=new_prev_sim,
            adaptive=self._adaptive,
        )

        overfull_simplex, _, _, _, _, _ = self.check_overfull(
            request, sim, fsim, bounds
        )
        if overfull_simplex:
            raise NotImplementedError(
                "Recursive dimension reduction not implemented yet."
            )

        # Recover dimension after Nelder-Mead Simplex run
        # (to return the full request for the experiment)
        request = np.insert(request, overfull_dim, save_dim, 1)
        sim = np.insert(sim, overfull_dim, save_dim, 1)

        # Follow-up of dimension reduction
        x_iter = self.downstream_simplex_dim_red(x_iter, overfull_dim, save_dim)

        red_dim = True
    # If not overfull and no reduced dimension from previous iteration
    else:
        red_dim = False

    # Circle (suggested point has already been investigated)
    if any(np.array([np.array(memory == x).all(1).any() for x in request])):
        ## If the dimension is reduced and the requested point has already been
        # evaluated, recover the dimension with the reflected and translated
        # simplex from before the dimension reduction
        if red_dim:
            sim, fsim, request = self.recover_simplex_dim(
                sim, red_sim, red_fsim, overfull_dim, bounds, memory, self._dx
            )
            red_dim = False
            rec_dim = True
        # Raise error
        else:
            stay_inner = True
            # raise NotImplementedError("Circle - point has already been investigated.")

    ## Only small changes in requested points (xatol = tolerance for changes in x)
    # or in function values (fatol = tolerance for changes in f)
    ## TODO: add extra threshold to stop reduced dimension problem and recover dimension
    if not initial_run:
        xatol = (bounds[:, 1] - bounds[:, 0]) * self._dx
        fatol = self._df
        if (np.max(np.abs(sim[1:] - sim[0]), 0) <= xatol).all() or (
            np.max(np.abs(fsim[0] - fsim[1:])) <= fatol
        ).any():
            if red_dim:
                sim, fsim, request = self.recover_simplex_dim(
                    sim, red_sim, red_fsim, overfull_dim, bounds, memory, self._dx
                )
                red_dim = False
                rec_dim = True
            else:
                print(
                    "Warning, internal stopping criterion is reached. "
                    "Either points of simplex or function values of points of "
                    "simplex are very close to each other."
                )

    # Add requested points to memory
    for p in request.astype(float).tolist():
        memory.append(p)

    # Store parameters of this iteration
    param = dict(
        sim=sim,
        fsim=fsim,
        x_iter=x_iter,
        red_dim=red_dim,
        red_sim=red_sim,
        red_fsim=red_fsim,
        rec_dim=rec_dim,
        memory=memory,
    )

    # Generate DataSet object with variable values of next experiments
    next_experiments = {}
    for i, v in enumerate(input_var_names):
        next_experiments[v] = request[:, i]
    next_experiments = DataSet.from_df(pd.DataFrame(data=next_experiments))

    # Constraint violation
    mask_valid_next_experiments = self.check_constraints(next_experiments)
    if initial_run and not all(mask_valid_next_experiments):
        raise ValueError(
            "Default initialization failed due to constraints. "
            "Please enter an initial simplex with feasible points"
        )
    if not any(mask_valid_next_experiments):
        stay_inner = True

    if stay_inner:
        # Flag all suggested points as infeasible
        next_experiments[("constraint", "DATA")] = False
    else:
        # Add optimization strategy
        next_experiments[("constraint", "DATA")] = mask_valid_next_experiments
        next_experiments[("strategy", "METADATA")] = ["Nelder-Mead Simplex"] * len(
            request
        )

    # fbest corresponds to the transformed function values
    x_best = None
    f_best = float("inf")
    if not initial_run:
        x_best = sim[0]
        f_best = fsim[0]
        x_best = self.round(x_best, bounds, self._dx)
        # Truncate f_best to the precision implied by the tolerance self._df
        f_best = int(f_best * 10 ** int(np.log10(1 / self._df))) / 10 ** int(
            np.log10(1 / self._df)
        )
    # next_experiments = np.around(next_experiments, decimals=self._dx)

    # Do any necessary transformation back
    next_experiments = self.transform.un_transform(
        next_experiments, transform_descriptors=True
    )

    return next_experiments, x_best, f_best, param
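# Worked example of the f_best truncation above: the best function value is
# truncated toward zero at the precision implied by the tolerance self._df
# (the tolerance value here is illustrative).
import numpy as np

df = 0.01
f_best = 0.61803
scale = 10 ** int(np.log10(1 / df))  # 10**2 == 100
print(int(f_best * scale) / scale)   # 0.61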
def suggest_experiments(self, prev_res: DataSet = None, **kwargs):
    """Suggest experiments using the Gryffin optimization strategy

    Parameters
    ----------
    prev_res: :class:`~summit.utils.data.DataSet`, optional
        Dataset with data from previous experiments of previous iteration.
        If no data is passed, then random sampling will
        be used to suggest an initial design.

    Returns
    -------
    next_experiments : :class:`~summit.utils.data.DataSet`
        A Dataset object with the suggested experiments
    """
    param = None
    xbest = np.zeros(self.domain.num_continuous_dimensions())
    obj = self.domain.output_variables[0]
    fbest = float("inf")

    # Suggest random initial design
    if prev_res is None:
        request = self.gryffin.recommend(observations=[])
    else:
        # Get inputs and outputs
        inputs, outputs = self.transform.transform_inputs_outputs(
            prev_res, transform_descriptors=False
        )

        # Set up maximization and minimization by converting
        # maximization to a minimization problem
        for v in self.domain.variables:
            if v.is_objective and v.maximize:
                outputs[v.name] = -1 * outputs[v.name]

        inputs_dict = inputs.to_dict(orient="records")
        outputs_dict = outputs.to_dict(orient="records")
        prev_samples = [
            {
                **{k1[0]: [v1] for k1, v1 in inputs_dict[i].items()},
                **{k2[0]: v2 for k2, v2 in outputs_dict[i].items()},
            }
            for i in range(len(inputs_dict))
        ]

        observations = []
        if self.prev_param is not None:
            observations = self.prev_param
        observations.extend(prev_samples)
        param = observations

        request = self.gryffin.recommend(observations=observations)

        # Track the best observation so far
        for obs in observations:
            if obs[obj.name] < fbest:
                fbest = obs[obj.name]
                xbest = np.asarray([v[0] for k, v in obs.items() if k != obj.name])

    # Generate DataSet object with variable values of next experiments
    next_experiments = None
    if request is not None and len(request) != 0:
        next_experiments = {}
        for k in request[0].keys():
            next_experiments[k] = [r[k][0] for r in request]
        next_experiments = DataSet.from_df(pd.DataFrame(data=next_experiments))
        next_experiments[("strategy", "METADATA")] = "GRYFFIN"

    obj = self.domain.output_variables[0]
    objective_dir = -1.0 if obj.maximize else 1.0
    fbest = objective_dir * fbest
    self.fbest = fbest
    self.xbest = xbest
    self.prev_param = param

    # Do any necessary transformation back
    next_experiments = self.transform.un_transform(
        next_experiments, transform_descriptors=False
    )

    return next_experiments
def _inner_suggest_experiments(
    self, num_experiments, prev_res: DataSet = None, prev_param=None
):
    """Inner loop for generation of suggested experiments using the SNOBFIT method

    Parameters
    ----------
    num_experiments: int
        The number of experiments (i.e., samples) to generate
    prev_res: summit.utils.data.DataSet, optional
        Dataset with data from previous experiments.
        If no data is passed, the SNOBFIT optimization algorithm
        will be initialized and suggest initial experiments.
    prev_param: file.txt TODO: how to handle this?
        File with parameters of the SNOBFIT algorithm from previous iterations
        of an optimization problem.
        If no data is passed, the SNOBFIT optimization algorithm
        will be initialized.

    Returns
    -------
    next_experiments: DataSet
        A `Dataset` object with the experiments suggested by the SNOBFIT algorithm
    xbest: list
        List with variable settings of experiment with best outcome
    fbest: float
        Objective value at xbest
    param: list
        List with parameters and prev_param of SNOBFIT algorithm
        (required for next iteration)
    """
    # Extract dimension of input domain
    dim = self.domain.num_continuous_dimensions()

    # internal flag
    stay_inner = False

    # Get bounds of input variables
    bounds = []
    input_var_names = []
    output_var_names = []
    for v in self.domain.variables:
        if not v.is_objective:
            if isinstance(v, ContinuousVariable):
                bounds.append(v.bounds)
                input_var_names.append(v.name)
            elif isinstance(v, CategoricalVariable):
                if v.ds is not None:
                    descriptor_names = v.ds.data_columns
                    descriptors = np.asarray(
                        [v.ds.loc[:, [l]].values.tolist() for l in v.ds.data_columns]
                    )
                else:
                    raise ValueError("No descriptors given for {}".format(v.name))
                for d in descriptors:
                    bounds.append([np.min(np.asarray(d)), np.max(np.asarray(d))])
                input_var_names.extend(descriptor_names)
            else:
                raise TypeError(
                    "SNOBFIT can not handle variable type: {}".format(v.type)
                )
        else:
            # append, not extend: v.name is a string and extend would
            # add its individual characters
            output_var_names.append(v.name)
    bounds = np.asarray(bounds, dtype=float)

    # Initialization
    x0 = []
    y0 = []

    # Get previous results
    if prev_res is not None:
        # Always get the same order according to the ordering in the domain
        # -> this is actually done within transform
        # ordered_var_names = input_var_names + output_var_names
        # prev_res = prev_res[ordered_var_names]

        # Transform
        inputs, outputs = self.transform.transform_inputs_outputs(
            prev_res, transform_descriptors=True
        )

        # Set up maximization and minimization
        for v in self.domain.variables:
            if v.is_objective and v.maximize:
                outputs[v.name] = -1 * outputs[v.name]

        x0 = inputs.data_to_numpy()
        y0 = outputs.data_to_numpy()

        # Add uncertainties to measurements
        # TODO: include uncertainties in input
        y = []
        for i in range(y0.shape[0]):
            y.append([y0[i].tolist()[0], math.sqrt(np.spacing(1))])
        y0 = np.asarray(y, dtype=float)
    # If no prev_res are given but prev_param -> raise error
    elif prev_param is not None:
        raise ValueError(
            "Parameters from a previous optimization iteration are given but "
            "previous results are missing!"
        )

    # If no previous results are given, initialize with empty lists
    if not len(x0):
        x0 = np.array(x0).reshape(0, len(bounds))
        y0 = np.array(y0).reshape(0, 2)

    """
    Determine SNOBFIT parameters

    config    structure variable defining the box [u,v] in which the points
              are to be generated, the number nreq of points to be generated
              and the probability p that a point of type 4 is generated:
              config = struct('bounds', {u, v}, 'nreq', nreq, 'p', p)
    dx        only used for the definition of a new problem (when the program
              should continue from the values stored in file.mat, the call
              should have only 4 input parameters!)
              n-vector (n = dimension of the problem) of minimal steps, i.e.,
              two points are considered to be different if they differ by at
              least dx(i) in at least one coordinate i
    """
    config = {"bounds": bounds, "p": self._p, "nreq": num_experiments}
    dx = (bounds[:, 1] - bounds[:, 0]) * self._dx_dim

    # Run SNOBFIT for one iteration
    request, xbest, fbest, param = self.snobfit(x0, y0, config, dx, prev_param)

    # Generate DataSet object with variable values of next experiments
    next_experiments = {}
    for i, v in enumerate(input_var_names):
        next_experiments[v] = request[:, i]
    next_experiments = DataSet.from_df(pd.DataFrame(data=next_experiments))

    # Constraint violation
    mask_valid_next_experiments = self.check_constraints(next_experiments)
    if not any(mask_valid_next_experiments):
        stay_inner = True

    if stay_inner:
        # Flag all suggested points as infeasible
        next_experiments[("constraint", "DATA")] = False
    else:
        # Add optimization strategy
        next_experiments[("constraint", "DATA")] = mask_valid_next_experiments
        next_experiments[("strategy", "METADATA")] = ["SNOBFIT"] * len(request)

    # Do any necessary transformation back
    next_experiments = self.transform.un_transform(
        next_experiments, transform_descriptors=True
    )

    return next_experiments, xbest, fbest, param
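# Numeric illustration of the SNOBFIT resolution vector dx computed above
# (pure numpy; the bounds and relative resolution are made up). Two candidate
# points that differ by less than dx[i] in every coordinate i are treated as
# the same point by SNOBFIT.
import numpy as np

bounds = np.array([[30.0, 100.0], [60.0, 600.0]])  # stands in for bounds above
dx_dim = 1e-5                                      # stands in for self._dx_dim
dx = (bounds[:, 1] - bounds[:, 0]) * dx_dim
print(dx)  # [0.0007 0.0054]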
def suggest_experiments(self, num_experiments=1, prev_res: DataSet = None, **kwargs):
    """Suggest experiments using GPyOpt single-objective Bayesian optimization

    Parameters
    ----------
    num_experiments: int, optional
        The number of experiments (i.e., samples) to generate. Default is 1.
    prev_res: :class:`~summit.utils.data.DataSet`, optional
        Dataset with data from previous experiments of previous iteration.
        If no data is passed, then random sampling will
        be used to suggest an initial design.

    Returns
    -------
    next_experiments : :class:`~summit.utils.data.DataSet`
        A Dataset object with the suggested experiments
    """
    param = None
    xbest = np.zeros(self.domain.num_continuous_dimensions())
    obj = self.domain.output_variables[0]
    objective_dir = -1.0 if obj.maximize else 1.0
    fbest = float("inf")

    # Suggest random initial design
    if prev_res is None:
        """lhs design does not consider constraints
        lhs = LHS(self.domain)
        next_experiments = lhs.suggest_experiments((num_experiments))
        return next_experiments, None, float("inf"), None
        """
        feasible_region = GPyOpt.Design_space(
            space=self.input_domain, constraints=self.constraints
        )
        request = GPyOpt.experiment_design.initial_design(
            "random", feasible_region, num_experiments
        )
    else:
        # Get inputs and outputs
        inputs, outputs = self.transform.transform_inputs_outputs(
            prev_res, transform_descriptors=self.use_descriptors
        )

        # Set up maximization and minimization by converting
        # maximization to a minimization problem
        for v in self.domain.variables:
            if v.is_objective and v.maximize:
                outputs[v.name] = -1 * outputs[v.name]
            if isinstance(v, CategoricalVariable):
                if not self.use_descriptors:
                    inputs[v.name] = self.categorical_wrapper(
                        inputs[v.name], v.levels
                    )

        inputs = inputs.to_numpy()
        outputs = outputs.to_numpy()

        if self.prev_param is not None:
            X_step = self.prev_param[0]
            Y_step = self.prev_param[1]
            X_step = np.vstack((X_step, inputs))
            Y_step = np.vstack((Y_step, outputs))
        else:
            X_step = inputs
            Y_step = outputs

        sobo_model = GPyOpt.methods.BayesianOptimization(
            f=None,
            domain=self.input_domain,
            constraints=self.constraints,
            model_type=self.gp_model_type,
            kernel=self.kernel,
            acquisition_type=self.acquisition_type,
            acquisition_optimizer_type=self.optimizer_type,
            normalize_Y=self.standardize_outputs,
            batch_size=num_experiments,
            evaluator_type=self.evaluator_type,
            maximize=False,
            ARD=self.ARD,
            exact_feval=self.exact_feval,
            X=X_step,
            Y=Y_step,
        )
        request = sobo_model.suggest_next_locations()

        # Store parameters (history of suggested points and function evaluations)
        param = [X_step, Y_step]

        fbest = np.min(Y_step)
        xbest = X_step[np.argmin(Y_step)]

    # Generate DataSet object with variable values of next experiments
    next_experiments = None
    transform_descriptors = False
    if request is not None and len(request) != 0:
        next_experiments = {}
        i_inp = 0
        for v in self.domain.variables:
            if not v.is_objective:
                if isinstance(v, CategoricalVariable):
                    if v.ds is None or not self.use_descriptors:
                        cat_list = []
                        for j, entry in enumerate(request[:, i_inp]):
                            cat_list.append(self.categorical_unwrap(entry, v.levels))
                        next_experiments[v.name] = np.asarray(cat_list)
                        i_inp += 1
                    else:
                        descriptor_names = v.ds.data_columns
                        for d in descriptor_names:
                            next_experiments[d] = request[:, i_inp]
                            i_inp += 1
                        transform_descriptors = True
                else:
                    next_experiments[v.name] = request[:, i_inp]
                    i_inp += 1
        next_experiments = DataSet.from_df(pd.DataFrame(data=next_experiments))
        next_experiments[("strategy", "METADATA")] = "Single-objective BayOpt"

    self.fbest = objective_dir * fbest
    self.xbest = xbest
    self.prev_param = param

    # Do any necessary transformation back
    next_experiments = self.transform.un_transform(
        next_experiments, transform_descriptors=self.use_descriptors
    )

    return next_experiments
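# Minimal sketch of the level/index round trip that categorical_wrapper and
# categorical_unwrap appear to perform when descriptors are not used. Their
# exact implementations are not shown in this section, so these stand-ins are
# an assumption: GPyOpt works on numeric arrays, so levels are encoded as
# integer indices on the way in and decoded on the way out.
levels = ["tBuXPhos", "tBuBrettPhos", "AlPhos"]  # illustrative catalyst levels

def wrap(values, levels):
    # level name -> integer index
    return [levels.index(v) for v in values]

def unwrap(index, levels):
    # integer index -> level name (indices may come back as floats)
    return levels[int(round(index))]

encoded = wrap(["AlPhos", "tBuXPhos"], levels)
print(encoded)              # [2, 0]
print(unwrap(0.0, levels))  # 'tBuXPhos'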
def fit_and_test(
    n_training_matlab,
    num_restarts=100,
    max_iters=2000,
    n_spectral_points=4000,
    use_spectral_sample=True,
    plot=True,
):
    # Read in data from one Matlab experiment
    X = pd.read_csv(
        "data/matlab/experiment_1/X.csv", names=[f"x_{i}" for i in range(6)]
    )
    y = pd.read_csv("data/matlab/experiment_1/Y.csv", names=["y_0", "y_1"])
    X = DataSet.from_df(X)
    y = DataSet.from_df(y)

    # Train-test split
    X_train = X.iloc[:n_training_matlab, :]
    X_test = X.iloc[n_training_matlab:, :]
    y_train = y.iloc[:n_training_matlab, :]
    y_test = y.iloc[n_training_matlab:, :]
    print("Number of training data:", X_train.shape[0])
    print("Number of test data:", X_test.shape[0])

    # Scale decision variables between 0 and 1
    X_min = X_train.min()
    X_max = X_train.max()
    X_train_scaled = (X_train - X_min) / (X_max - X_min)
    X_test_scaled = (X_test - X_min) / (X_max - X_min)

    # Scale objectives to zero mean and unit variance
    y_mean = y_train.mean()
    y_std = y_train.std()
    y_train_scaled = (y_train - y_mean) / y_std

    # Train model
    print(f"Fitting models (number of optimization restarts={num_restarts})")
    kerns = [GPy.kern.Exponential(input_dim=6, ARD=True) for _ in range(2)]
    models = ModelGroup(
        {"y_0": GPyModel(kernel=kerns[0]), "y_1": GPyModel(kernel=kerns[1])}
    )
    models.fit(
        X_train_scaled,
        y_train_scaled,
        num_restarts=num_restarts,
        max_iters=max_iters,
        parallel=True,
        n_spectral_points=n_spectral_points,
        spectral_sample=False,  # spectral sampling done below
    )
    for name, model in models.models.items():
        hyp = model.hyperparameters
        print(f"Model {name} lengthscales: {hyp[0]}")
        print(f"Model {name} variance: {hyp[1]}")
        print(f"Model {name} noise: {hyp[2]}")

    # Model validation
    rmse = lambda pred, actual: np.sqrt(np.mean((pred - actual) ** 2, axis=0))

    y_pred_train_scaled = models.predict(X_train_scaled, use_spectral_sample=False)
    y_pred_train_scaled = DataSet(y_pred_train_scaled, columns=["y_0", "y_1"])
    y_pred_train = y_pred_train_scaled * y_std + y_mean
    rmse_train = rmse(y_pred_train.to_numpy(), y_train.to_numpy())
    print(
        f"RMSE train y0={rmse_train[0].round(2)}, "
        f"RMSE train y1={rmse_train[1].round(2)}"
    )

    y_pred_test_scaled = models.predict(X_test_scaled, use_spectral_sample=False)
    y_pred_test_scaled = DataSet(y_pred_test_scaled, columns=["y_0", "y_1"])
    y_pred_test = y_pred_test_scaled * y_std + y_mean
    rmse_test = rmse(y_pred_test.to_numpy(), y_test.to_numpy())
    print(
        f"RMSE test y0={rmse_test[0].round(2)}, "
        f"RMSE test y1={rmse_test[1].round(2)}"
    )

    # Spectral sampling and model validation on the spectral samples.
    # Default to NaN so the summary dict below is well-defined when
    # spectral sampling is skipped.
    rmse_train_spectral = rmse_test_spectral = [float("nan")] * 2
    if use_spectral_sample:
        print(f"Spectral sampling with {n_spectral_points} spectral points.")
        for name, model in models.models.items():
            model.spectral_sample(
                X_train_scaled,
                y_train_scaled[[name]],
                n_spectral_points=n_spectral_points,
            )

        y_pred_train_scaled = models.predict(
            X_train_scaled, use_spectral_sample=True
        )
        y_pred_train_scaled = DataSet(y_pred_train_scaled, columns=["y_0", "y_1"])
        y_pred_train = y_pred_train_scaled * y_std + y_mean
        rmse_train_spectral = rmse(y_pred_train.to_numpy(), y_train.to_numpy())
        print(
            f"RMSE train spectral y0={rmse_train_spectral[0].round(2)}, "
            f"RMSE train spectral y1={rmse_train_spectral[1].round(2)}"
        )

        y_pred_test_scaled = models.predict(X_test_scaled, use_spectral_sample=True)
        y_pred_test_scaled = DataSet(y_pred_test_scaled, columns=["y_0", "y_1"])
        y_pred_test = y_pred_test_scaled * y_std + y_mean
        rmse_test_spectral = rmse(y_pred_test.to_numpy(), y_test.to_numpy())
        print(
            f"RMSE test spectral y0={rmse_test_spectral[0].round(2)}, "
            f"RMSE test spectral y1={rmse_test_spectral[1].round(2)}"
        )

    # Make parity plots for both objectives
    if plot:
        fig, axes = plt.subplots(1, 2)
        fig.suptitle(
            "With Spectral Sampling"
            if use_spectral_sample
            else "Without Spectral Sampling"
        )
        for i, name in enumerate(models.models.keys()):
            axes[i].scatter(
                y_train[name],
                y_pred_train[name],
                label=f"Training: RMSE = {rmse_train[i].round(2)}",
            )
            axes[i].scatter(
                y_test[name],
                y_pred_test[name],
                label=f"Test: RMSE = {rmse_test[i].round(2)}",
            )
            axes[i].plot([0, 2], [0, 2])
            axes[i].legend()
            axes[i].set_xlabel("Actual")
            axes[i].set_ylabel("Predicted")
            axes[i].set_title(name)
        plt.savefig("20200710_train_gp_matlab_data.png", dpi=300)
        plt.show()

    objectives = [m._model.objective_function() for m in models.models.values()]
    print("---------------------------------------------------------------")
    return dict(
        rmse_train_y0=rmse_train[0],
        rmse_train_y1=rmse_train[1],
        rmse_test_y0=rmse_test[0],
        rmse_test_y1=rmse_test[1],
        rmse_train_spectral_y0=rmse_train_spectral[0],
        rmse_train_spectral_y1=rmse_train_spectral[1],
        rmse_test_spectral_y0=rmse_test_spectral[0],
        rmse_test_spectral_y1=rmse_test_spectral[1],
        objective_y0=objectives[0],
        objective_y1=objectives[1],
    )
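# The scaling conventions above (min-max for inputs, standardization for
# outputs) are easy to get backwards; a short self-contained numpy check of
# the round trip, with made-up data.
import numpy as np

rng = np.random.default_rng(0)
X = rng.uniform(30, 100, size=(10, 2))  # made-up decision variables
y = rng.normal(5, 2, size=(10, 1))      # made-up objective values

# Inputs: min-max scale to [0, 1]
X_min, X_max = X.min(axis=0), X.max(axis=0)
X_scaled = (X - X_min) / (X_max - X_min)

# Outputs: standardize to zero mean, unit variance
y_mean, y_std = y.mean(axis=0), y.std(axis=0)
y_scaled = (y - y_mean) / y_std

# Predictions come back in scaled space and must be un-standardized,
# exactly as y_pred = y_pred_scaled * y_std + y_mean above
assert np.allclose(y_scaled * y_std + y_mean, y)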
def suggest_experiments(self, num_experiments=1, prev_res: DataSet = None, **kwargs):
    """Suggest experiments using ENTMOOT tree-based Bayesian optimization

    Parameters
    ----------
    num_experiments: int, optional
        The number of experiments (i.e., samples) to generate. Default is 1.
    prev_res: :class:`~summit.utils.data.DataSet`, optional
        Dataset with data from previous experiments of previous iteration.
        If no data is passed, then random sampling will
        be used to suggest an initial design.

    Returns
    -------
    next_experiments : :class:`~summit.utils.data.DataSet`
        A Dataset object with the suggested experiments
    """
    param = None
    xbest = np.zeros(self.domain.num_continuous_dimensions())
    obj = self.domain.output_variables[0]
    objective_dir = -1.0 if obj.maximize else 1.0
    fbest = float("inf")

    bounds = [k["domain"] for k in self.input_domain]
    space = Space(bounds)

    # Add linear constraints to the Gurobi core model:
    # each constraint c is (coefficients, constant, sense)
    core_model = get_core_gurobi_model(space)
    gvars = core_model.getVars()
    for c in self.constraints:
        left = LinExpr()
        left.addTerms(c[0], gvars)
        left.addConstant(c[1])
        core_model.addLConstr(left, c[2], 0)
    core_model.update()

    entmoot_model = Optimizer(
        dimensions=bounds,
        base_estimator=self.estimator_type,
        std_estimator=self.std_estimator_type,
        n_initial_points=self.initial_points,
        initial_point_generator=self.generator_type,
        acq_func=self.acquisition_type,
        acq_optimizer=self.optimizer_type,
        random_state=None,
        acq_func_kwargs=None,
        acq_optimizer_kwargs={"add_model_core": core_model},
        base_estimator_kwargs={"min_child_samples": self.min_child_samples},
        std_estimator_kwargs=None,
        model_queue_size=None,
        verbose=False,
    )

    # If we have previous results:
    if prev_res is not None:
        # Get inputs and outputs
        inputs, outputs = self.transform.transform_inputs_outputs(
            prev_res, transform_descriptors=self.use_descriptors
        )

        # Set up maximization and minimization by converting
        # maximization to a minimization problem
        for v in self.domain.variables:
            if v.is_objective and v.maximize:
                outputs[v.name] = -1 * outputs[v.name]
            if isinstance(v, CategoricalVariable):
                if not self.use_descriptors:
                    inputs[v.name] = self.categorical_wrapper(
                        inputs[v.name], v.levels
                    )

        inputs = inputs.to_numpy()
        outputs = outputs.to_numpy()

        if self.prev_param is not None:
            X_step = self.prev_param[0]
            Y_step = self.prev_param[1]
            X_step = np.vstack((X_step, inputs))
            Y_step = np.vstack((Y_step, outputs))
        else:
            X_step = inputs
            Y_step = outputs

        # Convert to list form to give to the optimizer
        prev_X = [list(x) for x in X_step]
        prev_y = [y for x in Y_step for y in x]

        # Train entmoot model
        entmoot_model.tell(prev_X, prev_y, fit=True)

        # Store parameters (history of suggested points and function evaluations)
        param = [X_step, Y_step]
        fbest = np.min(Y_step)
        xbest = X_step[np.argmin(Y_step)]

    request = np.array(
        entmoot_model.ask(n_points=num_experiments, strategy="cl_mean")
    )

    # Generate DataSet object with variable values of next experiments
    next_experiments = None
    transform_descriptors = False
    if request is not None and len(request) != 0:
        next_experiments = {}
        i_inp = 0
        for v in self.domain.variables:
            if not v.is_objective:
                if isinstance(v, CategoricalVariable):
                    if v.ds is None or not self.use_descriptors:
                        cat_list = []
                        for j, entry in enumerate(request[:, i_inp]):
                            cat_list.append(self.categorical_unwrap(entry, v.levels))
                        next_experiments[v.name] = np.asarray(cat_list)
                        i_inp += 1
                    else:
                        descriptor_names = v.ds.data_columns
                        for d in descriptor_names:
                            next_experiments[d] = request[:, i_inp]
                            i_inp += 1
                        transform_descriptors = True
                else:
                    next_experiments[v.name] = request[:, i_inp]
                    i_inp += 1
        next_experiments = DataSet.from_df(pd.DataFrame(data=next_experiments))
        next_experiments[("strategy", "METADATA")] = "ENTMOOT"

    self.fbest = objective_dir * fbest
    self.xbest = xbest
    self.prev_param = param

    # Do any necessary transformation back
    next_experiments = self.transform.un_transform(
        next_experiments, transform_descriptors=self.use_descriptors
    )

    return next_experiments
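# The constraint loop above builds "coefficients . x + constant <sense> 0" in
# the Gurobi core model from tuples (coefficients, constant, sense). A
# pure-numpy feasibility check of the same encoding; the tuple layout is
# inferred from that loop and the example constraint is made up.
import numpy as np

# x0 + x1 - 10 <= 0, i.e. x0 + x1 <= 10
coefficients = np.array([1.0, 1.0])
constant = -10.0
sense = "<"

def satisfies(x, coefficients, constant, sense):
    # Evaluate coefficients . x + constant against 0
    value = coefficients @ x + constant
    return value <= 0 if sense == "<" else value >= 0

print(satisfies(np.array([3.0, 4.0]), coefficients, constant, sense))  # True
print(satisfies(np.array([8.0, 4.0]), coefficients, constant, sense))  # False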