def __init__(
    self,
    domain: Domain,
    transform: Transform = None,
    pretrained_model_config_path=None,
    model_size="standard",
    **kwargs,
):
    Strategy.__init__(self, domain, transform)

    # Create a per-run directory to store temporary files
    summit_config_path = get_summit_config_path()
    self.uuid_val = uuid.uuid4()  # Unique identifier for this run
    tmp_dir = summit_config_path / "dro" / str(self.uuid_val)
    os.makedirs(tmp_dir, exist_ok=True)

    self._pretrained_model_config_path = pretrained_model_config_path
    self._infer_model_path = tmp_dir
    self._model_size = model_size
    self.prev_param = None
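# Hedged usage sketch for the constructor above. The surrounding class name is
# an assumption (the "dro" temp-directory name suggests summit's DRO strategy);
# Domain and ContinuousVariable are real summit classes:
#
#     from summit.domain import Domain, ContinuousVariable
#
#     domain = Domain()
#     domain += ContinuousVariable(
#         name="temperature", description="reaction temperature in C", bounds=[30, 100]
#     )
#     strategy = DRO(domain, model_size="standard")  # hypothetical class name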
def run(self, **kwargs):
    """Run the closed-loop experiment cycle.

    Parameters
    ----------
    save_freq : int, optional
        The frequency with which to checkpoint the state of the
        optimization. Defaults to None (no periodic checkpoints).
    save_at_end : bool, optional
        Save the state of the optimization at the end of a run, even if
        it is stopped early. Default is True.
    save_dir : str, optional
        The directory to save checkpoints locally.
        Defaults to `~/.summit/runner`.
    """
    # Set parameters
    prev_res = None
    nstop = 0
    self.restarts = 0
    n_objs = len(self.experiment.domain.output_variables)
    fbest_old = np.zeros(n_objs)
    fbest = np.zeros(n_objs)

    # Serialization
    save_freq = kwargs.get("save_freq")
    # Only keep local copies of checkpoints if the caller passed save_dir
    keep_local = "save_dir" in kwargs
    save_dir = kwargs.get("save_dir", str(get_summit_config_path()))
    self.uuid_val = uuid.uuid4()
    save_dir = pathlib.Path(save_dir) / "runner" / str(self.uuid_val)
    os.makedirs(save_dir, exist_ok=True)
    save_at_end = kwargs.get("save_at_end", True)

    # Create neptune experiment
    if self.neptune_exp is None:
        session = Session(backend=HostedNeptuneBackend())
        proj = session.get_project(self.neptune_project)
        neptune_exp = proj.create_experiment(
            name=self.neptune_experiment_name,
            description=self.neptune_description,
            upload_source_files=self.neptune_files,
            logger=self.logger,
            tags=self.neptune_tags,
        )
    else:
        neptune_exp = self.neptune_exp

    # Run optimization loop
    for i in progress_bar(range(self.max_iterations)):
        # Get experiment suggestions
        if i == 0:
            k = self.n_init if self.n_init is not None else self.batch_size
            next_experiments = self.strategy.suggest_experiments(num_experiments=k)
        else:
            next_experiments = self.strategy.suggest_experiments(
                num_experiments=self.batch_size, prev_res=prev_res
            )
        prev_res = self.experiment.run_experiments(next_experiments)

        # Send best objective values to Neptune
        for j, v in enumerate(self.experiment.domain.output_variables):
            if i > 0:
                fbest_old[j] = fbest[j]
            if v.maximize:
                fbest[j] = self.experiment.data[v.name].max()
            else:
                fbest[j] = self.experiment.data[v.name].min()
            neptune_exp.send_metric(v.name + "_best", fbest[j])

        # Send hypervolume for multiobjective experiments
        if n_objs > 1:
            output_names = [v.name for v in self.experiment.domain.output_variables]
            data = self.experiment.data[output_names].copy()
            for v in self.experiment.domain.output_variables:
                if v.maximize:
                    data[(v.name, "DATA")] = -1.0 * data[v.name]
            y_pareto, _ = pareto_efficient(data.to_numpy(), maximize=False)
            hv = hypervolume(y_pareto, self.ref)
            neptune_exp.send_metric("hypervolume", hv)

        # Save state
        if save_freq is not None and i % save_freq == 0:
            file = save_dir / f"iteration_{i}.json"
            self.save(file)
            neptune_exp.send_artifact(str(file))
            if not keep_local:
                os.remove(file)

        # Stop if no improvement
        compare = np.abs(fbest - fbest_old) > self.f_tol
        if all(compare) or i <= 1:
            nstop = 0
        else:
            nstop += 1
        if self.max_same is not None:
            if nstop >= self.max_same and self.restarts >= self.max_restarts:
                self.logger.info(
                    f"{self.strategy.__class__.__name__} stopped after "
                    f"{i + 1} iterations and {self.restarts} restarts."
                )
                break
            elif nstop >= self.max_same:
                # Restart the strategy from scratch
                nstop = 0
                prev_res = None
                self.strategy.reset()
                self.restarts += 1

    # Save at end
    if save_at_end:
        file = save_dir / f"iteration_{i}.json"
        self.save(file)
        neptune_exp.send_artifact(str(file))
        if not keep_local:
            os.remove(file)

    # Stop the neptune experiment
    neptune_exp.stop()
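# Hedged usage sketch for the Neptune-enabled run() above. SnarBenchmark and
# TSEMO are real summit classes; the runner class name and its constructor
# arguments below are assumptions inferred from the attributes run() uses:
#
#     from summit.benchmarks import SnarBenchmark
#     from summit.strategies import TSEMO
#
#     exp = SnarBenchmark()
#     strategy = TSEMO(exp.domain)
#     runner = NeptuneRunner(  # assumed class; see the neptune_* attributes above
#         strategy=strategy,
#         experiment=exp,
#         neptune_project="my-workspace/my-project",
#         max_iterations=50,
#     )
#     runner.run(save_freq=5, save_at_end=True)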
def run(self, **kwargs):
    """Run the closed-loop experiment cycle.

    Parameters
    ----------
    save_freq : int, optional
        The frequency with which to checkpoint the state of the
        optimization. Defaults to None (no periodic checkpoints).
    save_at_end : bool, optional
        Save the state of the optimization at the end of a run, even if
        it is stopped early. Default is True.
    save_dir : str, optional
        The directory to save checkpoints locally.
        Defaults to `~/.summit/runner`.
    """
    # Serialization
    save_freq = kwargs.get("save_freq")
    save_dir = kwargs.get("save_dir", str(get_summit_config_path()))
    self.uuid_val = uuid.uuid4()
    save_dir = pathlib.Path(save_dir) / "runner" / str(self.uuid_val)
    os.makedirs(save_dir, exist_ok=True)
    save_at_end = kwargs.get("save_at_end", True)

    # Set parameters
    n_objs = len(self.experiment.domain.output_variables)
    fbest_old = np.zeros(n_objs)
    fbest = np.zeros(n_objs)
    prev_res = None
    nstop = 0
    self.restarts = 0

    for i in progress_bar(range(self.max_iterations)):
        # Get experiment suggestions
        if i == 0:
            k = self.n_init if self.n_init is not None else self.batch_size
            next_experiments = self.strategy.suggest_experiments(num_experiments=k)
        else:
            next_experiments = self.strategy.suggest_experiments(
                num_experiments=self.batch_size, prev_res=prev_res
            )
        prev_res = self.experiment.run_experiments(next_experiments)

        # Track the best value of each objective
        for j, v in enumerate(self.experiment.domain.output_variables):
            if i > 0:
                fbest_old[j] = fbest[j]
            if v.maximize:
                fbest[j] = self.experiment.data[v.name].max()
            else:
                fbest[j] = self.experiment.data[v.name].min()

        # Save state
        if save_freq is not None and i % save_freq == 0:
            file = save_dir / f"iteration_{i}.json"
            self.save(file)

        # Stop if no improvement
        compare = np.abs(fbest - fbest_old) > self.f_tol
        if all(compare) or i <= 1:
            nstop = 0
        else:
            nstop += 1
        if self.max_same is not None:
            if nstop >= self.max_same and self.restarts >= self.max_restarts:
                self.logger.info(
                    f"{self.strategy.__class__.__name__} stopped after "
                    f"{i + 1} iterations and {self.restarts} restarts."
                )
                break
            elif nstop >= self.max_same:
                # Restart the strategy from scratch
                nstop = 0
                prev_res = None
                self.strategy.reset()
                self.restarts += 1

    # Save at end
    if save_at_end:
        file = save_dir / f"iteration_{i}.json"
        self.save(file)
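# Small illustration of the stopping test used above: an iteration counts as
# an improvement only if every objective's best value moved by more than
# f_tol. All names below are local to this sketch:
#
#     import numpy as np
#
#     f_tol = 1e-3
#     fbest_old = np.array([0.50, 1.20])
#     fbest = np.array([0.52, 1.2001])
#     improved = np.abs(fbest - fbest_old) > f_tol  # array([ True, False])
#     # all(improved) is False, so nstop would be incremented this iteration.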
def suggest_experiments(self, num_experiments, prev_res: DataSet = None, **kwargs):
    """Suggest experiments using TSEMO

    Parameters
    ----------
    num_experiments : int
        The number of experiments (i.e., samples) to generate
    prev_res : :class:`~summit.utils.data.DataSet`, optional
        Dataset with data from previous experiments.
        If no data is passed, then Latin hypercube sampling will
        be used to suggest an initial design.

    Returns
    -------
    next_experiments : :class:`~summit.utils.data.DataSet`
        A Dataset object with the suggested experiments
    """
    # Suggest an LHS initial design, or append new experiments to previous ones
    if prev_res is None:
        lhs = LHS(self.domain)
        self.iterations += 1
        k = num_experiments if num_experiments > 1 else 2
        return lhs.suggest_experiments(k, criterion="maximin")
    elif self.iterations == 1 and len(prev_res) == 1:
        lhs = LHS(self.domain)
        self.iterations += 1
        self.all_experiments = prev_res
        return lhs.suggest_experiments(num_experiments)
    elif prev_res is not None and self.all_experiments is None:
        self.all_experiments = prev_res
    elif prev_res is not None and self.all_experiments is not None:
        self.all_experiments = self.all_experiments.append(prev_res)

    # Get inputs (decision variables) and outputs (objectives)
    inputs, outputs = self.transform.transform_inputs_outputs(
        self.all_experiments, transform_descriptors=True
    )
    if inputs.shape[0] < self.domain.num_continuous_dimensions():
        self.logger.warning(
            f"The number of examples ({inputs.shape[0]}) is less than the "
            f"number of input dimensions ({self.domain.num_continuous_dimensions()})."
        )

    # Scale decision variables to [0, 1]
    inputs_scaled = (inputs - self.inputs_min.to_numpy()) / (
        self.inputs_max.to_numpy() - self.inputs_min.to_numpy()
    )

    # Standardize objectives
    self.output_mean = outputs.mean()
    std = outputs.std()
    std[std < 1e-5] = 1e-5  # Guard against division by (near) zero
    self.output_std = std
    outputs_scaled = (
        outputs - self.output_mean.to_numpy()
    ) / self.output_std.to_numpy()

    # Set up models
    input_dim = self.kern_dim
    self.models = {
        v.name: gpr(
            inputs_scaled.to_numpy(),
            outputs_scaled[[v.name]].to_numpy(),
            kernel=self.kernel(input_dim=input_dim, ARD=True),
        )
        for v in self.domain.output_variables
    }

    output_dim = len(self.domain.output_variables)
    rmse_train = np.zeros(output_dim)
    rmse_train_spectral = np.zeros(output_dim)
    lengthscales = [None for _ in range(output_dim)]
    variances = [None for _ in range(output_dim)]
    noises = [None for _ in range(output_dim)]
    rffs = [None for _ in range(output_dim)]
    num_restarts = kwargs.get("num_restarts", 100)  # kwarg solely for debugging
    self.logger.debug(
        f"Fitting models (number of optimization restarts={num_restarts})\n"
    )
    for i, (name, model) in enumerate(self.models.items()):
        # Constrain hyperparameters
        model.kern.lengthscale.constrain_bounded(
            np.sqrt(1e-3), np.sqrt(1e3), warning=False
        )
        model.kern.lengthscale.set_prior(
            GPy.priors.LogGaussian(0, 10), warning=False
        )
        model.kern.variance.constrain_bounded(
            np.sqrt(1e-3), np.sqrt(1e3), warning=False
        )
        model.kern.variance.set_prior(
            GPy.priors.LogGaussian(-6, 10), warning=False
        )
        model.Gaussian_noise.constrain_bounded(np.exp(-6), 1, warning=False)

        # Train model
        model.optimize_restarts(
            num_restarts=num_restarts, max_iters=10000, parallel=True, verbose=False
        )

        # Log model hyperparameters
        lengthscales[i] = model.kern.lengthscale.values
        variances[i] = model.kern.variance.values[0]
        noises[i] = model.Gaussian_noise.variance.values[0]
        self.logger.debug(f"Model {name} lengthscales: {lengthscales[i]}")
        self.logger.debug(f"Model {name} variance: {variances[i]}")
        self.logger.debug(f"Model {name} noise: {noises[i]}")

        # Model validation
        rmse_train[i] = rmse(
            model.predict(inputs_scaled.to_numpy())[0],
            outputs_scaled[[name]].to_numpy(),
            mean=self.output_mean[name].values[0],
            std=self.output_std[name].values[0],
        )
        self.logger.debug(f"RMSE train {name} = {rmse_train[i].round(2)}")

        # Spectral sampling
        self.logger.debug(
            f"Spectral sampling {name} with {self.n_spectral_points} spectral points."
        )
        if type(model.kern) == GPy.kern.Exponential:
            matern_nu = 1
        elif type(model.kern) == GPy.kern.Matern32:
            matern_nu = 3
        elif type(model.kern) == GPy.kern.Matern52:
            matern_nu = 5
        elif type(model.kern) == GPy.kern.RBF:
            matern_nu = np.inf
        else:
            raise TypeError(
                "Spectral sampling currently only works with Matern-type "
                "kernels, including RBF."
            )
        for _ in range(self.n_retries):
            try:
                rffs[i] = pyrff.sample_rff(
                    lengthscales=lengthscales[i],
                    scaling=np.sqrt(variances[i]),
                    noise=noises[i],
                    kernel_nu=matern_nu,
                    X=inputs_scaled.to_numpy(),
                    Y=outputs_scaled[[name]].to_numpy()[:, 0],
                    M=self.n_spectral_points,
                )
                break
            except np.linalg.LinAlgError as e:
                self.logger.error(e)
            except ValueError as e:
                self.logger.error(e)
        if rffs[i] is None:
            raise RuntimeError(
                f"Spectral sampling failed after {self.n_retries} retries."
            )
        sample_f = lambda x: np.atleast_2d(rffs[i](x)).T
        rmse_train_spectral[i] = rmse(
            sample_f(inputs_scaled.to_numpy()),
            outputs_scaled[[name]].to_numpy(),
            mean=self.output_mean[name].values[0],
            std=self.output_std[name].values[0],
        )
        self.logger.debug(
            f"RMSE train spectral {name} = {rmse_train_spectral[i].round(2)}"
        )

    # Save spectral samples to disk so the NSGA-II wrapper can load them
    dp_results = get_summit_config_path() / "tsemo" / str(self.uuid_val)
    os.makedirs(dp_results, exist_ok=True)
    pyrff.save_rffs(rffs, pathlib.Path(dp_results, "models.h5"))

    # NSGA-II internal optimisation
    self.logger.info("Optimizing models using NSGA-II.")
    optimizer = NSGA2(pop_size=self.pop_size)
    problem = TSEMOInternalWrapper(
        pathlib.Path(dp_results, "models.h5"), self.domain, n_var=self.kern_dim
    )
    termination = get_termination("n_gen", self.generations)
    self.internal_res = minimize(
        problem, optimizer, termination, seed=1, verbose=False
    )
    X = DataSet(self.internal_res.X, columns=self.columns)
    y = DataSet(
        self.internal_res.F, columns=[v.name for v in self.domain.output_variables]
    )

    # Select points that give the maximum hypervolume improvement
    if X.shape[0] != 0 and y.shape[0] != 0:
        self.hv_imp, indices = self._select_max_hvi(
            outputs_scaled, y, num_experiments
        )

        # Unscale data
        X = (
            X * (self.inputs_max.to_numpy() - self.inputs_min.to_numpy())
            + self.inputs_min.to_numpy()
        )
        y = y * self.output_std.to_numpy() + self.output_mean.to_numpy()

        # Join to get a single dataset with inputs and outputs
        result = X.join(y)
        result = result.iloc[indices, :]

        # Do any necessary transformations back
        result[("strategy", "METADATA")] = "TSEMO"
        result = self.transform.un_transform(result, transform_descriptors=True)

        # Add model hyperparameters as metadata columns
        self.iterations += 1
        for i, (name, model) in enumerate(self.models.items()):
            result[(f"{name}_variance", "METADATA")] = variances[i]
            result[(f"{name}_noise", "METADATA")] = noises[i]
            for var, l in zip(self.domain.input_variables, lengthscales[i]):
                result[(f"{name}_{var.name}_lengthscale", "METADATA")] = l
            result[("iterations", "METADATA")] = self.iterations
        return result
    else:
        self.logger.warning("No suggestions found.")
        self.iterations += 1
        return None
def load(self, filepath=None):
    if filepath is None:
        filepath = get_summit_config_path() / "tsemo" / str(self.uuid_val)
        os.makedirs(filepath, exist_ok=True)
        filepath = filepath / "models.h5"
    self.rff = pyrff.load_rffs(filepath)[0]
def save(self, filepath=None):
    if filepath is None:
        filepath = get_summit_config_path() / "tsemo" / str(self.uuid_val)
        os.makedirs(filepath, exist_ok=True)
        filepath = filepath / "models.h5"
    pyrff.save_rffs([self.rff], filepath)
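# Round-trip sketch for save()/load() above: persist the spectrally sampled
# surrogate to HDF5 via pyrff and restore it in a fresh strategy instance.
# The filename and the `strategy`/`exp` variables are hypothetical:
#
#     strategy.save("tsemo_models.h5")
#     restored = TSEMO(exp.domain)
#     restored.load("tsemo_models.h5")  # repopulates restored.rff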