def generate_data(self, config: dict, n: int):
    """Draw samples from the search distribution and evaluate their reward.

    On the very first call (``n == 0``) a warm-start batch of size
    ``self.warm_start`` is drawn instead of the regular per-iteration amount.

    Returns:
        samples (np.ndarray)
        reward (np.ndarray)
        counter (list): indicates how many times a sample was used
    """
    batch_size = self.warm_start if n == 0 else self.samples_per_iter
    drawn = self.search_dist.sample(batch_size)
    # pair every fresh sample with a zero use-counter for the database
    counted_samples = []
    for sample in drawn:
        counted_samples.append([sample, 0])
    self.total_samples += batch_size

    # different behaviour for test functions and reaching tasks:
    # test functions return the rewards directly, other problems return a
    # tuple whose first element holds the rewards
    if config.params.problem == "test_func":
        rewards = self.problem(drawn)
    else:
        rewards = self.problem(drawn)[0]
    if config.params.minimize:
        # MORE maximizes, so negate for minimization problems
        rewards = -rewards

    self.sample_db.add_data_with_counter(counted_samples, rewards)
    data, reward, use_count = self.sample_db.get_data_with_counter()

    # with no_pool set, shrink the database to one iteration's worth of
    # samples right after the warm-start batch has been consumed
    if n == 0 and config.params.no_pool:
        self.sample_db = SimpleSampleDatabase(self.samples_per_iter)
    return data, reward, use_count
def initialize(self, config: dict, rep: int, logger: cw2.cw_data.cw_pd_logger.PandasLogger) -> None:
    """Initialize the MORE algorithm, surrogate model and problem.

    The MORE algorithm has a search distribution that has to be initialized
    and a surrogate model, which uses a sample database.

    Args:
        config (dict): configuration dict from yaml file
        rep (int): repetition index (unused here)
        logger: clusterwork pandas logger (unused here)
    """
    if config.params.warm_start:
        self.warm_start = config.params.warm_start

    # entries under optim_params, when present, override the plain params
    self.pool_size = config.params.pool_size
    pool_override = config.params.optim_params.pool_size
    if pool_override is not None:
        self.pool_size = pool_override
    self.sample_db = SimpleSampleDatabase(self.pool_size)

    self.samples_per_iter = config.params.samples_per_iter
    spi_override = config.params.optim_params.samples_per_iter
    if spi_override is not None:
        self.samples_per_iter = spi_override

    if config.params.seed:
        # fixed, hard-coded seed whenever seeding is requested
        np.random.seed(12351)

    self.init_problem(config)
    self.init_more(config)
    self.init_surrogate(config)
elif sur == "BLR": surrogate = QuadModelSubBLR(dim, model_options_sub) config = {**config, **model_options_sub} if log_wandb: wandb.init(project="example_reaching", group="more", config=config, job_type=f'{sur}_{task}reach_{name}', name="rep_0") ################################################################################ ################################################################################ x_start = 0.5 * np.random.randn(dim) init_sigma = 1 sample_db = SimpleSampleDatabase(max_samples) search_dist = GaussFullCov(x_start, init_sigma * np.eye(dim)) more = MORE(dim, more_config, logger=logger) total_samples = 0 if task == "via": # env = DmpAsyncVectorEnv([make_viapointreacher_env(i, allow_self_collision=False) for i in range(n_cpus)], # n_samples=n_samples) env = DmpAsyncVectorEnv([ make_viapointreacher_env(i, allow_self_collision=allow_self_collision, weights=weights_scale, penalty=penalty) for i in range(n_cpus) ], n_samples=n_samples)
max_samples = 150 samples_per_iter = 15 model_options_sub = { "normalize_features": True, "normalize_output": None, # "mean_std", # "mean_std_clipped", # "rank", "mean_std", "min_max", } more_config = MORE.get_default_config() # borrowing Rosenbrock from the cma package objective = nfreefunclasses[7](0, zerof=True, zerox=True) objective.initwithsize(curshape=(1, dim), dim=dim) sample_db = SimpleSampleDatabase(max_samples) x_start = objective.x_opt + 0.1 * np.random.randn(dim) init_sigma = 1 search_dist = GaussFullCov(x_start, init_sigma**2 * np.eye(dim)) surrogate = QuadModelSubBLR(dim, model_options_sub) more = MORE(dim, more_config, logger=logger) for i in range(max_iters): logger.info("Iteration {}".format(i)) new_samples = search_dist.sample(samples_per_iter) new_rewards = objective(new_samples) if minimize:
def fmin(objective, x_start, init_sigma, n_iters, target_dist=1e-8,
         algo_config: dict = None, model_config: dict = None,
         sample_db_config: dict = None, budget=None, debug=False,
         minimize=False):
    """Optimize ``objective`` with MORE starting from ``x_start``.

    Args:
        objective: callable mapping a batch of samples to rewards; must also
            expose ``_fopt`` and ``_xopt`` (optimum value/location), which are
            used for the stopping criterion and debug output.
        x_start: initial mean of the Gaussian search distribution.
        init_sigma: scale of the initial isotropic covariance
            (``init_sigma * I``).
        n_iters: maximum number of MORE iterations.
        target_dist: stop once ``|f_opt - f(mean)|`` drops below this value.
        algo_config: optional overrides merged into ``MORE`` defaults
            (``None`` means "defaults only").
        model_config: optional overrides merged into ``QuadModelLS`` defaults.
        sample_db_config: optional overrides merged into
            ``SimpleSampleDatabase`` defaults.
        budget: maximum number of objective evaluations (``None`` = no limit).
        debug: enable DEBUG-level logging.
        minimize: negate rewards so the (maximizing) MORE minimizes.

    Returns:
        tuple: ``(dist_to_opt, mean)`` — final distance to ``f_opt`` and the
        final mean of the search distribution.
    """
    from more.sample_db import SimpleSampleDatabase
    from more.quad_model import QuadModelLS
    from more.gauss_full_cov import GaussFullCov
    import attrdict as ad
    import logging

    logging.basicConfig(level=logging.DEBUG if debug else logging.INFO)
    logger = logging.getLogger('MORE')
    if debug:
        logger.setLevel("DEBUG")
    else:
        logger.setLevel("INFO")

    dim = len(x_start)

    # Merge user overrides into library defaults.  The defaults used to be
    # mutable ``{}`` arguments (shared across calls); ``None`` is now the
    # sentinel for "no overrides", which is fully backward compatible.
    default_algo_config = MORE.get_default_config(dim)
    default_algo_config.update(algo_config or {})
    default_model_config = QuadModelLS.get_default_config()
    default_model_config.update(model_config or {})
    default_sample_db_config = SimpleSampleDatabase.get_default_config(dim)
    default_sample_db_config.update(sample_db_config or {})

    algo_config = ad.AttrDict(default_algo_config)
    model_config = ad.AttrDict(default_model_config)
    sample_db_config = ad.AttrDict(default_sample_db_config)

    sample_db = SimpleSampleDatabase(sample_db_config.max_samples)
    search_dist = GaussFullCov(x_start, init_sigma * np.eye(dim))
    surrogate = QuadModelLS(dim, model_config)
    more = MORE(dim, algo_config, logger=logger)

    if budget is None:
        budget = np.inf

    it = 0
    obj_evals = 0
    dist_to_opt = 1e10
    while dist_to_opt > target_dist and it < n_iters and obj_evals < budget:
        logger.info("Iteration {}".format(it))
        new_samples = search_dist.sample(algo_config.samples_per_iter)
        obj_evals += algo_config.samples_per_iter
        new_rewards = objective(new_samples)
        if minimize:
            # negate, MORE maximizes, but we want to minimize
            new_rewards = -new_rewards
        sample_db.add_data(new_samples, new_rewards)

        # Not enough data for a stable surrogate fit yet: keep sampling.
        # NOTE(review): the ``continue`` paths do not advance ``it``; with an
        # unlimited budget a persistently failing fit would loop forever —
        # the evaluation budget is the only other bound.
        if len(sample_db.data_x) < model_config.min_data_frac * surrogate.model_dim:
            continue

        samples, rewards = sample_db.get_data()
        success = surrogate.update_quad_model(samples, rewards, search_dist)
        if not success:
            continue

        new_mean, new_cov, success = more.step(search_dist, surrogate)
        # NOTE(review): the new parameters are applied even when ``success``
        # is False — presumably ``more.step`` returns the previous parameters
        # in that case; confirm against MORE.step (CWMORE.iterate guards this
        # call with ``if success:``).
        search_dist.update_params(new_mean, new_cov)

        lam = objective(search_dist.mean.T)
        logger.debug("Loss at mean {}".format(lam))
        logger.debug("Change KL cov {}, Change Entropy {}".format(
            more._kl_cov, more.beta))
        logger.debug("Dist to x_opt {}".format(
            np.linalg.norm(objective._xopt - search_dist.mean.flatten())))

        dist_to_opt = np.abs((objective._fopt - lam))
        logger.debug("Dist to f_opt {}".format(dist_to_opt))
        logger.debug(
            "-------------------------------------------------------------------------------"
        )
        # Bug fix: compare against the caller-supplied tolerance instead of a
        # hard-coded 1e-8, which terminated early for stricter targets.
        # (Identical behavior for the default target_dist=1e-8.)
        if dist_to_opt < target_dist:
            break
        it += 1

    return dist_to_opt, search_dist.mean
class CWMORE(cw2.experiment.AbstractIterativeExperiment):
    """Implementation of MORE for running clusterwork 2 Experiments.

    Making it possible to use yaml configuration for running experiments and
    doing grid search for hyperparameters.
    """

    def __init__(self):
        super().__init__()
        # All state is filled in by initialize() / the init_* helpers.
        self.problem = None           # objective: reaching env or test function
        self.optimizer = None         # MORE instance
        self.search_dist = None       # Gaussian search distribution
        self.surrogate = None         # quadratic surrogate model
        self.sample_db = None         # SimpleSampleDatabase of (sample, reward)
        self.total_samples = 0        # objective evaluations so far
        self.samples_per_iter = None
        self.pool_size = None
        self.warm_start = None        # batch size of the first iteration
        self.lin_terms = []
        self.quad_terms = []
        self.params = []

    def init_problem(self, config: dict) -> None:
        """Initialize the problem, either a 2D reaching task or an
        optimization test function.

        Args:
            config (dict): dict containing parameters for configuration
        """
        problem = config["params"]["problem"]
        n_samples = self.samples_per_iter
        n_cpus = 16
        # reaching task uses parallelization
        if problem == "via":
            self.problem = DmpAsyncVectorEnv(
                [make_viapointreacher_env(i) for i in range(n_cpus)],
                n_samples=n_samples)
        elif problem == "hole":
            self.problem = DmpAsyncVectorEnv(
                [make_holereacher_env(i) for i in range(n_cpus)], n_samples)
        elif problem == "test_func":
            dim = config["params"]["dim"]
            # "objective" selects the benchmark function by index
            self.problem = nfreefunclasses[config["params"]["objective"]](
                0, zerof=True, zerox=True)
            self.problem.initwithsize(curshape=(1, dim), dim=dim)

    def init_more(self, config: dict) -> None:
        """Initializes the MORE algorithm.

        When optimizing parameters we have to update the corresponding
        values.

        Args:
            config (dict): configuration from yaml file
        """
        more_config = {"epsilon": config["params"]["kl_bound"],
                       "dim": config["params"]["dim"],
                       "gamma": config["params"]["gamma"],
                       "beta_0": config["params"]["entropy_loss_bound"],
                       "minimize": config["params"]["minimize"]}
        self.optimizer = MORE(config["params"]["dim"], more_config, logger=None)
        # random initial mean, isotropic initial covariance
        x_start = 0.5 * np.random.randn(config["params"]["dim"])
        init_sigma = 1
        self.search_dist = GaussFullCov(
            x_start, init_sigma * np.eye(config["params"]["dim"]))

    def init_surrogate(self, config: dict):
        """Initializes the surrogate model.

        When optimizing parameters (for example with grid search) we have to
        update the corresponding values in the options for the surrogate
        models.

        Args:
            config (dict): configuration from yaml file
        """
        sur = config["params"]["surrogate"]
        if sur == "BLR":
            blr_options = {"normalize_features": True,
                           "normalize_output": None}
            self.surrogate = QuadModelSubBLR(config["params"]["dim"],
                                             blr_options)
        elif sur == "LS":
            buffer_fac = 1.5
            ls_options = {"max_samples": self.pool_size,
                          "output_weighting": "rank",  # "rank",
                          "whiten_input": True,
                          "normalize_features": True,
                          # alternatives: "mean_std", "mean_std_clipped",
                          # "rank", "min_max"
                          "normalize_output": config["params"]["normalize_output"],
                          "top_data_fraction": 0.5,
                          "min_clip_value": -3.,
                          "unnormalize_output": False,  # "rank",
                          "ridge_factor": 1e-12,
                          "limit_model_opt": True,
                          "refit": False,
                          "buffer_fac": buffer_fac,
                          "seed": None}
            self.surrogate = QuadModelLS(config["params"]["dim"], ls_options)
        elif sur == "RLS":
            rls_options = {"whiten_input": config["params"]["whiten"],
                           "norm_feat": False,
                           "unnorm_feat": False,
                           "norm_out": False,
                           # moving (average with weighting),
                           # running (mean of all samples)
                           "norm_type": "moving",
                           "weighting": config["params"]["weighting"],
                           "cov": config["params"]["cov"],
                           "std": config["params"]["std"],
                           "delta": config["params"]["delta"],
                           "alpha": config["params"]["alpha"],
                           "spi": config["params"]["samples_per_iter"],
                           "K": config["params"]["K"],
                           "unnormalize_output": False,
                           "output_weighting": False}
            self.surrogate = QuadModelRLS(config["params"]["dim"], rls_options)

    def initialize(self, config: dict, rep: int, logger: cw2.cw_data.cw_pd_logger.PandasLogger) -> None:
        """Initialize the MORE algorithm, surrogate model and problem.

        The MORE algorithm has a search distribution that has to be
        initialized and a surrogate model, which uses a sample database.

        Args:
            config (dict): configuration dict from yaml file
            rep (int): repetition index (unused here)
            logger: clusterwork pandas logger (unused here)
        """
        self.warm_start = config["params"]["warm_start"]
        self.pool_size = config["params"]["sample_pool"]
        self.sample_db = SimpleSampleDatabase(self.pool_size)
        if config["params"]["seed"]:
            # hard-coded seed whenever the seed flag is set
            np.random.seed(12312)
        self.samples_per_iter = config["params"]["samples_per_iter"]
        self.init_problem(config)
        self.init_more(config)
        self.init_surrogate(config)

    def generate_data(self, config: dict, n: int):
        """Generate samples and reward for one MORE iteration.

        On the first iteration (``n == 0``) a warm-start batch is drawn
        instead of the regular per-iteration amount.

        Returns:
            samples (np.ndarray)
            reward (np.ndarray)
            counter (list): indicates how many times a sample was used
        """
        amount = self.samples_per_iter
        if n == 0:
            amount = self.warm_start
        samples = self.search_dist.sample(amount)
        # each new sample starts with a zero use-counter
        new_samples_counter = [[s, 0] for s in samples]
        self.total_samples += amount
        # different behaviour for test functions and reaching tasks:
        # non-test problems return a tuple; the rewards are its first element
        if config["params"]["problem"] != "test_func":
            rewards = self.problem(samples)[0]
        else:
            rewards = self.problem(samples)
        if config["params"]["minimize"]:
            # MORE maximizes, so negate rewards for minimization
            rewards = -rewards
        self.sample_db.add_data_with_counter(new_samples_counter, rewards)
        return self.sample_db.get_data_with_counter()

    def iterate(self, config: dict, rep: int, n: int) -> dict:
        """Do one iteration of MORE algorithm.

        Draw samples from search distribution, evaluate the samples on the
        problem and get corresponding reward. Use the samples and reward to
        estimate surrogate model. Do one MORE step, using the surrogate model
        to solve the optimization problem and updating the search
        distribution.

        Args:
            config (dict): configuration dict from yaml file
            rep (int): current repetition of the experiment
            n (int): current iteration of MORE algorithm

        Returns:
            (dict): results (for example loss) from this iteration
        """
        samples, rewards, counter = self.generate_data(config, n)
        # the recursive model additionally consumes the per-sample counters
        if config["params"]["surrogate"] == "RLS":
            success = self.surrogate.fit(samples, rewards, self.search_dist,
                                         counter)
        else:
            success = self.surrogate.fit(samples, rewards, self.search_dist)
        if not success:
            return {"success": False}
        new_mean, new_cov, success = self.optimizer.step(self.search_dist,
                                                         self.surrogate)
        if success:
            try:
                self.search_dist.update_params(new_mean, new_cov)
            except Exception as E:
                # NOTE(review): failures of the distribution update are only
                # printed, and the stale distribution is kept for the metrics
                # below — confirm this best-effort behavior is intended.
                print(E)
        lam = self.problem(self.search_dist.mean.T)
        # different reward for test functions and reaching tasks:
        # for test functions report distance to the known optimum instead
        if config["params"]["problem"] == "test_func":
            lam = np.abs((self.problem._fopt - lam))
        else:
            lam = lam[0].item()
        results_dict = {"loss_at_mean": lam,
                        "kl": self.optimizer._kl,
                        "parameter": self.search_dist.mean.T,
                        "entropy": self.search_dist.entropy,
                        "total_samples": self.total_samples,}
        return results_dict

    def save_state(self, config: dict, rep: int, n: int) -> None:
        # nothing to persist between iterations
        pass

    def finalize(self, surrender=None, crash: bool = False) -> dict:
        # NOTE(review): annotated to return dict but returns None — callers
        # apparently ignore the result
        pass