Example #1
    def generate_data(self, config: dict, n: int):
        """Generate samples and reward for MORE algorithm.

        Returns:
            samples (np.ndarrray)
            reward (np.ndarray)
            counter (list) : indicates how many times a sample was used
        """
        amount = self.samples_per_iter
        if n == 0:
            amount = self.warm_start
        samples = self.search_dist.sample(amount)
        new_samples_counter = [[s, 0] for s in samples]
        self.total_samples += amount

        # different behaviour for test functions and reaching tasks
        if config.params.problem != "test_func":
            rewards = self.problem(samples)[0]
        else:
            rewards = self.problem(samples)
        if config.params.minimize:
            rewards = -rewards

        self.sample_db.add_data_with_counter(new_samples_counter, rewards)

        s, r, c = self.sample_db.get_data_with_counter()

        if n == 0 and config.params.no_pool:
            self.sample_db = SimpleSampleDatabase(self.samples_per_iter)
        return s, r, c
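These excerpts only exercise the sample database through add_data_with_counter and get_data_with_counter. A hypothetical minimal stand-in, inferred purely from those calls (the real SimpleSampleDatabase in more.sample_db may differ), could look like this:

import numpy as np

class MinimalSampleDatabase:
    """Hypothetical sketch inferred from the calls in Example #1.

    Keeps at most max_samples entries; each entry carries a usage counter.
    """
    def __init__(self, max_samples):
        self.max_samples = max_samples
        self.data = []     # list of [sample, counter] pairs
        self.rewards = []

    def add_data_with_counter(self, samples_with_counter, rewards):
        self.data.extend(samples_with_counter)
        self.rewards.extend(rewards)
        # drop the oldest entries once the pool overflows
        overflow = len(self.data) - self.max_samples
        if overflow > 0:
            self.data = self.data[overflow:]
            self.rewards = self.rewards[overflow:]

    def get_data_with_counter(self):
        # each retrieval counts as one more use of every stored sample
        for entry in self.data:
            entry[1] += 1
        samples = np.vstack([entry[0] for entry in self.data])
        rewards = np.asarray(self.rewards).reshape(-1, 1)
        counter = [entry[1] for entry in self.data]
        return samples, rewards, counter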
Example #2
    def initialize(self, config: dict, rep: int, logger: cw2.cw_data.cw_pd_logger.PandasLogger) -> None:
        """Initialize the MORE algorithm, surrogate model and problem.

        The MORE algorithm has a search distribution that must be initialized,
        and a surrogate model that uses a sample database.

        Args:
            config (dict) : configuration dict from yaml file
        """
        if config.params.warm_start:
            self.warm_start = config.params.warm_start
        self.pool_size = config.params.pool_size
        if config.params.optim_params.pool_size is not None:
            self.pool_size = config.params.optim_params.pool_size
        self.sample_db = SimpleSampleDatabase(self.pool_size)

        self.samples_per_iter = config.params.samples_per_iter
        if config.params.optim_params.samples_per_iter is not None:
            self.samples_per_iter = config.params.optim_params.samples_per_iter

        if config.params.seed:
            np.random.seed(12351)

        self.init_problem(config)
        self.init_more(config)
        self.init_surrogate(config)
    elif sur == "BLR":
        surrogate = QuadModelSubBLR(dim, model_options_sub)
        config = {**config, **model_options_sub}

    if log_wandb:
        wandb.init(project="example_reaching",
                   group="more",
                   config=config,
                   job_type=f'{sur}_{task}reach_{name}',
                   name="rep_0")
    ################################################################################
    ################################################################################

    x_start = 0.5 * np.random.randn(dim)
    init_sigma = 1
    sample_db = SimpleSampleDatabase(max_samples)
    search_dist = GaussFullCov(x_start, init_sigma * np.eye(dim))
    more = MORE(dim, more_config, logger=logger)

    total_samples = 0

    if task == "via":
        # env = DmpAsyncVectorEnv([make_viapointreacher_env(i, allow_self_collision=False) for i in range(n_cpus)],
        # n_samples=n_samples)
        env = DmpAsyncVectorEnv([
            make_viapointreacher_env(i,
                                     allow_self_collision=allow_self_collision,
                                     weights=weights_scale,
                                     penalty=penalty) for i in range(n_cpus)
        ],
                                n_samples=n_samples)
Example #4
    max_samples = 150
    samples_per_iter = 15

    model_options_sub = {
        "normalize_features": True,
        # other options: "mean_std", "mean_std_clipped", "rank", "min_max"
        "normalize_output": None,
    }

    more_config = MORE.get_default_config()

    # borrowing Rosenbrock from the cma package
    objective = nfreefunclasses[7](0, zerof=True, zerox=True)
    objective.initwithsize(curshape=(1, dim), dim=dim)

    sample_db = SimpleSampleDatabase(max_samples)

    x_start = objective.x_opt + 0.1 * np.random.randn(dim)
    init_sigma = 1

    search_dist = GaussFullCov(x_start, init_sigma**2 * np.eye(dim))
    surrogate = QuadModelSubBLR(dim, model_options_sub)

    more = MORE(dim, more_config, logger=logger)

    for i in range(max_iters):
        logger.info("Iteration {}".format(i))
        new_samples = search_dist.sample(samples_per_iter)

        new_rewards = objective(new_samples)
        if minimize:
            # negate: MORE maximizes, but here we want to minimize
            new_rewards = -new_rewards
Example #5
def fmin(objective,
         x_start,
         init_sigma,
         n_iters,
         target_dist=1e-8,
         algo_config: dict = None,
         model_config: dict = None,
         sample_db_config: dict = None,
         budget=None,
         debug=False,
         minimize=False):

    from more.sample_db import SimpleSampleDatabase
    from more.quad_model import QuadModelLS
    from more.gauss_full_cov import GaussFullCov
    import attrdict as ad
    import logging

    logging.basicConfig(level=logging.DEBUG if debug else logging.INFO)
    logger = logging.getLogger('MORE')

    dim = len(x_start)
    default_algo_config = MORE.get_default_config(dim)
    default_algo_config.update(algo_config or {})
    default_model_config = QuadModelLS.get_default_config()
    default_model_config.update(model_config or {})
    default_sample_db_config = SimpleSampleDatabase.get_default_config(dim)
    default_sample_db_config.update(sample_db_config or {})

    algo_config = ad.AttrDict(default_algo_config)
    model_config = ad.AttrDict(default_model_config)
    sample_db_config = ad.AttrDict(default_sample_db_config)

    sample_db = SimpleSampleDatabase(sample_db_config.max_samples)

    search_dist = GaussFullCov(x_start, init_sigma * np.eye(dim))
    surrogate = QuadModelLS(dim, model_config)

    more = MORE(dim, algo_config, logger=logger)

    if budget is None:
        budget = np.inf
    it = 0
    obj_evals = 0
    dist_to_opt = 1e10

    while dist_to_opt > target_dist and it < n_iters and obj_evals < budget:
        logger.info("Iteration {}".format(it))
        new_samples = search_dist.sample(algo_config.samples_per_iter)
        obj_evals += algo_config.samples_per_iter

        new_rewards = objective(new_samples)
        if minimize:
            # negate, MORE maximizes, but we want to minimize
            new_rewards = -new_rewards

        sample_db.add_data(new_samples, new_rewards)

        if len(sample_db.data_x) < model_config.min_data_frac * surrogate.model_dim:
            continue

        samples, rewards = sample_db.get_data()

        success = surrogate.update_quad_model(samples, rewards, search_dist)
        if not success:
            continue

        new_mean, new_cov, success = more.step(search_dist, surrogate)
        if not success:
            continue

        search_dist.update_params(new_mean, new_cov)

        lam = objective(search_dist.mean.T)
        logger.debug("Loss at mean {}".format(lam))
        logger.debug("Change KL cov {}, Change Entropy {}".format(
            more._kl_cov, more.beta))
        logger.debug("Dist to x_opt {}".format(
            np.linalg.norm(objective._xopt - search_dist.mean.flatten())))

        dist_to_opt = np.abs(objective._fopt - lam)
        logger.debug("Dist to f_opt {}".format(dist_to_opt))
        logger.debug(
            "-------------------------------------------------------------------------------"
        )

        if dist_to_opt < target_dist:
            break

        it += 1

    return dist_to_opt, search_dist.mean
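A minimal usage sketch for fmin follows. It assumes the cma package's bbobbenchmarks module is where Example #4's nfreefunclasses comes from (an assumption; the import is not shown in these excerpts). Note that the loop above reads objective._xopt and objective._fopt, so a bare lambda would not work as the objective:

if __name__ == "__main__":
    import numpy as np
    from cma.bbobbenchmarks import nfreefunclasses

    dim = 10
    # Rosenbrock, as in Example #4; provides the _xopt/_fopt attributes fmin uses
    objective = nfreefunclasses[7](0, zerof=True, zerox=True)
    objective.initwithsize(curshape=(1, dim), dim=dim)

    dist_to_opt, mean = fmin(objective,
                             x_start=objective._xopt + 0.1 * np.random.randn(dim),
                             init_sigma=1,
                             n_iters=1000,
                             minimize=True)
    print("final distance to f_opt:", dist_to_opt)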
Example #6
class CWMORE(cw2.experiment.AbstractIterativeExperiment):
    """Implementation of MORE for running clusterwork 2 Experiments.

    Making it possible to use yaml configuration for running experiments
    and doing grid search for hyperparameters.
    """
    def __init__(self):
        super().__init__()
        self.problem = None
        self.optimizer = None
        self.search_dist = None
        self.surrogate = None
        self.sample_db = None
        self.total_samples = 0
        self.samples_per_iter = None
        self.pool_size = None
        self.warm_start = None
        self.lin_terms = []
        self.quad_terms = []
        self.params = []


    def init_problem(self, config: dict) -> None:
        """Initialize the problem, either a 2D reaching task or optimization
           test function.

        Args:
            config (dict) : dict containing parameters for configuration
        """
        problem = config["params"]["problem"]
        n_samples = self.samples_per_iter
        n_cpus = 16  # reaching tasks use parallelization

        if problem == "via":
            self.problem = DmpAsyncVectorEnv(
                [make_viapointreacher_env(i) for i in range(n_cpus)],
                n_samples=n_samples)
        elif problem == "hole":
            self.problem = DmpAsyncVectorEnv(
                [make_holereacher_env(i) for i in range(n_cpus)],
                n_samples=n_samples)
        elif problem == "test_func":
            dim = config["params"]["dim"]
            self.problem = nfreefunclasses[config["params"]["objective"]](0, zerof=True, zerox=True)
            self.problem.initwithsize(curshape=(1, dim), dim=dim)


    def init_more(self, config: dict) -> None:
        """Initializes the MORE algorithm.

        When tuning hyperparameters (e.g. via grid search), the corresponding
        values have to be updated.

        Args:
            config (dict) : configuration from yaml file
        """
        more_config = {"epsilon": config["params"]["kl_bound"],
                       "dim": config["params"]["dim"],
                       "gamma": config["params"]["gamma"],
                       "beta_0": config["params"]["entropy_loss_bound"],
                       "minimize": config["params"]["minimize"]}

        self.optimizer = MORE(config["params"]["dim"], more_config, logger=None)
        x_start = 0.5 * np.random.randn(config["params"]["dim"])
        init_sigma = 1
        self.search_dist = GaussFullCov(
            x_start, init_sigma * np.eye(config["params"]["dim"]))


    def init_surrogate(self, config: dict):
        """Initializes the surrogate model.

        When tuning hyperparameters (for example with grid search), the
        corresponding values in the surrogate model options have to be updated.

        Args:
            config (dict) : configuration from yaml file
        """
        sur = config["params"]["surrogate"]
        if sur == "BLR":
            blr_options = {"normalize_features": True,
                           "normalize_output": None}
            self.surrogate = QuadModelSubBLR(config["params"]["dim"], blr_options)
        elif sur == "LS":
            buffer_fac = 1.5
            ls_options = {"max_samples": self.pool_size,
                          "output_weighting": "rank",  # "rank",
                          "whiten_input": True,
                          "normalize_features": True,
                          "normalize_output": config["params"]["normalize_output"],
                          # "mean_std", "mean_std_clipped",  # "rank", "mean_std", "min_max",
                          "top_data_fraction": 0.5,
                          "min_clip_value": -3.,
                          "unnormalize_output": False,  # "rank",
                          "ridge_factor": 1e-12,
                          "limit_model_opt": True,
                          "refit": False,
                          "buffer_fac": buffer_fac,
                          "seed": None}
            self.surrogate = QuadModelLS(config["params"]["dim"], ls_options)
        elif sur == "RLS":
            rls_options = {"whiten_input": config["params"]["whiten"],
                           "norm_feat": False,
                           "unnorm_feat": False,
                           "norm_out": False,
                           "norm_type": "moving", # moving (average with weighting), running (mean of all samples)
                           "weighting": config["params"]["weighting"],
                           "cov": config["params"]["cov"],
                           "std": config["params"]["std"],
                           "delta": config["params"]["delta"],
                           "alpha": config["params"]["alpha"],
                           "spi": config["params"]["samples_per_iter"],
                           "K": config["params"]["K"],
                           "unnormalize_output": False,
                           "output_weighting": False}
            self.surrogate = QuadModelRLS(config["params"]["dim"], rls_options)


    def initialize(self, config: dict, rep: int, logger: cw2.cw_data.cw_pd_logger.PandasLogger) -> None:
        """Initialize the MORE algorithm, surrogate model and problem.

        The MORE algorithm has a search distribution that must be initialized,
        and a surrogate model that uses a sample database.

        Args:
            config (dict) : configuration dict from yaml file
        """
        self.warm_start = config["params"]["warm_start"]
        self.pool_size = config["params"]["sample_pool"]
        self.sample_db = SimpleSampleDatabase(self.pool_size)

        if config["params"]["seed"]:
            np.random.seed(12312)

        self.samples_per_iter = config["params"]["samples_per_iter"]

        self.init_problem(config)
        self.init_more(config)
        self.init_surrogate(config)


    def generate_data(self, config: dict, n: int):
        """Generate samples and reward for one MORE iteration.

        Returns:
            samples (np.ndarrray)
            reward (np.ndarray)
            counter (list) : indicates how many times a sample was used
        """
        amount = self.samples_per_iter
        if n == 0:
            amount = self.warm_start
        samples = self.search_dist.sample(amount)
        new_samples_counter = [[s, 0] for s in samples]
        self.total_samples += amount

        # different behaviour for test functions and reaching tasks
        if config["params"]["problem"] != "test_func":
            rewards = self.problem(samples)[0]
        else:
            rewards = self.problem(samples)
        if config["params"]["minimize"]:
            rewards = -rewards

        self.sample_db.add_data_with_counter(new_samples_counter, rewards)
        return self.sample_db.get_data_with_counter()


    def iterate(self, config: dict, rep: int, n: int) -> dict:
        """Do one iteration of MORE algorithm.

        Draw samples from search distribution, evaluate the samples
        on the problem and get corresponding reward. Use the samples and
        reward to estimate surrogate model. Do one MORE step, using the
        surrogate model to solve the optimization problem and updating
        the search distribution.

        Args:
            config (dict) : configuration dict from yaml file
            rep (int) : current repetition of the experiment
            n (int) : current iteration of MORE algorithm

        Returns:
            (dict) : results (for example loss) from this iteration
        """
        samples, rewards, counter = self.generate_data(config, n)

        if config["params"]["surrogate"] == "RLS":
            success = self.surrogate.fit(samples, rewards, self.search_dist, counter)
        else:
            success = self.surrogate.fit(samples, rewards, self.search_dist)
        if not success:
            return {"success": False}

        new_mean, new_cov, success = self.optimizer.step(self.search_dist, self.surrogate)
        if success:
            try:
                self.search_dist.update_params(new_mean, new_cov)
            except Exception as e:
                print(e)

        lam = self.problem(self.search_dist.mean.T)
        # different reward for test functions and reaching tasks
        if config["params"]["problem"] == "test_func":
            lam = np.abs((self.problem._fopt - lam))
        else:
            lam = lam[0].item()

        results_dict = {"loss_at_mean": lam,
                        "kl": self.optimizer._kl,
                        "parameter": self.search_dist.mean.T,
                        "entropy": self.search_dist.entropy,
                        "total_samples": self.total_samples,}
        return results_dict


    def save_state(self, config: dict, rep: int, n: int) -> None:
        pass


    def finalize(self, surrender=None, crash: bool = False) -> None:
        pass
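For completeness, a sketch of the runner script that would drive CWMORE. The cw2 calls below follow cw2's documented entry point and are an assumption, not code taken from this repository:

from cw2 import cluster_work
from cw2.cw_data import cw_pd_logger

if __name__ == "__main__":
    # cw2 reads the yaml config path from the command line,
    # e.g.: python run_more.py more_config.yml
    cw = cluster_work.ClusterWork(CWMORE)
    cw.add_logger(cw_pd_logger.PandasLogger())  # the logger type initialize() expects
    cw.run()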