def test_rosenbrock_cma(self):
    """Check that CMA-ES drives the 6-D Rosenbrock function to its minimum.

    Adapted from the test case in ``cma.test``. Two entry points are
    exercised: the functional ``cma.fmin`` call (bounded, seeded) and the
    object-oriented ``CMAEvolutionStrategy(...).optimize`` call.  Both are
    expected to reach a function value near 0 at the all-ones vector.
    """
    objective = cma.ff.rosen

    ORDER = 6  # dimension of problem
    span = 2  # upper and lower limits
    fmin_options = {
        'ftarget': 1e-6,
        'bounds': [-span * np.ones(ORDER), span * np.ones(ORDER)],
        'verb_time': 0,
        'verb_disp': 0,
        'seed': 3
    }
    res = cma.fmin(objective, [-1] * ORDER, 0.01, options=fmin_options)

    # Layout of the tuple returned by cma.fmin:
    # - res[0] (xopt) -- best evaluated solution
    # - res[1] (fopt) -- respective function value
    # - res[2] (evalsopt) -- respective number of function evaluations
    # - res[3] (evals) -- number of overall conducted objective function evaluations
    # - res[4] (iterations) -- number of overall conducted iterations
    # - res[5] (xmean) -- mean of the final sample distribution
    # - res[6] (stds) -- effective stds of the final sample distribution
    # - res[-3] (stop) -- termination condition(s) in a dictionary
    # - res[-2] (cmaes) -- class `CMAEvolutionStrategy` instance
    # - res[-1] (logger) -- class `CMADataLogger` instance
    xopt, fopt = res[0], res[1]
    assert_near_equal(fopt, 0.0, 1e-3)
    assert_near_equal(xopt, np.ones(ORDER), 1e-3)

    # Same problem through the ask/tell wrapper, started at the optimum
    # with sigma 1 (no seed is given for this run).
    strategy = cma.CMAEvolutionStrategy([1] * ORDER, 1)
    es = strategy.optimize(objective)
    assert_near_equal(es.result.fbest, 0.0, 1e-3)
    assert_near_equal(es.result.xbest, np.ones(ORDER), 1e-3)
def __init__(self, *args, **kwargs):
    """Initialise the base class, then build the CMA-ES optimiser.

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor.  The optimiser is created with a population size
    equal to ``self.batch_size`` and with ``CMA_diagonal`` enabled.

    Raises:
        AssertionError: if ``self.batch_size`` is not greater than one.
    """
    super(CMAES, self).__init__(*args, **kwargs)

    # CMA-ES is configured with popsize == batch_size, so a batch of one
    # is rejected up front.
    assert self.batch_size > 1, "Batch size must be >1 for CMA-ES"

    # The initial mean is stored flattened to one dimension.
    initial_mean = self.initialise_mean()
    self.mean = np.reshape(initial_mean, (initial_mean.size,))

    # Original (list of batches) actions are kept for the later tell step.
    self.orig_actions = []

    self.optimizer = cma.CMAEvolutionStrategy(
        self.mean,
        self.cfg.MODEL.POLICY.INITIAL_SD,
        inopts=dict(popsize=self.batch_size, CMA_diagonal=True),
    )
    self.actions = []
def cmaes(func, initP, var=1):
    """Minimise ``func`` with CMA-ES and return the best parameters seen.

    Args:
        func: objective; called with one candidate and returning a value
            that is compared with ``<`` (lower is better).
        initP: starting point; also evaluated once as the initial incumbent.
        var: initial step size handed to ``CMAEvolutionStrategy``.

    Returns:
        The candidate with the lowest observed objective value, or ``initP``
        if no sampled candidate ever improved on it.
    """
    import cma

    es = cma.CMAEvolutionStrategy(initP, var)
    best_params = initP
    best_so_far = func(initP)

    while not es.stop():
        solutions = es.ask()
        f_vals = [func(candidate) for candidate in solutions]
        es.tell(solutions, f_vals)

        generation_min = np.min(f_vals)
        if generation_min < best_so_far:
            # Track the incumbent ourselves, independent of es.result.
            best_so_far = generation_min
            best_params = solutions[np.argmin(f_vals)]
            print('CMAES found a new set of best params, achieving',
                  best_so_far)
            print('params', best_params)

        es.logger.add()
        es.disp()

    es.result_pretty()
    return best_params
def fit_cartpole():
    """Run 800 CMA-ES generations on a CartPole-v1 controller.

    The module-level ``_env`` is (re)bound to a fresh environment because
    the evaluation helper ``eval_nn`` is called with a parameter vector only
    (presumably it reads ``_env`` -- confirm against its definition).

    Returns:
        A tuple ``(hist, best, xbest)`` where ``hist`` holds ``-fbest`` after
        every generation, ``best`` is the final ``-fbest`` and ``xbest`` the
        corresponding parameter vector.
    """
    global _env
    hist = []

    _env = gym.make('CartPole-v1')
    _env._max_episode_steps = 500

    controller = SimpleNeuralControllerNumpy(4, 1, 2, 5)
    controller.init_random_params()
    es = cma.CMAEvolutionStrategy(controller.get_parameters(), 0.2)

    for _ in range(800):
        candidates = es.ask()
        costs = [eval_nn(candidate) for candidate in candidates]
        es.tell(candidates, costs)
        # fbest is negated when recorded and returned.
        hist.append(-es.result.fbest)

    _env.close()
    return hist, -es.result.fbest, es.result.xbest
def search_cmaes(args, state, log_dir):
    """Run (or resume) a CMA-ES search and checkpoint it every generation.

    Args:
        args: run configuration; ``continue_iter``, ``max_iter`` and
            ``pop_size`` are read, and ``logger`` is set to the evaluation
            log path inside ``log_dir``.
        state: problem state; supplies the random start individual and is
            passed through to ``evaluate`` and ``write_best``.
        log_dir: directory holding the evaluation log and the pickled
            optimiser checkpoint.
    """
    args.logger = f"{log_dir}/{EVAL_LOG}"
    checkpoint_path = f'{log_dir}/{OPT_SAVE}'

    if not args.continue_iter:
        options = {
            'bounds': [0, 1],
            "maxiter": args.max_iter,
            "popsize": args.pop_size
        }
        es = cma.CMAEvolutionStrategy(state.get_random_individual(), SIGMA,
                                      options)
        make_logger(log_dir)
    else:
        # NOTE(review): unpickling executes arbitrary code; only resume from
        # checkpoints this program wrote itself.
        with open(checkpoint_path, 'rb') as checkpoint:
            es = pickle.load(checkpoint)

    while not es.stop():
        X = es.ask()
        es.tell(X, [evaluate(args, state, x) for x in X])
        es.logger.add()
        # Close the file on every generation so the checkpoint is fully
        # flushed to disk before the next (possibly long) evaluation round.
        with open(checkpoint_path, 'wb') as checkpoint:
            pickle.dump(es, checkpoint)

    best, best_val = es.result[0], es.result[1]
    write_best(best, best_val, state, log_dir)
    es.logger.disp()
def experiment_main(_run, _seed, cmaes_sigma, evaluations, use_eval_seed,
                    cmaes_popsize):
    """Maximise the reward returned by ``evaluate`` with CMA-ES.

    Rewards are negated before being handed to ``es.tell``.  Progress is
    logged through ``_run`` (presumably a Sacred run object -- confirm) and
    intermediate vectors are written with ``save_array``.

    Args:
        _run: run object receiving ``info``, ``log_scalar`` and ``result``.
        _seed: seed passed to CMA-ES via the ``seed`` option.
        cmaes_sigma: initial step size.
        evaluations: evaluation budget; the loop ends once at least this many
            candidates have been evaluated.
        use_eval_seed: if true, draw a fresh evaluation seed per generation;
            otherwise use seed 0 throughout.
        cmaes_popsize: population size; a falsy value keeps pycma's default.
    """
    param_count = mapgen.count_params()
    print('param_count:', param_count)
    _run.info['param_count'] = param_count

    opts = {}
    if cmaes_popsize:
        opts['popsize'] = cmaes_popsize
    opts['seed'] = _seed
    es = cma.CMAEvolutionStrategy(param_count * [0], cmaes_sigma, opts)

    evaluation = 0
    iteration = 0
    while evaluation < evaluations:
        solutions = es.ask()
        print('asked to evaluate', len(solutions), 'solutions')

        # One seed is shared by every candidate of a generation.
        eval_seed = np.random.randint(100_000) if use_eval_seed else 0

        pending = [evaluate(x, eval_seed=eval_seed) for x in solutions]
        rewards = dask.compute(*pending)

        evaluation += len(solutions)
        iteration += 1
        print('evaluation', evaluation)
        print('computed rewards:', sorted(rewards)[::-1])

        es.tell(solutions, [-reward for reward in rewards])

        _run.log_scalar("training.min_reward", min(rewards), evaluation)
        _run.log_scalar("training.max_reward", max(rewards), evaluation)
        _run.log_scalar("training.med_reward", np.median(rewards), evaluation)
        _run.log_scalar("training.avg_reward", np.average(rewards), evaluation)
        _run.result = max(rewards)

        save_array('xbest.dat', es.result.xbest)
        if iteration % 20 == 0:
            # Periodic snapshots, file names tagged with the evaluation count.
            save_array('xfavorite-eval%07d.dat' % evaluation,
                       es.result.xfavorite)
            save_array('stds-eval%07d.dat' % evaluation, es.result.stds)
        es.disp()
def Init(self, data=None):
    """Create the CMA-ES instance, optionally restoring a saved session.

    Args:
        data: ``None`` for a fresh start, otherwise a mapping with the keys
            ``'options'``, ``'xmean'``, ``'stds'``, ``'solutions'``,
            ``'scores'``, ``'logger'`` and ``'generation'`` describing a
            previous session.

    On a fresh start the log file ``self.Logger`` is truncated; on restore
    the logger path comes from ``data`` and the file is opened for append.
    """
    if len(self.Min) > 0 or len(self.Max) > 0:
        self.CMAESOptions['bounds'] = [self.Min, self.Max]

    if data is not None:
        InsertDict(self.CMAESOptions, data['options'])
        self.Mean = data['xmean']
        # The saved per-coordinate stds go into scaling_of_variables, so the
        # global step size is reset to 1.
        self.Std = 1.0
        self.CMAESOptions['scaling_of_variables'] = data['stds']

    self.es = cma.CMAEvolutionStrategy(self.Mean, self.Std, self.CMAESOptions)
    self.solutions = []
    self.scores = []

    # ``open`` replaces the Python-2-only ``file`` builtin; it behaves the
    # same on Python 2 and also works on Python 3.
    if data is not None:
        self.solutions = data['solutions']
        self.scores = data['scores']
        self.Logger = data['logger']
        self.tmpfp = open(self.Logger, 'a')
        self.Generation = data['generation']
    else:
        self.tmpfp = open(self.Logger, 'w')
def do_fit(self, count, params=None, sigma=1, popsize=8, seed=123):
    """Advance the CMA-ES fit by at most ``count`` generations.

    The optimiser is created lazily on the first call and reused afterwards,
    so ``params``, ``sigma``, ``popsize`` and ``seed`` only take effect when
    ``self.optimizer`` is still ``None``.

    Args:
        count: maximum number of generations to run in this call.
        params: start point; defaults to ``self.params.scaled``.
        sigma: initial step size.
        popsize: population size.
        seed: random seed handed to CMA-ES.
    """
    # Open question from the original author: what is the order of params,
    # i.e. which position represents which parameter?
    if self.optimizer is None:
        start = self.params.scaled if params is None else params
        opts = dict(bounds=self.params.scaled_bounds,
                    popsize=popsize,
                    seed=seed)
        self.optimizer = cma.CMAEvolutionStrategy(start, sigma, opts)

    optimizer = self.optimizer
    for _ in range(count):
        if optimizer.stop():
            break
        points = optimizer.ask()
        # Runs the simulation and computes total fitness across features.
        values = self.fitness_multi(points)
        optimizer.tell(points, values)
        optimizer.logger.add()  # write plottable data to disc
        optimizer.disp()
def train(params):
    """Optimise a policy's flattened parameters with CMA-ES.

    Args:
        params: 5-tuple ``(env, policy, iters, animate, ID)``.  ``iters``
            caps the number of generations; ``ID`` tags the checkpoint file.

    Returns:
        ``es.result.fbest`` at the moment the loop ends (normally, by the
        iteration cap, or after a ``KeyboardInterrupt``).
    """
    env, policy, iters, animate, ID = params
    obs_dim, act_dim = env.obs_dim, env.act_dim

    w = parameters_to_vector(policy.parameters()).detach().numpy()
    es = cma.CMAEvolutionStrategy(w, 0.5)
    f = f_wrapper(env, policy, animate)

    weight_decay = 0.005

    env_name = env.__class__.__name__
    policy_name = policy.__class__.__name__
    print("Env: {}, Policy: {}, Action space: {}, observation space: {},"
          " N_params: {}, ID: {}, wd = {}, comments: ...".format(
              env_name, policy_name, act_dim, obs_dim, len(w), ID,
              weight_decay))

    it = 0
    try:
        while not es.stop():
            it += 1
            if it > iters:
                break

            if it % 200 == 0:
                # Every 200th generation: load the best vector so far into
                # the policy and save it next to this source file.
                here = os.path.dirname(os.path.realpath(__file__))
                sdir = os.path.join(
                    here, "agents/{}_{}_{}_es.p".format(
                        env_name, policy_name, ID))
                vector_to_parameters(
                    torch.from_numpy(es.result.xbest).float(),
                    policy.parameters())
                T.save(policy, sdir)
                print("Saved checkpoint, {}".format(sdir))

            if weight_decay > 0:
                # Pull each coordinate of the distribution mean towards zero
                # by weight_decay * mean**2, with the sign chosen to shrink.
                sol = es.mean
                sol_penalty = np.square(es.mean) * weight_decay
                es.mean = sol - sol_penalty * (sol > 0) + sol_penalty * (sol < 0)

            X = es.ask(number=40)
            es.tell(X, [f(x) for x in X])
            es.disp()
    except KeyboardInterrupt:
        print("User interrupted process.")

    return es.result.fbest
def cma_es_optimizer(self):
    """Search an action sequence with CMA-ES, evaluating rollouts via Ray.

    The search vector has ``self.n_tot_actions`` entries and starts at zero
    with step size 1.  Rollout returns are negated into costs.

    Returns:
        A list of ``self.look_ahead`` consecutive slices of the best vector,
        each ``self.n_actions`` long.
    """
    es = cma.CMAEvolutionStrategy([0] * self.n_tot_actions, 1)

    while (not es.stop()) and es.result.iterations <= self.opt_iter:
        X = es.ask()  # get list of new solutions
        # Launch all rollouts first, then collect them one by one.
        futures = [
            rollout.remote(self.env, x, self.n_actions, self.look_ahead)
            for x in X
        ]
        costs = [-ray.get(future) for future in futures]
        es.tell(X, costs)  # feed values
        es.disp()

    width = self.n_actions
    actions = [
        es.result.xbest[step * width:(step + 1) * width]
        for step in range(self.look_ahead)
    ]
    return actions
def __init__(self, edges, signals, json_):
    """Store the problem description and create the CMA-ES instance.

    Args:
        edges: mapping whose number of keys sets how many strategy
            parameters are optimised.
        signals: sized collection; one extra parameter is added per signal.
        json_: configuration; ``json_['optimizer']['config']`` must provide
            ``sigma``, ``max_loop`` and ``pop_size``.
    """
    self.edges = edges
    self.signals = signals
    self.json_ = json_

    optimizer_cfg = json_['optimizer']['config']
    len_strategy_params = len(edges.keys())

    self.minmax = {}
    self.sigma_fac = 1.0
    self.ask_update = False

    bounds = [0, np.inf]  # None
    sigma0 = optimizer_cfg['sigma']
    self.max_loop = optimizer_cfg['max_loop']
    pop_size = optimizer_cfg['pop_size']

    # Start values are fixed here: 10.0 per strategy parameter and 1.0 per
    # signal (the config keys 'salmon_init_point' / 'bison_init_point' were
    # used for this in an earlier, now disabled, variant).
    initList = len_strategy_params * [10.0] + len(signals) * [1.0]

    cma_options = {
        'bounds': bounds,
        'seed': 20181210,
        'popsize': pop_size,
        'verbose': True
    }
    self.es = cma.CMAEvolutionStrategy(initList, sigma0, cma_options)

    self.pop_size = pop_size
    self.sigma0 = sigma0
    self.original_sigma = sigma0
    print ('real max_loop = {}'.format(self.max_loop))
    self.df = pd.DataFrame()
def __init__(
        self,
        evaluator,
        cudas=None,
        popsize=1,
        output_dir=None,
        sigma=0.2,
):
    """Initialise the base searcher and attach a CMA-ES instance.

    Args:
        evaluator: forwarded to the parent constructor.
        cudas: list of device identifiers forwarded to the parent
            constructor; defaults to ``["0"]``.
        popsize: forwarded to the parent constructor; the CMA-ES population
            size is then read back from ``self.popsize``.
        output_dir: forwarded to the parent constructor.
        sigma: initial CMA-ES step size.
    """
    # A fresh list per call: a list literal as the default value would be
    # one shared object that every instance could mutate.
    if cudas is None:
        cudas = ["0"]
    super(HAZero, self).__init__(evaluator, cudas, popsize, output_dir)

    self._sigma = sigma
    # NOTE: the attribute name (including its spelling) is part of the
    # public surface of this class and is kept as is.
    self.evolution_stratefy = cma.CMAEvolutionStrategy(
        self.init_input, sigma, {
            'popsize': self.popsize,
            'bounds': [-1, 1],
            'AdaptSigma': True,
            'verb_disp': 1,
            'verb_time': 'True',
        })
def cmaUser(mse, lamb, sigma=0.5, dim=D):
    """Minimise ``mse`` with bounded CMA-ES.

    All coordinates are unbounded except the last two, which are constrained
    to be non-negative.

    Args:
        mse: objective, called with one candidate vector.
        lamb: population size.
        sigma: initial step size.
        dim: problem dimension.

    Returns:
        ``(x, f, lamb)``: the first two entries of the optimiser's result
        followed by the population size that was used.
    """
    inf = float('Inf')
    lb = [-inf] * (dim - 2) + [0, 0]
    ub = [inf] * (dim - 2) + [inf, inf]

    es = cma.CMAEvolutionStrategy(
        dim * [0], sigma, {
            'popsize': lamb,
            'boundary_handling': 'BoundTransform ',
            'bounds': [lb, ub]
        })

    while not es.stop():
        solutions = es.ask()
        # Build a real list: on Python 3 ``map`` yields a lazy iterator,
        # which ``tell`` cannot take the length of.
        values = [mse(candidate) for candidate in solutions]
        es.tell(solutions, values)

    # ``result`` was a method in old pycma releases and is a property in
    # current ones; accept both.
    result = es.result
    if callable(result):
        result = result()
    return result[0], result[1], lamb
def __init__(self,
             num_params,
             sigma_init=0.5,
             popsize=255,
             weight_decay=0.0):
    """Create a CMA-ES solver that starts from the zero vector.

    Args:
        num_params: number of parameters in a candidate solution.
        sigma_init: initial standard deviation.
        popsize: size of the population.
        weight_decay: stored on the instance for reward shaping; it is not
            used inside this constructor.
    """
    self.num_params = num_params
    self.sigma_init = sigma_init
    self.popsize = popsize
    self.weight_decay = weight_decay
    self.solutions = None

    start_point = self.num_params * [0]
    cma_options = {'popsize': self.popsize}
    self.es = cma.CMAEvolutionStrategy(start_point, self.sigma_init,
                                       cma_options)
def train(self, runner):
    """Prepare the CMA-ES state and delegate to the parent training loop.

    A new evolution strategy is created around the policy's current
    parameters, a first population is sampled, and the policy is switched to
    the first sample before the parent ``train`` takes over.

    Args:
        runner (LocalRunner): gives the algorithm access to
            ``runner.step_epochs()``, which provides services such as
            snapshotting and sampler control.

    Returns:
        Whatever the parent ``train`` returns (documented upstream as the
        average return in the last epoch cycle).
    """
    self.es = cma.CMAEvolutionStrategy(self.policy.get_param_values(),
                                       self.sigma0)

    self.all_params = self._sample_params()
    first_candidate = self.all_params[0]
    self.cur_params = first_candidate
    self.policy.set_param_values(first_candidate)

    self.all_returns = []
    return super().train(runner)
def best_A_cmaes(self):
    """Fit the scalars with one CMA-ES run from a random start.

    The start point is uniform in [-1, 1) per coordinate and the initial
    sigma is a random integer from 1 to 4.

    Returns:
        ``(True, values)`` with the optimiser's favourite solution as a
        plain list, or ``(False, [1, ..., 1])`` if the run raised one of the
        handled numerical/runtime errors.
    """
    initialguess = 2 * np.random.rand(self.scalar_numbers) - 1
    initialsigma = np.random.randint(1, 5)
    handled_errors = (RuntimeWarning, RuntimeError, ValueError,
                      ZeroDivisionError, OverflowError, SystemError,
                      AttributeError)
    try:
        strategy = cma.CMAEvolutionStrategy(initialguess, initialsigma,
                                            {'verb_disp': 0})
        res = strategy.optimize(self.evaluation_target).result
        reco = res.xfavorite
        rec = [reco[u] for u in range(reco.size)]
    except handled_errors:
        return False, [1] * self.scalar_numbers
    return True, rec
def runOpt(BPBBO):
    """Smoke-test a BPBBO class by optimising its parameters with CMA-ES.

    The objective is the squared distance of the first output-theta row from
    0.1.  After 1000 seeded generations the mean objective of the last
    population must be below 1e-4.

    Args:
        BPBBO: class under test; constructed as
            ``BPBBO(numThetas, numQualities, nHidden)``.

    Raises:
        AssertionError: if the final population's mean objective is too high.
    """
    import numpy as np
    import cma

    print ("testOps: %s" % (BPBBO.__name__))

    numThetas = 1
    numQualities = 1
    nSteps = 10
    # The plain variant gets a larger hidden layout.
    nHidden = [3, 2] if BPBBO.__name__ == "BPBBOPlain" else [2, 1]

    def f(x):
        return ((x - .1) ** 2).sum()

    def fitness(b, p):
        # Load the candidate, then replay every step: query it, write the
        # output thetas back in, and record the resulting quality.
        b.setParams(p)
        step_count = b.size()
        outThetas = b.getOutThetas()
        for step in range(0, step_count):
            b.query(step)
            np.copyto(b.getThetas(step), outThetas)
            q = f(outThetas[0])
            b.getQualities(step)[0] = q
        return q

    b = BPBBO(numThetas, numQualities, nHidden)
    b.resize(nSteps)

    es = cma.CMAEvolutionStrategy([0] * b.numParams(),
                                  sigma0=.1,
                                  inopts={'seed': 1})
    for _ in range(1000):
        ps = es.ask()
        evals = np.array([fitness(b, candidate) for candidate in ps])
        es.tell(ps, evals)

    assert (evals.mean() < 1e-4)
def run_test():
    """Optimise the model with CMA-ES and visualise the search.

    Each generation's candidates are projected onto two PCA components
    (after subtracting the generation mean) and accumulated.  Once the
    optimiser stops, an animated scatter of the projections and a plot of
    the per-generation summed reward are shown.
    """
    env = gym.make(env_name)
    es = cma.CMAEvolutionStrategy(model.num_params * [0], 0.5)
    # pycma derives the population size from the problem dimension, so read
    # it from the optimiser; a fixed 8 would make the animation below slice
    # the wrong number of points per generation.
    num_sols = es.popsize
    rewards = []
    projs = np.array([]).reshape(0, 2)
    iters = 0

    while not es.stop():
        solutions = es.ask()
        loss = [simulate(x, env) for x in solutions]
        es.tell(solutions, loss)

        rewards.append(-sum(loss))
        best_sol = solutions[np.argmin(loss)]

        # Project this generation's (centred) weights to 2-D.
        population = np.array(solutions)
        mean_sol = np.mean(population, axis=0)
        proj = PCA(n_components=2).fit_transform(population - mean_sol)
        projs = np.vstack((projs, proj))

        es.logger.add()
        # es.disp()
        if iters % 10 == 0:
            visualize_env(best_sol)
        iters += 1

    env.close()
    visualize_env(best_sol)

    def animate(i):
        # Frame i shows every projected point up to generation i.
        ax1.clear()
        ax1.scatter(projs[:i * num_sols + 1, 0], projs[:i * num_sols + 1, 1])

    fig = plt.figure()
    ax1 = fig.add_subplot(121)
    # Keep a reference: the animation stops if the object is collected.
    ani = animation.FuncAnimation(fig, animate, interval=10)
    ax2 = fig.add_subplot(122)
    ax2.plot(rewards)
    plt.show()
def minimize(self, fun, bounds, guess=None, sdevs=0.3, rg=Generator(MT19937()), store=None):
    """Minimise ``fun`` inside ``bounds`` using pycma's ask/tell loop.

    Instance-level ``self.guess`` / ``self.sdevs`` take precedence over the
    ``guess`` / ``sdevs`` arguments when they are not ``None``.  Without any
    guess, a start point is drawn uniformly inside the bounds from ``rg``.

    Args:
        fun: objective function.
        bounds: object exposing ``lb`` and ``ub``.
        guess: optional start point.
        sdevs: per-variable standard deviations, passed as ``CMA_stds``.
        rg: random generator used only to draw a start point.
        store: forwarded to ``self.max_eval_num`` to obtain the budget.

    Returns:
        ``(xbest, fbest, evals)`` on success.  If anything inside the
        optimisation raises, the exception is printed and ``None`` is
        returned implicitly.

    Raises:
        ImportError: if the ``cma`` package is not installed.
    """
    lower, upper = bounds.lb, bounds.ub

    if self.guess is not None:
        guess = self.guess
    if guess is None:
        guess = rg.uniform(lower, upper)

    max_evaluations = self.max_eval_num(store)
    input_sigma = sdevs if self.sdevs is None else self.sdevs

    try:
        import cma
    except ImportError as e:
        raise ImportError("Please install CMA (pip install cma)")

    try:
        cma_options = {
            'bounds': [lower, upper],
            'typical_x': guess,
            'scaling_of_variables': scale(lower, upper),
            'popsize': self.popsize,
            'CMA_stds': input_sigma,
            'verbose': -1,
            'verb_disp': -1
        }
        es = cma.CMAEvolutionStrategy(guess, 0.1, cma_options)

        evals = 0
        for _ in range(max_evaluations):
            X, Y = es.ask_and_eval(fun)
            es.tell(X, Y)
            # The budget is counted in evaluations: one population per round.
            evals += self.popsize
            if es.stop() or evals > max_evaluations:
                break
        return es.result.xbest, es.result.fbest, evals
    except Exception as ex:
        print(ex)
def CMA():
    """Run CMA-ES for ``n_iters`` generations and record statistics.

    The search starts from the constant vector 5 with step size 2 inside the
    box [0, 10].  Per generation the population mean, the best candidate and
    the mean loss are appended to ``stats``; every fifth generation the
    statistics are written to ``./stats/<ENV>.npy``.
    """
    opt = cma.CMAOptions()
    opt['tolfun'] = 1e-11
    opt['popsize'] = n_samples
    opt['maxiter'] = n_iters
    opt['bounds'] = [0, 10]

    mu = np.ones((dim)) * 5
    es = cma.CMAEvolutionStrategy(mu, 2., opt)

    stats = {
        'loss': [],
        'theta': [],
        'mintheta': [],
    }

    t0 = time.time()
    for i in range(n_iters):
        solutions = es.ask()
        loss = np.array(evaluate(solutions))
        es.tell(solutions, loss)

        population = np.array(solutions)
        curr_best = population.mean(0)  # population mean of this generation
        curr_min = population[np.argmin(loss)]  # lowest-loss candidate

        stats['theta'].append(curr_best)
        stats['loss'].append(loss.mean())
        stats['mintheta'].append(curr_min)

        print("[INFO] iter %2d | time %10.4f | avg loss %10.4f | min loss %10.4f" % (
            i, time.time() - t0, loss.mean(), loss.min()))
        V = transform(curr_best)
        print(V)
        M = transform(curr_min)
        print(M)

        if (i + 1) % 5 == 0:
            # np.save emits bytes, so the file has to be opened in binary
            # mode; text mode raises TypeError on Python 3.
            with open("./stats/{}.npy".format(ENV), 'wb') as f:
                np.save(f, stats)
def simple_best_fit_sum_of_squares(self, x0=None, sigma0=0.1, cma_random_seed=123):
    """Fit the model by minimising the sum of squared differences.

    CMA-ES works on its own parameter scale; every candidate is mapped
    through ``self.scale_params_for_cmaes`` before being scored.  The result
    is stored in ``self._best_fit_params`` as the rescaled best parameters
    followed by the analytically computed best sigma.

    Args:
        x0: start point on the CMA-ES scale; defaults to ``[2.5, 1.]``.
        sigma0: initial CMA-ES step size.
        cma_random_seed: seed for CMA-ES.
    """
    if x0 is None:
        x0 = [2.5, 1.]

    opts = cma.CMAOptions()
    opts["seed"] = cma_random_seed
    es = cma.CMAEvolutionStrategy(x0, sigma0, opts)

    def cost(candidate):
        # Score one candidate on the model's own parameter scale.
        return self.sum_of_square_diffs(*self.scale_params_for_cmaes(candidate))

    while not es.stop():
        X = es.ask()
        es.tell(X, [cost(x) for x in X])
        es.disp()

    res = es.result
    n_points = len(self._data["Response"])
    best_sigma = compute_best_sigma_analytic(res[1], n_points)
    self._best_fit_params = np.concatenate(
        (self.scale_params_for_cmaes(res[0]), [best_sigma]))
def main(beta):
    """Maximise ``eval_config`` over ten angles and append the result to disk.

    CMA-ES minimises, so each fitness is converted to ``100000000 - fitness``
    before being reported to the optimiser and converted back afterwards.
    The best fitness and genome are appended to ``cmaresult<beta>.dat``.

    Args:
        beta: passed to ``eval_config`` and used in the result file name.
    """
    optim = cma.CMAEvolutionStrategy(10 * [180], 45, {'bounds': [0, 360]})

    for i in range(100):
        # %-formatting keeps the output identical under the Python 2 print
        # statement and the Python 3 print function.
        print("generation %s" % (i,))
        solutions = optim.ask()
        fitnesses = [
            100000000 - eval_config("test", beta, soln) for soln in solutions
        ]
        optim.tell(solutions, fitnesses)

    # write results
    # ``result`` was a method in old pycma releases and is a property in
    # current ones; accept both.
    result = optim.result
    if callable(result):
        result = result()
    bestgenome = list(result[0])
    bestfit = 100000000 - result[1]
    print("best %s genome %s" % (bestfit, bestgenome))

    beststr = str(bestfit) + " : " + str(bestgenome) + "\n"
    with open("cmaresult" + str(beta) + ".dat", 'a') as resfile:
        resfile.write(beststr)
def __init__(self, mu0, std0, popsize):
    """Wrap a pycma evolution strategy.

    Args:
        mu0 (list or ndarray): initial mean
        std0 (float): initial standard deviation
        popsize (int): population size
    """
    # Imported here so that merely importing this module does not need cma.
    import cma

    self.mu0, self.std0, self.popsize = mu0, std0, popsize
    self.solutions = None

    cma_options = {'popsize': self.popsize}
    self.es = cma.CMAEvolutionStrategy(self.mu0, self.std0, cma_options)
def main():
    """Minimise 2-D Rosenbrock with CMA-ES, mapping evaluations through torc.

    Prints the first two entries of the optimiser result and the elapsed
    wall-clock time, then opens pycma's standard plot.
    """
    cma_options = {
        'popsize': 8,
        'maxfevals': 320,
        'verb_disp': 1,
        'seed': 3
    }
    es = cma.CMAEvolutionStrategy(2 * [0], 0.5, cma_options)
    # Alternative backend that was tried:
    # es = cma.purecma.CMAES(2 * [0], 0.5, popsize=8, maxfevals=16)

    started = time.time()
    while not es.stop():
        solutions = es.ask()
        values = torc.map(rosenbrock, solutions)
        es.tell(solutions, values)
        es.logger.add(es)  # write data to disc to be plotted
        es.disp()
    finished = time.time()

    print(es.result[0])
    print(es.result[1])
    print(finished - started)
    cma.plot()
def __init__(
        self,
        num_params,  # number of model parameters
        solution_init=None,  # starting point
        sigma_init=0.10,  # initial standard deviation
        popsize=255,  # population size
        weight_decay=0.01):  # weight decay coefficient
    """Create a CMA-ES solver.

    When ``solution_init`` is ``None`` the search starts from the zero
    vector of length ``num_params``.  ``weight_decay`` is only stored on the
    instance here.
    """
    import cma

    self.num_params = num_params
    self.sigma_init = sigma_init
    self.popsize = popsize
    self.weight_decay = weight_decay
    self.solutions = None

    start_point = (self.num_params * [0]
                   if solution_init is None else solution_init)
    self.es = cma.CMAEvolutionStrategy(start_point, self.sigma_init,
                                       {'popsize': self.popsize})
def minimize(func_name):
    """Run 30 CMA-ES repetitions on one configured objective and log them.

    The section ``func_name`` of ``objective_function/config/low_dim.ini``
    supplies the optimum-position file, the dimension, the sampling range
    for start points and the evaluation budget.  The collected epoch data is
    written as text under ``pycma_exp/log/low_dim/``.

    Args:
        func_name: config section name; also the key into
            ``function_cmaes_dict`` selecting the objective.
    """
    config = configparser.ConfigParser()
    config_name = os.path.join(project_dir,
                               'objective_function/config/low_dim.ini')
    config.read(config_name, encoding='utf-8')
    print(config.sections())
    optimal_position_address = os.path.join(
        project_dir, config.get(func_name, 'optimal_position_address'))
    dim_size = config.getint(func_name, 'dim_size')
    # NOTE(review): eval() executes whatever the config file contains; only
    # use config files from a trusted source.
    dim_regs = eval(config.get(func_name, 'dim_regs'))
    budget = config.getint(func_name, 'budget')
    repeat = 30
    set_optimal_position(optimal_position_address)
    # Both RNGs are seeded once, before the repetitions start.
    seed = 0
    random.seed(seed)
    np.random.seed(seed)
    for i in range(repeat):
        # Start point: uniform in [dim_regs[0], dim_regs[1]] per coordinate.
        init_pos = [
            np.random.uniform(dim_regs[0], dim_regs[1])
            for _ in range(dim_size)
        ]
        es = cma.CMAEvolutionStrategy(init_pos, 0.5)  # doctest: +ELLIPSIS
        # The budget is tracked by the external counter get_cnt(), not by
        # the optimiser's own stopping criteria.
        # NOTE(review): presumably the objective increments that counter and
        # epoch_first_items() resets it -- confirm against their definitions.
        while get_cnt() < budget:
            solutions = es.ask()
            es.tell(solutions,
                    [function_cmaes_dict[func_name](x) for x in solutions])
            es.logger.add()
        epoch_first_items(budget)
        append_all_epoch()
        # NOTE(review): result_pretty() is called twice and `sol` is unused.
        sol = es.result_pretty()
        es.result_pretty()
    all_epoch = np.array(get_all_epoch())
    log_address = os.path.join(project_dir, 'pycma_exp/log/low_dim/')
    # NOTE(review): `obj_name` is not defined in this function; it must be a
    # module-level name (or was meant to be `func_name`) -- confirm.
    file_name = os.path.join(log_address,
                             '{}_{}.txt'.format(obj_name, dim_size))
    os.makedirs(log_address, exist_ok=True)
    np.savetxt(file_name, all_epoch)
    print(all_epoch.shape)
def __init__(self, unique_id, model, message, parameters):
    """Set up the agent's trade plan and its private CMA-ES instance.

    SISTER uses CMA-ES on float vectors that are then converted to trade
    plans.  If an initial message is put on the board, it is used as a mask
    when converting the random start vector, and kept as
    ``initial_trade_plan``.

    Args:
        unique_id, model, message, parameters: forwarded to the parent
            constructor; ``parameters['sigma']`` and
            ``parameters['elitist']`` configure CMA-ES.
    """
    super().__init__(unique_id, model, message, parameters)

    vs = self.vector_size()
    self.initialVec = np.random.uniform(low=0.0, high=1.0, size=(vs, ))

    if message:
        # Keep the supplied plan and use a separate copy of it as the mask.
        self.initial_trade_plan = copy.deepcopy(self.message)
        mask = copy.deepcopy(self.initial_trade_plan)
        self.message, float_vec = self.float_vec_to_trade_plan(
            self.initialVec, mask)
    else:
        self.message, float_vec = self.float_vec_to_trade_plan(
            self.initialVec)
        self.initial_trade_plan = None

    self.float_vec = float_vec
    self.model.blackboard.append(self.message)

    # Each agent seeds its optimiser from the stdlib RNG.
    seed = random.randint(1, 1000000)
    params = {
        'bounds': [0.0, 1.0],
        'seed': seed,
        'CMA_elitist': self.parameters['elitist']
    }
    self.es = cma.CMAEvolutionStrategy(self.initialVec,
                                       self.parameters['sigma'], params)

    # The first population is requested right away.
    self.solutions = self.es.ask()
    self.results = []
    self.next_solution = 0
    self.agiTokens = 0
    self.max_buyer_score = 0
    self.max_seller_score = 0
    print("IN SISTER init," + self.b[self.unique_id]['label'])
def __init__(self, noise_dim):
    """Build the generator network and a CMA-ES search over its weights.

    Args:
        noise_dim: passed to ``ConvGen`` and stored on the instance.
    """
    super(EvoGen, self).__init__()

    module_dir = os.path.dirname(os.path.realpath(__file__))
    self.filename = os.path.join(module_dir, "assets/hf_gen.png")
    self.sdir = "terrain_evogen_es.p"

    self.noise_dim = noise_dim
    self.pop_size = 24
    self.weight_decay = 0.01

    self.convnet = ConvGen(self.noise_dim)
    # Flattened copy of the network weights is the CMA-ES start point.
    flat_weights = parameters_to_vector(self.convnet.parameters())
    self.w = flat_weights.detach().numpy()
    print("N_conv params: {}".format(len(self.w)))

    self.es = cma.CMAEvolutionStrategy(self.w, 0.5)
    # First batch of candidates is drawn immediately; scores are collected
    # later through candidate_scores / candidate_idx.
    self.candidates = self.es.ask(self.pop_size)
    # self.es.tell(self.candidates, [0.] * self.pop_size)
    self.candidate_scores = []
    self.candidate_idx = 0
def sample_cma_es(self, start_from, save_cmaes_params=True):
    """Load the next CMA-ES candidate into ``self.flat_cmaes_weights``.

    The optimiser is created on first use.  Whenever the queue of candidates
    to try is empty, a new generation is requested with ``ask``; results
    collected for the previous generation are reported with ``tell`` first.

    Args:
        start_from: if it contains ``'zero'``, the start vector is zeroed in
            place and the initial sigma is 1; otherwise
            ``self.exploration_sigma`` is used.
        save_cmaes_params: forwarded to ``self._save_es_generation``.
    """
    if self.es is None:
        cma_es_params = self.flat_cmaes_weights
        if 'zero' in start_from:
            # fill() mutates self.flat_cmaes_weights itself (same object).
            cma_es_params.fill(0.)
            initial_sigma = 1
        else:
            initial_sigma = self.exploration_sigma
        self.es = cma.CMAEvolutionStrategy(x0=cma_es_params,
                                           sigma0=initial_sigma)
    if len(self.cma_es_params_to_try) == 0:
        # Current generation is used up.
        if len(self.tested_cma_es_params) > 0:
            # NOTE(review): nesting reconstructed from a flattened source --
            # confirm which statements belong under `if self.cma_es_asked`.
            if self.cma_es_asked:
                self._save_es_generation(save_cmaes_params)
                self.es.tell(self.tested_cma_es_params,
                             self.tested_cma_es_params_fvals)
            self.tested_cma_es_params.clear()
            self.tested_cma_es_params_fvals.clear()
            self.cma_es_asked = False
        self.cma_es_params_to_try = self.es.ask()
        self.cma_es_asked = True
    # Candidates are consumed from the end of the list.
    self.flat_cmaes_weights = self.cma_es_params_to_try.pop()
def psetup_cmaes(self):
    """Collect the tunable parameters and create the CMA-ES instance.

    Parameters of type ``"static"`` are skipped.  For every other parameter
    its min/max are appended to ``self.minimum`` / ``self.maximum`` and, for
    linear or log scaling, the real-valued grid matching the internal
    positions 0..10 is printed.  The optimiser itself always works on the
    scaled box [0, 10].
    """
    # Every print call takes one pre-formatted argument, so the output is
    # the same under the Python 2 print statement and the Python 3 function.
    print("Maximum number of repetitions: %d" % self.max_reps)
    print("Config: %s" % repr(self.param_settings))

    for idx, name in enumerate(self.param_names):
        settings = self.param_settings[idx]
        if settings['type'] == "static":
            continue

        self.minimum.append(settings['min'])
        self.maximum.append(settings['max'])
        low, high = self.minimum[-1], self.maximum[-1]

        if settings['scaling'] == "linear":
            print("Linear scaling for parameter %s. \n"
                  " Shown are the bounds between which values have the same sensitivity " % name)
            # because scaling is from 0 to 10
            bounds = [self.reverse_linear_scaling(low, high, x)
                      for x in range(11)]
            print([("%.2f" % x) for x in bounds])
        elif settings['scaling'] == "log":
            print("Log scaling for parameter %s. \n"
                  " Shown are the bounds between which values have the same sensitivity " % name)
            bounds = [self.reverse_log_scaling(low, high, x)
                      for x in range(11)]
            print([("%.2e" % x) for x in bounds])

    print("Number of dimensions: %d" % len(self.minimum))

    # just a dummy, not used as first generation is sampled uniformly
    xstart = ones(len(self.minimum))
    # because everything is scaled to [0,10] anyways
    self.pcmaes = cma.CMAEvolutionStrategy(xstart, 2, {'bounds': [0, 10]})

    # One independent reward list per population member.
    self.rewards_of_plays = [([]) for _ in range(self.pcmaes.popsize)]
    self.eval_log.write(
        'countes #evaluations #atomic evals #cumulated atomic evals #evals without bandit\n'
    )