def main():
    """Run CMA-ES on the continuous LunarLander task and save the reward curve.

    The optimizer is fitted for 100 generations with a population of 100,
    logging to ``log/lander``. The logged training results are then loaded,
    sign-flipped and plotted against the generation index.
    """
    # Previous experiment (linear regression), kept for reference:
    # d = 100
    # lin_reg = LinearRegression(d=d)
    # opt = CMAES(d, m=np.random.rand(d), sig=0.1, fitness=lin_reg, log_dir='log/linreg')
    # opt.fit(200)
    # results = np.load('log/linreg/train_results.npy')
    # plt.xlabel('generation')
    # plt.ylabel('loss')
    # plt.title('CMA-ES on Linear Regression')
    # plt.plot(np.arange(len(results)), results)
    # plt.savefig('log/linreg/results.png')
    # # plt.show()

    # Lunar lander experiment.
    task = LunarLander(continuous=True)
    n_params = task.mu.dim
    optimizer = CMAES(
        n_params,
        m=np.random.rand(n_params),
        lam=100,
        sig=0.5,
        fitness=task,
        log_dir='log/lander',
        save_models=True,
    )
    optimizer.fit(100)

    # CMA-ES minimized the negative reward, so multiplying by -1 turns the
    # logged values back into rewards.
    rewards = np.load('log/lander/train_results.npy') * -1
    generations = np.arange(len(rewards))

    plt.xlabel('generation')
    plt.ylabel('reward')
    plt.title('CMA-ES on LunarLander')
    plt.plot(generations, rewards)
    plt.savefig('log/lander/results.png')
def evaluate(mode: str, dimensions: int = 10, iterations: int = 100, lambda_arg: int = 100, frequency: int = None,
             objectives: list = None):
    """Run CMAES repeatedly on each objective and aggregate the ECDF results.

    :param mode: forwarded to the ``CMAES`` constructor.
    :param dimensions: forwarded to the ``CMAES`` constructor.
    :param iterations: number of independent runs per objective.
    :param lambda_arg: forwarded to the ``CMAES`` constructor (printed as "population").
    :param frequency: forwarded to the ``CMAES`` constructor.
    :param objectives: objective names to run; defaults to
        ``['quadratic', 'felli', 'bent']`` when ``None``.
    :return: whatever ``_get_ecdf_data`` returns for the list of per-run
        ``algo.ecdf(_TARGETS)`` results (one entry per run, objectives in order).
    """
    selected = ['quadratic', 'felli', 'bent'] if objectives is None else objectives
    per_run_ecdf = []

    print("Starting evaluation...")
    print(
        f"mode: {mode}; dimensions: {dimensions}; frequency: {frequency}; iterations: {iterations}; "
        f"population: {lambda_arg}")

    for objective_name in selected:
        print(" Currently running:", objective_name)
        for _ in range(iterations):
            run = CMAES(objective_name, dimensions, mode, frequency, lambda_arg)
            run.generation_loop()
            per_run_ecdf.append(run.ecdf(_TARGETS))

    return _get_ecdf_data(per_run_ecdf)
def learn_representation(args, Q_data, TR_targets=None, fname="deafult.dat"):
    """Fit a representation to ``Q_data``, save it, and display the first layer.

    The fit is delegated to ``mp_cma_run``; its two results are written to
    ``policies/q_<fname>`` and ``policies/f_<fname>``. Afterwards the first
    layer of the initial, target and fitted grids is shown, and the fitted
    layer is checked against a fresh ``cmaes.evaluate`` of the saved features.

    :param args: object with ``rbf`` / ``nrbf`` attributes selecting the kind
        (``'nrbf'`` only when ``rbf`` is falsy and ``nrbf`` is truthy); also
        forwarded to ``mp_cma_run``.
    :param Q_data: flat array of target values; layers are consecutive slices
        of ``125 * 101`` elements.
    :param TR_targets: optional 2-D array whose column 2 is compared with the
        layer index; when given, ``tr_cost`` is printed before and after.
    :param fname: suffix of the output file names.
        NOTE(review): the default is spelled "deafult.dat"; left untouched
        because it determines file names on disk.
    """
    grid_shape = (125, 101, 3)
    feat_shape = (10, 10, 3)
    layer_len = grid_shape[0] * grid_shape[1]
    feat_layer_len = feat_shape[0] * feat_shape[1]
    width = 0.4
    kind = 'nrbf' if (not args.rbf and args.nrbf) else 'rbf'

    Q_init = np.zeros(Q_data.size)

    t_begin = time.time()
    Q_hat, F_hat = mp_cma_run(args, Q_data, Q_init, TR_targets, grid_shape,
                              feat_shape, width, kind)
    print("Required time: {}".format(time.time() - t_begin))

    # Saving
    Q_hat.tofile("policies/q_{}".format(fname))
    F_hat.tofile("policies/f_{}".format(fname))
    print("Saving complete")

    # Single-layer shapes used for plotting.
    single_grid = (grid_shape[0], grid_shape[1], 1)
    single_feat = (feat_shape[0], feat_shape[1], 1)
    plot_shape = (125, 101, 1)
    with CMAES(single_grid, single_feat, width, kind, name='plotting') as cmaes:
        for layer in range(1):
            begin = layer_len * layer
            end = layer_len * (layer + 1)

            q_init = Q_init[begin:end]
            show_grid_representation(q_init, (0, 1), plot_shape)

            q_data = Q_data[begin:end]
            show_grid_representation(q_data, (0, 1), plot_shape)

            q_hat = Q_hat[begin:end]
            show_grid_representation(q_hat, (0, 1), plot_shape)

            layer_features = F_hat[feat_layer_len * layer:feat_layer_len * (layer + 1)]
            q_hat_ff = cmaes.evaluate(layer_features)
            show_grid_representation(q_hat_ff, (0, 1), plot_shape)

            # The stored grid must match the one rebuilt from the features.
            assert (q_hat == q_hat_ff).all()

            if TR_targets is not None:
                rows = np.nonzero(TR_targets[:, 2] == layer)[0]
                tr_target = np.copy(TR_targets[rows, :])
                cost = cmaes.tr_cost(q_data, tr_target, grid_shape[0])
                print("Current tr cost {}".format(cost))
                cost = cmaes.tr_cost(q_hat, tr_target, grid_shape[0])
                print("Next tr cost {}".format(cost))

            waitforbuttonpress()
def mp_run(q_datas, q_inits, tr_targets, size, dsize, width, kind, n):
    """Optimize the ``n``-th problem in a worker process.

    Picks entry ``n`` from ``q_datas``, ``q_inits`` and ``tr_targets``, runs
    ``cmaes.optimize`` on it inside a ``CMAES`` context named after the
    current process, and prints the first three components of the initial
    and final cost.

    :return: ``(q_hat, f_hat)`` where ``q_hat`` is a copy of
        ``cmaes.evaluate(f_hat)``.
    """
    worker = multiprocessing.current_process().name
    target_q = q_datas[n]
    start_q = q_inits[n]
    target_tr = tr_targets[n]

    with CMAES(size, dsize, width, kind, name=worker) as cmaes:
        f_start = cmaes.initial(start_q)
        f_hat, cost_before, cost_after = cmaes.optimize(target_q, f_start, target_tr)

        # Copy the evaluated grid so the returned array is independent of
        # whatever object ``evaluate`` handed back.
        q_hat = np.copy(cmaes.evaluate(f_hat))

        before = np.array(cost_before)
        after = np.array(cost_after)
        print("\tInitial {} cost: {}, {}, {}".format(worker, before[0], before[1],
                                                     before[2]))
        print("\tFinal {} cost: {}, {}, {}".format(worker, after[0], after[1],
                                                   after[2]))
        return (q_hat, f_hat)
def cma_test():
    """Smoke test: recover a known 3x2 feature vector with ``CMAES.optimize``.

    A target grid is generated from ``f_true`` via ``cmaes.evaluate``; the
    optimizer is started from all-zero features and the resulting fit, its
    objective value and the trajectory cost before/after are printed and
    plotted. Blocks on ``waitforbuttonpress()`` at the end.
    """
    size = (125, 101, 1)
    dsize = (3, 2, 1)
    width = 0.4
    kind = 'rbf'

    f_true = np.array([0, 500, 0, 0, 0, -500], dtype='float64')
    #f_true = np.zeros((dsize[0]*dsize[1],), dtype='float64')

    with CMAES(size, dsize, width, kind, name='cma_test') as cmaes:
        # Copy each evaluate() result before the next cmaes call (same
        # convention as the other drivers in this file).
        q_data_ref = cmaes.evaluate(f_true)
        q_data = np.copy(q_data_ref)

        Q_data = np.tile(q_data, 3)
        TR_targets = prepare_targets(
            Q_data, "pendulum_sarsa_grid_rand_play-test-0.csv", 0.97)
        tr_idxs = np.nonzero(TR_targets[:, 2] == 0)[0]
        #tr_idxs = [tr_idxs[0]]
        tr_target = TR_targets[tr_idxs, :]

        f_init = np.zeros(f_true.shape)
        q_init_ref = cmaes.evaluate(f_init)
        q_init = np.copy(q_init_ref)

        c_tr = cmaes.tr_cost(q_init, tr_target, size[0])
        print("Initial tr cost {}".format(c_tr))

        # mp_run unpacks optimize() as (f_hat, cost0, cost1); the original
        # code here bound the whole return value to f_hat. Accept both forms
        # so only the feature vector is passed on.
        result = cmaes.optimize(q_data, f_init, tr_target)
        f_hat = result[0] if isinstance(result, tuple) else result

        # Copy before objective()/tr_cost() are called, mirroring how
        # q_data and q_init are handled above.
        q_hat = np.copy(cmaes.evaluate(f_hat))

        print(np.linalg.norm(q_hat - q_data) + 1 * np.linalg.norm(f_hat))
        print(cmaes.objective(f_hat, q_data))

        show_grid_representation(q_init, (0, 1), (size[0], size[1], 1))
        show_grid_representation(q_data, (0, 1), (size[0], size[1], 1))
        show_grid_representation(q_hat, (0, 1), (size[0], size[1], 1))
        plt.scatter(tr_target[:, 0], tr_target[:, 1], c='k', s=40, marker='+')

        c_tr = cmaes.tr_cost(q_hat, tr_target, size[0])
        print("Final tr cost {}".format(c_tr))

        waitforbuttonpress()
def test_compare_qf(fname):
    """Visually compare a stored Q grid with the one rebuilt from stored features.

    Loads ``policies/q_<fname>`` and ``policies/f_<fname>``, evaluates the
    features through ``CMAES``, shows both grids layer by layer together with
    the test trajectories, then shows the grid policy derived from the stored
    Q values and waits for a button press.

    :param fname: suffix of the two files under ``policies/``.
    """
    grid_shape = (125, 101, 3)
    feat_shape = (10, 10, 3)
    layer_len = grid_shape[0] * grid_shape[1]

    with CMAES(grid_shape, feat_shape, width=0.4, kind='rbf') as cmaes:
        stored_q = load_grid_representation("policies/q_{}".format(fname))
        stored_f = np.fromfile("policies/f_{}".format(fname))
        rebuilt_q = cmaes.evaluate(stored_f)

        rows = csv_read(
            ["trajectories/pendulum_sarsa_grid_rand_play-test-0.csv"])
        trajectories = load_trajectories(rows)

        for grid in (stored_q, rebuilt_q):
            see_by_layers(grid, trajectories, layer_len)

        policy_grid = calc_grid_policy(stored_q, (0, 1), (125, 101, 3))
        show_grid_representation(policy_grid, (0, 1), (125, 101, 1))
        plt.scatter(trajectories[:, 0], trajectories[:, 1], c='w', s=40,
                    marker='+')
        plt.waitforbuttonpress()
def rbf_test():
    """Show the first grid layer produced by a two-spike feature vector.

    Builds an all-zero feature vector of ``prod(dsize)`` entries, sets the
    first entry to -500 and the second to 500, evaluates it through ``CMAES``
    and displays the first ``size[0] * size[1]`` values. Waits for a button
    press afterwards.
    """
    size = (125, 101, 3)
    dsize = (10, 10, 3)
    n_features = np.prod(dsize)
    layer_len = size[0] * size[1]

    with CMAES(size, dsize, width=0.4, kind='rbf') as cmaes:
        #f_init = 500*np.random.uniform(-1, 1, size=(1, dnum))
        f_init = np.zeros([n_features])
        f_init[0] = -500
        f_init[1] = 500

        q_init_ref = cmaes.evaluate(f_init)

        layer = 0
        while layer < 1:
            first, last = layer_len * layer, layer_len * (layer + 1)
            show_grid_representation(q_init_ref[first:last], (0, 1),
                                     (size[0], size[1], 1))
            layer += 1

        #q_init_ref.tofile("q_rbf_test.dat")
        #f_init.tofile("f_rbf_test.dat")

        waitforbuttonpress()
def __init__(self, train_featurizer=False):
    """Build the wrapped ``CMAES`` object and store it on ``self.cmaes``.

    :param train_featurizer: passed unchanged as the single positional
        argument of ``CMAES``.
    """
    engine = CMAES(train_featurizer)
    self.cmaes = engine
def main(argv):
    """Parse the command line, build environment, policy and algorithm, then run.

    Recognized options (value options consume the following argument):

        -f FILE   .ini configuration file, handed to ``parseConfigFile`` (required)
        -s SEED   integer seed (default 1); 0 disables evolution
        -n N      number of replications (default 1)
        -a NAME   accepted; the value is stored in a local that is not used here
        -t FILE   test the policy stored in FILE (test mode 1)
        -T FILE   test the policy stored in FILE (test mode 2)
        -d DIR    directory passed to the algorithm (default './')
        -tf       accepted; sets a flag that is not used in this function

    Unknown arguments produce a warning and are skipped.

    :param argv: full argument vector including the program name at index 0.

    Exits with status -1 when called without arguments (after ``helper()``),
    when no .ini file is given, or when ``algoname`` is not a known algorithm.
    """
    global maxsteps
    global environment
    global filedir
    global saveeach
    global nrobots
    global algoname

    argc = len(argv)

    # if called without parameters display help information
    if (argc == 1):
        helper()
        sys.exit(-1)

    # Default parameters:
    filename = None        # configuration file
    cseed = 1              # seed
    nreplications = 1      # nreplications
    filedir = './'         # directory
    testfile = None        # file containing the policy to be tested
    test = 0               # whether we want to test a policy (1=show behavior, 2=show neurons)
    displayneurons = 0     # whether we want to display the activation state of the neurons
    useTf = False          # whether we want to use tensorflow to implement the policy

    i = 1
    while (i < argc):
        if (argv[i] == "-f"):
            i += 1
            if (i < argc):
                filename = argv[i]
                i += 1
        elif (argv[i] == "-s"):
            i += 1
            if (i < argc):
                cseed = int(argv[i])
                i += 1
        elif (argv[i] == "-n"):
            i += 1
            if (i < argc):
                nreplications = int(argv[i])
                i += 1
        elif (argv[i] == "-a"):
            i += 1
            if (i < argc):
                # NOTE(review): this value is never read afterwards; the
                # algorithm actually used comes from the global `algoname`.
                algorithm = argv[i]
                i += 1
        elif (argv[i] == "-t"):
            i += 1
            test = 1
            if (i < argc):
                testfile = argv[i]
                i += 1
        elif (argv[i] == "-T"):
            i += 1
            test = 2
            displayneurons = 1
            if (i < argc):
                testfile = argv[i]
                i += 1
        elif (argv[i] == "-d"):
            i += 1
            if (i < argc):
                filedir = argv[i]
                i += 1
        elif (argv[i] == "-tf"):
            i += 1
            useTf = True
        else:
            # We simply ignore the argument
            print("\033[1mWARNING: unrecognized argument %s \033[0m" % argv[i])
            i += 1

    # load the .ini file
    if filename is not None:
        parseConfigFile(filename)
    else:
        # The message has no placeholder; applying `% filename` to it (as the
        # code used to) raises TypeError instead of reporting the problem.
        print("\033[1mERROR: You need to specify an .ini file\033[0m")
        sys.exit(-1)

    # if a directory is not specified, we use the current directory
    if filedir is None:
        filedir = scriptdirname

    # check whether the user specified a valid algorithm
    availableAlgos = ('CMAES', 'Salimans', 'xNES', 'sNES', 'SSS', 'pepg',
                      'coevo2', 'coevo2r')
    if algoname not in availableAlgos:
        print("\033[1mAlgorithm %s is unknown\033[0m" % algoname)
        print("Please use one of the following algorithms:")
        for a in availableAlgos:
            print("%s" % a)
        sys.exit(-1)

    print("Environment %s nreplications %d maxmsteps %dm " %
          (environment, nreplications, maxsteps / 1000000))
    env = None
    policy = None

    # Evorobot Environments (we expect observation and action made of numpy array of float32)
    if "Er" in environment:
        ErProblem = __import__(environment)
        env = ErProblem.PyErProblem()  # Create a new doublepole object
        #action_space = spaces.Box(-1., 1., shape=(env.noutputs,), dtype='float32')
        #observation_space = spaces.Box(-np.inf, np.inf, shape=(env.ninputs,), dtype='float32')
        ob = np.arange(env.ninputs * nrobots, dtype=np.float32)
        ac = np.arange(env.noutputs * nrobots, dtype=np.float32)
        done = np.arange(1, dtype=np.int32)
        # Hand the buffers to the environment.
        env.copyObs(ob)
        env.copyAct(ac)
        env.copyDone(done)
        from policy import ErPolicy
        policy = ErPolicy(env, env.ninputs, env.noutputs, env.low, env.high,
                          ob, ac, done, filename, cseed, nrobots,
                          heterogeneous, test)

    # Bullet environment (we expect observation and action made of numpy array of float32)
    if "Bullet" in environment:
        import gym
        from gym import spaces
        import pybullet
        import pybullet_envs
        # import balance_bot
        env = gym.make(environment)
        # Define the objects required (they depend on the environment)
        ob = np.arange(env.observation_space.shape[0], dtype=np.float32)
        ac = np.arange(env.action_space.shape[0], dtype=np.float32)
        # Define the policy
        from policy import BulletPolicy
        policy = BulletPolicy(env, env.observation_space.shape[0],
                              env.action_space.shape[0],
                              env.action_space.low[0],
                              env.action_space.high[0], ob, ac, filename,
                              cseed, nrobots, heterogeneous, test)

    # Gym environment (we expect observation and action made of numpy array of float64 or discrete actions)
    if (not "Bullet" in environment) and (not "Er" in environment):
        import gym
        from gym import spaces
        env = gym.make(environment)
        # Define the objects required (they depend on the environment)
        ob = np.arange(env.observation_space.shape[0], dtype=np.float32)
        if (isinstance(env.action_space, gym.spaces.box.Box)):
            ac = np.arange(env.action_space.shape[0], dtype=np.float32)
        else:
            ac = np.arange(env.action_space.n, dtype=np.float32)
        # Define the policy
        if (isinstance(env.action_space, gym.spaces.box.Box)):
            from policy import GymPolicy
            policy = GymPolicy(env, env.observation_space.shape[0],
                               env.action_space.shape[0],
                               env.action_space.low[0],
                               env.action_space.high[0], ob, ac, filename,
                               cseed, nrobots, heterogeneous, test)
        else:
            from policy import GymPolicyDiscr
            policy = GymPolicyDiscr(env, env.observation_space.shape[0],
                                    env.action_space.n, 0.0, 0.0, ob, ac,
                                    filename, cseed, nrobots, heterogeneous,
                                    test)

    policy.environment = environment
    policy.saveeach = saveeach

    # Create the algorithm class (modules are imported lazily so that only
    # the selected algorithm has to be importable)
    if (algoname == 'CMAES'):
        from cmaes import CMAES
        algo = CMAES(env, policy, cseed, filedir)
    elif (algoname == 'Salimans'):
        from salimans import Salimans
        algo = Salimans(env, policy, cseed, filedir)
    elif (algoname == 'xNES'):
        from xnes import xNES
        algo = xNES(env, policy, cseed, filedir)
    elif (algoname == 'sNES'):
        from snes import sNES
        algo = sNES(env, policy, cseed, filedir)
    elif (algoname == 'SSS'):
        from sss import SSS
        algo = SSS(env, policy, cseed, filedir)
    elif (algoname == 'coevo2'):
        from coevo2 import coevo2
        algo = coevo2(env, policy, cseed, filedir)
    elif (algoname == 'coevo2r'):
        from coevo2r import coevo2
        algo = coevo2(env, policy, cseed, filedir)
    elif (algoname == 'pepg'):
        from pepg import pepg
        algo = pepg(env, policy, cseed, filedir)

    # Set evolutionary variables
    algo.setEvoVars(sampleSize, stepsize, noiseStdDev, sameenvcond, wdecay,
                    evalCenter, saveeachg, fromgeneration, crossoverrate)

    if (test > 0):
        # test a policy
        print("Run Test: Environment %s testfile %s" % (environment, testfile))
        algo.test(testfile)
    else:
        # run evolution
        if (cseed != 0):
            print("Run Evolve: Environment %s Seed %d Nreplications %d" %
                  (environment, cseed, nreplications))
            for r in range(nreplications):
                algo.run(maxsteps)
                # Each replication uses the next seed.
                algo.seed += 1
                policy.seed += 1
                algo.reset()
                policy.reset()
        else:
            print("\033[1mPlease indicate the seed to run evolution\033[0m")
from cmaes import CMAES
from features import simple_featurizer, dellacherie_featurizer, bcts_featurizer
import numpy as np
from envs.tetris import TetrisEnv
from tqdm import tqdm

# Evaluation script: play N Tetris games with the weights loaded from
# 'CMAES.csv' and report how many lines were cleared in each game.
N = 20

player = CMAES(bcts_featurizer)
player.load('CMAES.csv')

env = TetrisEnv()
lines = []  # lines cleared in each finished game

for i in range(N):
    env.reset()
    done = False
    # A game ends when the environment reports done or once 100000 lines
    # have been cleared.
    while not (done or env.state.cleared >= 100000):
        action = player.act(env.state)
        _, reward, done, info = env.step(action)
    lines.append(env.state.cleared)
    print(env.state.cleared, 'moving avg', np.average(lines))

print('{} games played'.format(N))
print('Avg lines cleared', float(sum(lines)) / N)
print('Max lines cleared', float(max(lines)))
print('Lines cleared per game', lines)
def bayesOpt(inputs: np.ndarray,
             outputs: np.ndarray,
             func: Callable,
             theta_d: np.ndarray,
             num_iters: int,
             maximize: bool = True,
             chol: bool = False):
    """
    Function which performs bayesian optimization using Gaussian Processes and the CMA-ES optimization algorithm.

    Each iteration fits a GP with an RBF kernel to all points seen so far,
    asks ``CMAES`` for the optimum of the confidence bound, evaluates ``func``
    there and appends the new point to ``inputs`` / ``outputs``.

    :param inputs: a NumPy array containing the previously explored values of the objective function.
        A 1-D array is treated as a column of scalar inputs.
    :param outputs: the values of the objective function evaluated at each input value.
    :param func: the objective function.
    :param theta_d: hyperparameter for the Gaussian Process (length scale of the RBF kernel).
    :param num_iters: number of GP iteration to run.
    :param maximize: indicate whether the objective function should be maximized (True) or minimized (False).
    :param chol: indicate whether to use Cholesky decomposition for GP evaluation (True) or to use
        matrix inversion (False).
    :return: tuple ``(inputs, outputs)`` extended by one row / one value per iteration.
    """
    theta_0 = 0.1  # amplitude of the RBF kernel
    kappa = 0.1  # weight of the standard deviation in the confidence bound
    jitter = 1e-3  # added to the diagonal of K before factorizing/inverting

    if len(inputs.shape) < 2:
        inputs = np.expand_dims(inputs, -1)

    arg_best = np.argmax if maximize else np.argmin
    val_best = np.max if maximize else np.min
    theta_sq = np.power(theta_d, 2.)

    def sq_exp(delta):
        # Unscaled RBF term for a (batch of) difference vector(s).
        return np.exp(-0.5 * np.sum(np.power(delta, 2.) / theta_sq, axis=-1))

    #GP iterations
    for _ in range(num_iters):
        # The covariance of the observed points and its factorization only
        # depend on `inputs` / `outputs`, which are fixed while CMA-ES runs,
        # so they are computed once per iteration rather than once per
        # acquisition evaluation.
        #RBF kernel for covariance matrix
        K = theta_0 * sq_exp(inputs[:, np.newaxis, :] - inputs[np.newaxis, :, :])
        K_reg = K + jitter * np.eye(inputs.shape[0])
        if chol:
            L = np.linalg.cholesky(K_reg)
            alpha = np.linalg.solve(L.T, np.linalg.solve(L, outputs))
        else:
            KI_inv = np.linalg.inv(K_reg)
            alpha = np.dot(KI_inv, outputs)

        #this function computes the upper confidence value of the GP at x
        def acquisition(x):
            K_t = theta_0 * sq_exp(inputs - x)
            # Prior variance at x plus a small random perturbation.
            K_tt = theta_0 * (sq_exp(x - x) + np.random.normal(0, 1e-5))
            if chol:
                mean = K_t.dot(alpha)
                v = np.linalg.solve(L, K_t)
                var = K_tt - v.dot(v)
            else:
                mean = np.dot(K_t.transpose(), alpha)
                var = K_tt - np.dot(K_t.transpose(), np.dot(KI_inv, K_t))
            assert var > 0, "Var < 0, please reduce K_tt noise"
            spread = kappa * np.sqrt(var)
            return mean + spread if maximize else mean - spread

        #run CMA-ES to find maximum of the GP, starting from the best point so far
        xmean = inputs[arg_best(outputs)]
        bestX = CMAES(xmean, 0.1, 100, acquisition, None, not maximize)

        inputs = np.vstack((inputs, bestX))
        outputs = np.hstack((outputs, func(bestX)))

        bestx_t = inputs[arg_best(outputs)]
        print("Current best x_t: {}, best val: {}".format(
            bestx_t, val_best(outputs)))

    return inputs, outputs
self._f = None self._penalty = None self._violation = [] def f(solution): return np.dot(solution._x_repaired, solution._x_repaired) lbound = np.ones(N) * (-5.0) ubound = np.ones(N) * 5.0 A = np.array([[-1.0] + [0.0] * (N - 1)]) b = np.array([-1.0]) # --------------------------------------------------------------------- # if CASE == 1: print("Case 1: (1 linear ineq + bound)") cma = CMAES(xmean0=(lbound + ubound) / 2., sigma0=(ubound - lbound) / 4.) ch = ARCHLinear(fobjective=f, matA=A, vecb=b, weight=cma.w, bound=(lbound, ubound)) checker = Checker(cma) logger = Logger(cma, variable_list=['xmean', 'D', 'sigma']) issatisfied = False fbestsofar = np.inf while not issatisfied: xx = cma.sample_candidate() sol_list = [Solution(x) for x in xx] xcov = cma.transform(cma.transform(np.eye(N)).T)