def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filebase = 'log/' + gamename + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    game = config.games[gamename]
    model = make_model(game)
    num_params = model.param_count
    print("size of model", num_params)

    if optimizer == 'ses':
        ses = PEPG(num_params,
                   sigma_init=sigma_init,
                   sigma_decay=sigma_decay,
                   sigma_alpha=0.2,
                   sigma_limit=0.02,
                   elite_ratio=0.1,
                   weight_decay=0.005,
                   popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=0.005,
                      popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params,
                    sigma_init=sigma_init,
                    popsize=population)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_alpha=0.20,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    weight_decay=0.005,
                    popsize=population)
        es = pepg
    else:
        oes = OpenES(num_params,
                     sigma_init=sigma_init,
                     sigma_decay=sigma_decay,
                     sigma_limit=0.02,
                     learning_rate=0.01,
                     learning_rate_decay=1.0,
                     learning_rate_limit=0.01,
                     antithetic=antithetic,
                     weight_decay=0.005,
                     popsize=population)
        es = oes

    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
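# The es object built by initialize_settings is consumed through the estool-style
# ask/tell interface (the same pattern used in train_controller_pepg below). A
# minimal sketch of that loop follows; evaluate_solution is a hypothetical
# stand-in for however fitness is actually computed (the distributed trainers
# farm this out to workers via the solution/result packets).
import numpy as np

def run_es_loop(num_generations=100):
    for generation in range(num_generations):
        solutions = es.ask()                    # (population, num_params) candidates
        fitness_list = np.zeros(es.popsize)
        for i, solution in enumerate(solutions):
            fitness_list[i] = evaluate_solution(solution)  # hypothetical evaluator
        es.tell(fitness_list)                   # update the search distribution
        result = es.result()                    # (best params, best fitness, ...)
        print(generation, result[1])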
def debugRun():
    config()
    x = np.random.randn(NPARAMS)
    print("The fitness of initial guess", fit_func(x))
    # oes = OpenES(NPARAMS,                 # number of model parameters
    #              sigma_init=0.5,          # initial standard deviation
    #              sigma_decay=0.999,       # anneal standard deviation slightly
    #              learning_rate=0.1,       # learning rate for the mean (mu)
    #              learning_rate_decay=1.0, # don't anneal the learning rate
    #              popsize=NPOPULATION,     # population size
    #              antithetic=False,        # whether to use antithetic sampling
    #              weight_decay=0.00,       # weight decay coefficient
    #              rank_fitness=False,      # use rank rather than raw fitness numbers
    #              forget_best=False)       # keep the historical best solution
    # print("-----test oes--------------")
    pepg = PEPG(NPARAMS,                 # number of model parameters
                sigma_init=0.5,          # initial standard deviation
                learning_rate=0.1,       # learning rate for the mean (mu)
                learning_rate_decay=1.0, # don't anneal the learning rate
                popsize=NPOPULATION,     # population size
                average_baseline=False,  # set baseline to average of batch
                weight_decay=0.00,       # weight decay coefficient
                rank_fitness=False,      # use rank rather than raw fitness numbers
                forget_best=False)       # keep the historical best solution
    # pepg_history = test_solver(pepg)
    history = debug_solver(pepg)
    history = np.array(history)
    print(history.shape)
    pickle_out = open("pepg_rose.pickle", "wb")
    pickle.dump(history, pickle_out)
    pickle_out.close()
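# fit_func and config are not shown in these snippets. Judging from the
# "pepg_rose.pickle" / "rose_*d.svg" filenames, the benchmark is presumably the
# Rosenbrock function; the sketch below is an assumption, not the project's
# actual fit_func. The value is negated because these solvers maximize fitness.
import numpy as np

def fit_func(x):
    # assumed implementation: negated N-dimensional Rosenbrock,
    # maximum of 0 at x = [1, 1, ..., 1]
    return -np.sum(100.0 * (x[1:] - x[:-1] ** 2) ** 2 + (1.0 - x[:-1]) ** 2)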
ga_history = test_solver(ga)

cmaes = CMAES(NPARAMS,
              popsize=NPOPULATION,
              weight_decay=0.0,
              sigma_init=0.5)
cma_history = test_solver(cmaes)

pepg = PEPG(NPARAMS,                 # number of model parameters
            sigma_init=0.5,          # initial standard deviation
            learning_rate=0.1,       # learning rate for the mean (mu)
            learning_rate_decay=1.0, # don't anneal the learning rate
            popsize=NPOPULATION,     # population size
            average_baseline=False,  # set baseline to average of batch
            weight_decay=0.00,       # weight decay coefficient
            rank_fitness=False,      # use rank rather than raw fitness numbers
            forget_best=False)       # keep the historical best solution
pepg_history = test_solver(pepg)

oes = OpenES(NPARAMS,                 # number of model parameters
             sigma_init=0.5,          # initial standard deviation
             sigma_decay=0.999,       # anneal standard deviation slightly
             learning_rate=0.1,       # learning rate for the mean (mu)
             learning_rate_decay=1.0, # don't anneal the learning rate
             popsize=NPOPULATION,     # population size
             antithetic=False,        # whether to use antithetic sampling
             weight_decay=0.00,       # weight decay coefficient
             rank_fitness=False,      # use rank rather than raw fitness numbers
             forget_best=False)       # keep the historical best solution
oes_history = test_solver(oes)
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999, init_opt=''):
    global population, filebase, controller_filebase, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filebase = './log/' + env_name + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    controller_filebase = './controller/' + env_name + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    model = make_model()
    num_params = model.param_count
    # print("size of model", num_params)

    if len(init_opt) > 0:
        es = pickle.load(open(init_opt, 'rb'))
    else:
        if optimizer == 'ses':
            ses = PEPG(num_params,
                       sigma_init=sigma_init,
                       sigma_decay=sigma_decay,
                       sigma_alpha=0.2,
                       sigma_limit=0.02,
                       elite_ratio=0.1,
                       weight_decay=0.005,
                       popsize=population)
            es = ses
        elif optimizer == 'ga':
            ga = SimpleGA(num_params,
                          sigma_init=sigma_init,
                          sigma_decay=sigma_decay,
                          sigma_limit=0.02,
                          elite_ratio=0.1,
                          weight_decay=0.005,
                          popsize=population)
            es = ga
        elif optimizer == 'cma':
            cma = CMAES(num_params,
                        sigma_init=sigma_init,
                        popsize=population)
            es = cma
        elif optimizer == 'pepg':
            pepg = PEPG(num_params,
                        sigma_init=sigma_init,
                        sigma_decay=sigma_decay,
                        sigma_alpha=0.20,
                        sigma_limit=0.02,
                        learning_rate=0.01,
                        learning_rate_decay=1.0,
                        learning_rate_limit=0.01,
                        weight_decay=0.005,
                        popsize=population)
            es = pepg
        else:
            oes = OpenES(num_params,
                         sigma_init=sigma_init,
                         sigma_decay=sigma_decay,
                         sigma_limit=0.02,
                         learning_rate=0.01,
                         learning_rate_decay=1.0,
                         learning_rate_limit=0.01,
                         antithetic=antithetic,
                         weight_decay=0.005,
                         popsize=population)
            es = oes

    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (4 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
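# PRECISION together with the packet sizes implies that solutions travel to
# workers as fixed-point int32 arrays. A sketch of one plausible encoding,
# assuming a 4-int header [worker_num, job_idx, seed, train_mode] per trial to
# match this variant's SOLUTION_PACKET_SIZE = (4 + num_params) * num_worker_trial;
# the actual header layout is not shown in these snippets.
import numpy as np

def encode_solution_packets(seeds, solutions, train_mode=1):
    packets = []
    for i, (seed, solution) in enumerate(zip(seeds, solutions)):
        worker_num = int(i / num_worker_trial) + 1
        packets.append([worker_num, i, seed, train_mode])         # header ints
        packets.append(np.round(np.array(solution) * PRECISION))  # fixed-point params
    flat = np.concatenate(packets).astype(np.int32)
    return np.split(flat, num_worker)  # one packet per worker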
def train_controller_pepg():
    # env_name = "SonicTheHedgehog-Genesis"
    env_name = "SonicTheHedgehog2-Genesis"
    # env_name = "SonicAndKnuckles-Genesis"
    # env_name = "SonicTheHedgehog3-Genesis"
    # env_name = "SonicAndKnuckles3-Genesis"
    env = retro.make(env_name)
    # print(env.observation_space)      # Box(224, 320, 3)
    # print(env.action_space)           # MultiBinary(12)
    # print(env.action_space.sample())  # [1 1 1 0 1 0 1 0 0 1 1 1]

    conv_vae_filename = "weights/conv_vae_SonicAndKnuckles.pkl"  # 3, 4608
    lstm_mdn_filename = "weights/lstm_mdn_SonicAndKnuckles.pkl"  # 4608
    controller_filename = "weights/controller_6656_12.pkl"
    # conv_vae_filename = "weights/conv_vae_gray_edges.pkl"      # 1, 1024
    # lstm_mdn_filename = "weights/lstm_mdn_gray_edges.pkl"      # 1024
    # controller_filename = "weights/controller_rnn_1024_12.pkl"

    # only used for the forward pass
    conv_vae = ConvVAE((3, 128, 128), 4608)
    if os.path.exists(conv_vae_filename):
        print("loading conv vae weights")
        conv_vae.load_state_dict(torch.load(conv_vae_filename))

    # only used for the forward pass
    lstm_mdn = LSTM(vector_size=4608)
    if os.path.exists(lstm_mdn_filename):
        print("loading lstm mdn weights")
        lstm_mdn.load_state_dict(torch.load(lstm_mdn_filename))

    controller = Controller(input_size=6656, action_size=12)
    if os.path.exists(controller_filename):
        print("loading controller weights")
        controller.load_state_dict(torch.load(controller_filename))

    # solver = CMAES(num_params=79884, sigma_init=4, popsize=100)
    solver = PEPG(num_params=79884,
                  sigma_init=4,
                  elite_ratio=0.25,
                  popsize=100,
                  forget_best=False)

    solver_sigma_mu_weights_filename = "weights/solver_sigma_mu_weights_34_0.30942985.npz"
    print("load sigma mu to solver")
    data = np.load(solver_sigma_mu_weights_filename)
    solver.mu = data["mu"]
    solver.sigma = data["sigma"]

    ## save sigma mu
    # pepg_mu = solver.mu
    # pepg_sigma = solver.sigma
    # np.savez(solver_sigma_mu_filename, mu=pepg_mu, sigma=pepg_sigma)

    # The controller is a single linear layer, so num_params = 79884:
    # params = list(controller.parameters())
    # weight = params[0]                          # [12, 6656] -> 79872
    # bias = params[1]                            # [12]
    # weight = weight.view(-1)                    # 79872
    # weights = torch.cat((weight, bias), dim=0)  # [79884]

    generations = 40000
    for generation in range(generations):
        solutions = solver.ask()                 # (100, 79884)
        fitness_list = np.zeros(solver.popsize)  # (100,)
        for i in range(solver.popsize):
            fitness_list[i] = evaluate(solutions[i], conv_vae, lstm_mdn, controller, env, n_steps=512)
            print(i, fitness_list[i])
        solver.tell(fitness_list)
        result = solver.result()  # first element is the best solution, second element is the best fitness
        # print(result[0])  # (79884,)
        # print(result[1])  # e.g. -10732.263849138297
        print(generation, result[1])

        # save solver sigma, mu and best weights
        print("save pepg data")
        solver_sigma_mu_filename = "weights/solver_sigma_mu_weights_%s_%s.npz" % (generation, result[1])
        pepg_mu = solver.mu
        pepg_sigma = solver.sigma
        weights = result[0]
        np.savez(solver_sigma_mu_filename, mu=pepg_mu, sigma=pepg_sigma, weights=weights)

        # save controller weights
        print("save controller weights")
        torch.save(controller.state_dict(), controller_filename)
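# evaluate() is not shown in these snippets. A plausible sketch of its contract,
# assuming it loads the flat 79884-dim solution into the linear controller and
# rolls the environment out for n_steps, summing reward. The feature pipeline
# (conv_vae encoding the frame, lstm_mdn providing the hidden state, controller
# mapping the 6656-dim feature to 12 button presses) is elided because those
# model interfaces are not shown; the random action below is a placeholder.
import numpy as np
import torch
from torch.nn.utils import vector_to_parameters

def evaluate_sketch(solution, controller, env, n_steps=512):
    with torch.no_grad():
        flat = torch.from_numpy(np.asarray(solution, dtype=np.float32))
        vector_to_parameters(flat, controller.parameters())  # fill weight [12, 6656] + bias [12]
    total_reward = 0.0
    obs = env.reset()
    for _ in range(n_steps):
        action = np.random.randint(0, 2, size=12)  # placeholder; real code uses the controller
        obs, reward, done, info = env.step(action)
        total_reward += reward
        if done:
            break
    return total_reward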
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    global population, filebase, game, controller, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filedir = 'results/{}/{}/log/'.format(exp_name, env_name)
    if not os.path.exists(filedir):
        os.makedirs(filedir)
    filebase = filedir + env_name + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    controller = make_controller(args=config_args)
    num_params = controller.param_count
    print("size of model", num_params)

    if optimizer == 'ses':
        ses = PEPG(num_params,
                   sigma_init=sigma_init,
                   sigma_decay=sigma_decay,
                   sigma_alpha=0.2,
                   sigma_limit=0.02,
                   elite_ratio=0.1,
                   weight_decay=0.005,
                   popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=0.005,
                      popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params,
                    sigma_init=sigma_init,
                    popsize=population)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_alpha=0.20,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    weight_decay=0.005,
                    popsize=population)
        es = pepg
    else:
        oes = OpenES(num_params,
                     sigma_init=sigma_init,
                     sigma_decay=sigma_decay,
                     sigma_limit=0.02,
                     learning_rate=0.01,
                     learning_rate_decay=1.0,
                     learning_rate_limit=0.01,
                     antithetic=antithetic,
                     weight_decay=0.005,
                     popsize=population)
        es = oes

    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE, model_name, novelty_search, unique_id, novelty_mode, BC_SIZE, ns_mode
    population = num_worker * num_worker_trial
    os.makedirs(os.path.join(ROOT, 'log'), exist_ok=True)
    filebase = os.path.join(ROOT, 'log', gamename + '.' + optimizer + '.' + model_name + '.' + str(num_episode) + '.' + str(population)) + '.' + unique_id
    if novelty_search:
        filebase = filebase + '.novelty'

    # size of the behavior characterization (BC) vector each worker reports
    if novelty_mode == 'h':
        BC_SIZE = H_SIZE
    elif novelty_mode == 'z':
        BC_SIZE = Z_SIZE
    elif novelty_mode == 'h_concat':
        BC_SIZE = BC_SEQ_LENGTH * H_SIZE
        # NOVELTY_THRESHOLD = 180
    elif novelty_mode == 'z_concat':
        BC_SIZE = BC_SEQ_LENGTH * Z_SIZE
    elif novelty_mode == 'a_concat':
        BC_SIZE = BC_SEQ_LENGTH * A_SIZE
    else:
        BC_SIZE = 9  # dummy BC size, unused because the reward is the distance travelled

    if novelty_mode:
        filebase = filebase + '.' + novelty_mode
    if ns_mode:
        filebase = filebase + '.' + ns_mode

    model = make_model(model_name, load_model=True)
    num_params = model.param_count
    print("size of model", num_params)

    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = (4 + BC_SIZE) * num_worker_trial

    if optimizer == 'ses':
        ses = PEPG(num_params,
                   sigma_init=sigma_init,
                   sigma_decay=sigma_decay,
                   sigma_alpha=0.2,
                   sigma_limit=0.02,
                   elite_ratio=0.1,
                   weight_decay=0.005,
                   popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=0.005,
                      popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params,
                    sigma_init=sigma_init,
                    popsize=population)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_alpha=0.20,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    weight_decay=0.005,
                    popsize=population)
        es = pepg
    else:
        oes = OpenES(num_params,
                     sigma_init=sigma_init,
                     sigma_decay=sigma_decay,
                     sigma_limit=0.02,
                     learning_rate=0.01,
                     learning_rate_decay=1.0,
                     learning_rate_limit=0.01,
                     antithetic=antithetic,
                     weight_decay=0.005,
                     popsize=population)
        es = oes
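# RESULT_PACKET_SIZE = (4 + BC_SIZE) * num_worker_trial suggests each trial
# reports 4 scalars plus its behavior characterization. A sketch of one
# plausible decoding, assuming a [worker_id, job_idx, fitness, timesteps]
# header with fitness/timesteps stored fixed-point at PRECISION; the actual
# layout is not shown in these snippets.
import numpy as np

def decode_result_packet(packet):
    r = np.asarray(packet, dtype=np.float64).reshape(num_worker_trial, 4 + BC_SIZE)
    worker_ids = r[:, 0].astype(int)
    job_indices = r[:, 1].astype(int)
    fitnesses = r[:, 2] / PRECISION  # undo fixed-point scaling
    timesteps = r[:, 3] / PRECISION
    bc_vectors = r[:, 4:]            # behavior characterizations for novelty search
    return worker_ids, job_indices, fitnesses, timesteps, bc_vectors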
def testRun():
    config()
    x = np.random.randn(NPARAMS)
    print("The fitness of initial guess", fit_func(x))

    pepg = PEPG(NPARAMS,                 # number of model parameters
                sigma_init=0.5,          # initial standard deviation
                learning_rate=0.1,       # learning rate for the mean (mu)
                learning_rate_decay=1.0, # don't anneal the learning rate
                popsize=NPOPULATION,     # population size
                average_baseline=False,  # set baseline to average of batch
                weight_decay=0.00,       # weight decay coefficient
                rank_fitness=False,      # use rank rather than raw fitness numbers
                forget_best=False)       # keep the historical best solution
    pepg_history = test_solver(pepg)

    pepgV = PEPGVariant(NPARAMS,                 # number of model parameters
                        sigma_init=0.5,          # initial standard deviation
                        learning_rate=0.1,       # learning rate for the mean (mu)
                        learning_rate_decay=1.0, # don't anneal the learning rate
                        popsize=NPOPULATION,     # population size
                        average_baseline=False,  # set baseline to average of batch
                        weight_decay=0.00,       # weight decay coefficient
                        rank_fitness=False,      # use rank rather than raw fitness numbers
                        forget_best=False,       # keep the historical best solution
                        diversity_best=0.1)      # diversity bonus, just for testing
    print("-----test PEPG variant-----")
    pepgv_history = test_solver(pepgV)

    print("---test PEPG variant with different diversity-----")
    pepgV2 = PEPGVariant(NPARAMS,                 # number of model parameters
                         sigma_init=0.5,          # initial standard deviation
                         learning_rate=0.1,       # learning rate for the mean (mu)
                         learning_rate_decay=1.0, # don't anneal the learning rate
                         popsize=NPOPULATION,     # population size
                         average_baseline=False,  # set baseline to average of batch
                         weight_decay=0.00,       # weight decay coefficient
                         rank_fitness=False,      # use rank rather than raw fitness numbers
                         forget_best=False,       # keep the historical best solution
                         diversity_best=1)        # stronger diversity bonus
    pepgV2_history = test_solver(pepgV2)

    oes = OpenES(NPARAMS,                 # number of model parameters
                 sigma_init=0.5,          # initial standard deviation
                 sigma_decay=0.999,       # anneal standard deviation slightly
                 learning_rate=0.1,       # learning rate for the mean (mu)
                 learning_rate_decay=1.0, # don't anneal the learning rate
                 popsize=NPOPULATION,     # population size
                 antithetic=False,        # whether to use antithetic sampling
                 weight_decay=0.00,       # weight decay coefficient
                 rank_fitness=False,      # use rank rather than raw fitness numbers
                 forget_best=False)       # keep the historical best solution
    print("-----test oes--------------")
    oes_history = test_solver(oes)

    cmaes = CMAES(NPARAMS,
                  popsize=NPOPULATION,
                  weight_decay=0.0,
                  sigma_init=0.5)
    cma_history = test_solver(cmaes)

    best_history = [0] * MAX_ITERATION
    plt.figure(figsize=(16, 8), dpi=150)
    optimum_line, = plt.plot(best_history, color="black", linewidth=0.5, linestyle="-.", label='Global Optimum')
    pepgv_line, = plt.plot(pepgv_history, color="red", linewidth=1.0, linestyle="-", label='PEPGV / NES')
    pepg_line, = plt.plot(pepg_history, color="blue", linewidth=1.0, linestyle="-.", label='PEPG / NES')
    oes_line, = plt.plot(oes_history, color="orange", linewidth=1.0, linestyle="-", label='OpenAI-ES')
    cma_line, = plt.plot(cma_history, color="green", linewidth=1.0, linestyle="-", label='CMA-ES')
    plt.legend(handles=[optimum_line, pepgv_line, pepg_line, oes_line, cma_line], loc='best')
    plt.xlim(0, 100)
    plt.xlabel('generation')
    plt.ylabel('loss')
    plt.savefig("./results/rose_" + str(NPARAMS) + "d.svg")
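# test_solver() is called throughout these snippets but never defined. A minimal
# sketch of what it presumably does, using the fit_func and MAX_ITERATION
# globals from above: run the ask/tell loop and record the best fitness per
# generation, so the histories can be plotted against each other.
import numpy as np

def test_solver_sketch(solver):
    history = []
    for j in range(MAX_ITERATION):
        solutions = solver.ask()
        fitness_list = np.array([fit_func(s) for s in solutions])
        solver.tell(fitness_list)
        result = solver.result()  # (best params, best fitness, curr best, sigma)
        history.append(result[1])
        if (j + 1) % 100 == 0:
            print("fitness at iteration", (j + 1), result[1])
    print("best fitness found:", result[1])
    return history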
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999, weight_decay=0.005):
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filebase = 'log/' + gamename + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    game = config.games[gamename]
    model = make_model(game)
    num_params = model.param_count
    print("size of model", num_params)

    if optimizer == 'ses':
        ses = PEPG(num_params,
                   sigma_init=sigma_init,
                   sigma_decay=sigma_decay,
                   sigma_alpha=0.2,
                   sigma_limit=0.02,
                   elite_ratio=0.1,
                   weight_decay=weight_decay,
                   popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=weight_decay,
                      popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params,
                    sigma_init=sigma_init,
                    popsize=population,
                    weight_decay=weight_decay)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_alpha=0.20,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    weight_decay=weight_decay,
                    popsize=population)
        es = pepg
    elif optimizer == 'oes':
        oes = OpenES(num_params,
                     sigma_init=sigma_init,
                     sigma_decay=sigma_decay,
                     sigma_limit=0.02,
                     learning_rate=0.01,
                     learning_rate_decay=1.0,
                     learning_rate_limit=0.01,
                     antithetic=antithetic,
                     weight_decay=weight_decay,
                     popsize=population)
        es = oes
    # elif optimizer == 'pso':
    #     pso = PSO(num_params,
    #               sigma_init=sigma_init,
    #               weight_decay=weight_decay,
    #               popsize=population)
    #     es = pso
    elif optimizer == 'global_pso':
        pso = Pyswarms(num_params,
                       sigma_init=sigma_init,
                       weight_decay=weight_decay,
                       popsize=population,
                       communication_topology='global')
        es = pso
    elif optimizer == 'local_pso':
        pso = Pyswarms(num_params,
                       sigma_init=sigma_init,
                       weight_decay=weight_decay,
                       popsize=population,
                       communication_topology='local')
        es = pso
    elif optimizer == 'random_pso':
        pso = Pyswarms(num_params,
                       sigma_init=sigma_init,
                       weight_decay=weight_decay,
                       popsize=population,
                       communication_topology='random')
        es = pso
    else:
        if optimizer in ng.optimizers.registry:
            ng_optimizer = Nevergrad(optimizer,
                                     num_params,
                                     sigma_init=sigma_init,
                                     popsize=population,
                                     weight_decay=weight_decay)
            es = ng_optimizer
        else:
            raise ValueError('Could not find optimizer!')

    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
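# The final branch above accepts any optimizer name registered with Nevergrad
# (Nevergrad here is this project's own wrapper class, not the library itself).
# To see which names that branch would accept, inspect the library's registry:
import nevergrad as ng
print(sorted(ng.optimizers.registry.keys()))  # all registered Nevergrad optimizer names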