def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    """Derive the run-wide settings and instantiate the selected solver.

    Reads the module globals ``num_worker``, ``num_worker_trial``,
    ``gamename``, ``optimizer``, ``num_episode`` and (for the fallback
    OpenES branch) ``antithetic``; publishes its results through the
    globals declared below.

    Args:
        sigma_init: forwarded as ``sigma_init`` to every solver.
        sigma_decay: forwarded as ``sigma_decay`` to every solver except
            CMA-ES, which is built without it.
    """
    global population, filebase, game, model, num_params, es
    global PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE

    population = num_worker * num_worker_trial
    filebase = 'log/' + '.'.join(
        [gamename, optimizer, str(num_episode), str(population)])
    game = config.games[gamename]
    model = make_model(game)
    num_params = model.param_count
    print("size of model", num_params)

    # Keyword arguments common to every solver except CMA-ES.
    annealed = dict(sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_limit=0.02,
                    weight_decay=0.005,
                    popsize=population)
    # Same as above plus the learning-rate settings that the 'pepg' and
    # OpenES branches pass.
    annealed_with_lr = dict(annealed,
                            learning_rate=0.01,
                            learning_rate_decay=1.0,
                            learning_rate_limit=0.01)

    if optimizer == 'ses':
        es = PEPG(num_params, sigma_alpha=0.2, elite_ratio=0.1, **annealed)
    elif optimizer == 'ga':
        es = SimpleGA(num_params, elite_ratio=0.1, **annealed)
    elif optimizer == 'cma':
        es = CMAES(num_params, sigma_init=sigma_init, popsize=population)
    elif optimizer == 'pepg':
        es = PEPG(num_params, sigma_alpha=0.20, **annealed_with_lr)
    else:
        # Any other optimizer name falls back to OpenES.
        es = OpenES(num_params, antithetic=antithetic, **annealed_with_lr)

    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
def evolve():
    """Run OpenES until a generation's best reward exceeds the target.

    Candidate solutions are scored in a 4-process pool via ``worker``.
    After each generation the max/mean/min reward is printed.

    Returns:
        The first element of ``solver.result()`` for the generation whose
        maximum reward exceeded ``MY_REQUIRED_REWARD``; or, when interrupted
        with Ctrl-C, the value recorded for the last completed generation
        (``None`` if no generation completed).
    """
    evaler = Evaluator()
    solver = OpenES(evaler.n_weights, popsize=200)
    # The evaluator is only needed for its parameter count.
    del evaler

    pool = Pool(4, init_worker)
    best_solution_so_far = None
    try:
        for generation in count():
            # Ask the ES for a set of candidate solutions.
            solutions = solver.ask()
            # Score the candidates in parallel, 10 per task chunk.
            rewards = pool.map(worker, solutions, 10)
            # Give the rewards back to the ES.
            solver.tell(rewards)
            reward_vector = solver.result()

            generation_max_reward = max(rewards)
            generation_mean_reward = sum(rewards) / len(rewards)
            generation_min_reward = min(rewards)
            print("gen: {},max:{},mean:{},min:{}".format(
                generation, generation_max_reward, generation_mean_reward,
                generation_min_reward))

            best_solution_so_far = reward_vector[0]
            if generation_max_reward > MY_REQUIRED_REWARD:
                return reward_vector[0]
    except KeyboardInterrupt:
        return best_solution_so_far
    finally:
        # Always shut the workers down; previously the pool was leaked on
        # both the normal return and the Ctrl-C path.
        pool.terminate()
        pool.join()
# Genetic-algorithm solver; forget_best is read from args.forget_best.
ga = SimpleGA(NPARAMS,
              popsize=NPOPULATION,
              sigma_init=0.5,
              elite_ratio=0.1,
              weight_decay=0.00,
              forget_best=args.forget_best)

# OpenES solver: plain fitness values (no ranking), no antithetic sampling,
# no weight decay.
oes = OpenES(NPARAMS,
             popsize=NPOPULATION,
             sigma_init=0.5,
             sigma_decay=0.999,
             learning_rate=0.1,
             learning_rate_decay=1.0,
             weight_decay=0.00,
             antithetic=False,
             rank_fitness=False,
             forget_best=False)

# CMA-ES solver with the same population size and initial sigma.
cmaes = CMAES(NPARAMS, sigma_init=0.5, weight_decay=0.0, popsize=NPOPULATION)

# One worker process per reported CPU.
print(mp.cpu_count())
pool = mp.Pool(mp.cpu_count())

# Objective used by the solver driver that follows.
fit_func = evluate_func
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999, init_opt=''):
    """Derive the run-wide settings and create (or restore) the solver.

    Reads the module globals ``num_worker``, ``num_worker_trial``,
    ``env_name``, ``optimizer``, ``num_episode`` and (for the fallback
    OpenES branch) ``antithetic``; publishes its results through the
    globals declared below.

    Args:
        sigma_init: forwarded as ``sigma_init`` to a newly built solver.
        sigma_decay: forwarded as ``sigma_decay`` to every newly built
            solver except CMA-ES.
        init_opt: path of a pickled solver to resume from. When empty (or
            ``None``) a fresh solver is built from ``optimizer``.
    """
    global population, filebase, controller_filebase, model, num_params, es
    global PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE

    population = num_worker * num_worker_trial
    run_tag = (env_name + '.' + optimizer + '.' + str(num_episode) + '.' +
               str(population))
    filebase = './log/' + run_tag
    controller_filebase = './controller/' + run_tag

    model = make_model()
    num_params = model.param_count

    if init_opt:
        # NOTE: unpickling executes arbitrary code -- only pass files that
        # were written by a trusted run.
        # The context manager closes the handle the old code leaked.
        with open(init_opt, 'rb') as solver_file:
            es = pickle.load(solver_file)
    elif optimizer == 'ses':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.2,
                  sigma_limit=0.02,
                  elite_ratio=0.1,
                  weight_decay=0.005,
                  popsize=population)
    elif optimizer == 'ga':
        es = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=0.005,
                      popsize=population)
    elif optimizer == 'cma':
        es = CMAES(num_params, sigma_init=sigma_init, popsize=population)
    elif optimizer == 'pepg':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.20,
                  sigma_limit=0.02,
                  learning_rate=0.01,
                  learning_rate_decay=1.0,
                  learning_rate_limit=0.01,
                  weight_decay=0.005,
                  popsize=population)
    else:
        # Any other optimizer name falls back to OpenES.
        es = OpenES(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    antithetic=antithetic,
                    weight_decay=0.005,
                    popsize=population)

    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (4 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
def train_controller_openes():
    """Train the RNN controller on Sonic 2 with OpenES.

    Loads the ConvVAE, LSTM-MDN and controller weights when their files
    exist, seeds an ``OpenES`` solver either from a saved evaluator
    snapshot or from the controller's current parameters, then runs the
    ask / evaluate / tell loop. After every generation the best weights
    reported by the solver are written to
    ``weights/evaluator_openes_weights_<generation>_<fitness>.npz``.
    """
    # Other titles previously tried here: SonicTheHedgehog-Genesis,
    # SonicAndKnuckles-Genesis, SonicTheHedgehog3-Genesis,
    # SonicAndKnuckles3-Genesis.
    env_name = "SonicTheHedgehog2-Genesis"

    conv_vae_filename = "weights/conv_vae_gray.pkl"
    lstm_mdn_filename = "weights/lstm_mdn_gray.pkl"
    controller_filename = "weights/controller_rnn_1024_12.pkl"
    evaluator_filename = "weights/evaluator_openes_weights_20_0.499982.npz"

    population_size = 256
    generations = 5000
    # 524288 + 262144 + 512 + 512 + 6144 + 12: the sizes of the six
    # controller tensors listed in controller_keys below.
    num_params = 793612
    # Concatenation order of the controller parameters in the flat vector.
    controller_keys = ("rnn.weight_ih_l0", "rnn.weight_hh_l0",
                       "rnn.bias_ih_l0", "rnn.bias_hh_l0",
                       "dence.weight", "dence.bias")

    env = retro.make(env_name)
    try:
        # Only used for forward passes.
        conv_vae = ConvVAE((1, 128, 128), 1024)
        if os.path.exists(conv_vae_filename):
            print("loading conv vae weights")
            conv_vae.load_state_dict(torch.load(conv_vae_filename))

        # Only used for forward passes.
        lstm_mdn = LSTM(vector_size=1024)
        if os.path.exists(lstm_mdn_filename):
            print("loading lstm mdn weights")
            lstm_mdn.load_state_dict(torch.load(lstm_mdn_filename))

        controller = Controller_RNN(input_size=1024, batch_size=2)
        if os.path.exists(controller_filename):
            print("loading controller weights")
            controller.load_state_dict(torch.load(controller_filename))

        if os.path.exists(evaluator_filename):
            # Resume from a previously saved solver snapshot.
            print("loading evaluator data")
            data = np.load(evaluator_filename)
            initial_weights = data["weights"]
            print("inserting weights into controller")
            controller.set_weights(initial_weights)
        else:
            # Start from the controller's current parameters, flattened
            # into one vector.
            print("extracting controller weights")
            state_dict = controller.state_dict()
            flattened = torch.cat(
                [torch.flatten(state_dict[key]) for key in controller_keys],
                dim=0)
            initial_weights = flattened.data.numpy()

        evaluator = OpenES(num_params=num_params,
                           popsize=population_size,
                           existing_weights=initial_weights)

        for generation in range(generations):
            solutions = evaluator.ask()
            fitness = np.zeros(population_size)
            for i in range(population_size):
                fitness[i] = evaluate(weights=solutions[i],
                                      conv_vae=conv_vae,
                                      lstm_mdn=lstm_mdn,
                                      controller=controller,
                                      env=env)
            evaluator.tell(fitness)

            # result(): first element is the best solution, second element
            # is the best fitness.
            result = evaluator.result()
            best_weights = result[0]
            best_fitness = result[1]
            print(generation, best_fitness)

            print("save evaluator data weights")
            evaluator_weights_filename = (
                "weights/evaluator_openes_weights_%s_%s.npz" %
                (generation, best_fitness))
            np.savez(evaluator_weights_filename, weights=best_weights)
    finally:
        # Release the emulator even when training is interrupted or fails;
        # the environment was previously never closed.
        env.close()
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    """Derive the run-wide settings, create the log dir and build the solver.

    Reads the module globals ``num_worker``, ``num_worker_trial``,
    ``exp_name``, ``env_name``, ``optimizer``, ``num_episode``,
    ``config_args`` and (for the fallback OpenES branch) ``antithetic``;
    publishes its results through the globals declared below.

    Args:
        sigma_init: forwarded as ``sigma_init`` to every solver.
        sigma_decay: forwarded as ``sigma_decay`` to every solver except
            CMA-ES, which is built without it.
    """
    global population, filebase, game, controller, num_params, es
    global PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE

    population = num_worker * num_worker_trial

    filedir = 'results/{}/{}/log/'.format(exp_name, env_name)
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    # when several processes start at once.
    os.makedirs(filedir, exist_ok=True)
    filebase = filedir + env_name + '.' + optimizer + '.' + str(
        num_episode) + '.' + str(population)

    controller = make_controller(args=config_args)
    num_params = controller.param_count
    print("size of model", num_params)

    if optimizer == 'ses':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.2,
                  sigma_limit=0.02,
                  elite_ratio=0.1,
                  weight_decay=0.005,
                  popsize=population)
    elif optimizer == 'ga':
        es = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=0.005,
                      popsize=population)
    elif optimizer == 'cma':
        es = CMAES(num_params, sigma_init=sigma_init, popsize=population)
    elif optimizer == 'pepg':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.20,
                  sigma_limit=0.02,
                  learning_rate=0.01,
                  learning_rate_decay=1.0,
                  learning_rate_limit=0.01,
                  weight_decay=0.005,
                  popsize=population)
    else:
        # Any other optimizer name falls back to OpenES.
        es = OpenES(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    antithetic=antithetic,
                    weight_decay=0.005,
                    popsize=population)

    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    """Derive the run-wide settings (novelty-search variant) and build the solver.

    Creates ``<ROOT>/log`` if needed, composes ``filebase`` from the game,
    optimizer, model name, episode count, population and ``unique_id``
    (plus optional novelty suffixes), picks ``BC_SIZE`` from
    ``novelty_mode``, loads the model and instantiates the solver selected
    by ``optimizer``.

    Args:
        sigma_init: forwarded as ``sigma_init`` to every solver.
        sigma_decay: forwarded as ``sigma_decay`` to every solver except
            CMA-ES, which is built without it.
    """
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE, model_name, novelty_search, unique_id, novelty_mode, BC_SIZE, ns_mode
    population = num_worker * num_worker_trial
    os.makedirs(os.path.join(ROOT, 'log'), exist_ok=True)
    filebase = os.path.join(ROOT, 'log', gamename+'.'+optimizer+'.'+ model_name + '.' + str(num_episode)+'.'+str(population)) + '.' + unique_id
    if novelty_search:
        filebase = filebase + '.novelty'
    # NOTE(review): the nesting of the chain below relative to
    # `if novelty_search:` could not be recovered from the collapsed source.
    # It is laid out at function level here so that BC_SIZE is always
    # assigned before RESULT_PACKET_SIZE reads it -- confirm against the
    # original file.
    if novelty_mode == 'h':
        BC_SIZE = H_SIZE
    elif novelty_mode == 'z':
        BC_SIZE = Z_SIZE
    elif novelty_mode =='h_concat':
        BC_SIZE = BC_SEQ_LENGTH * H_SIZE
        # (disabled) NOVELTY_THRESHOLD = 180
    elif novelty_mode == 'z_concat':
        BC_SIZE = BC_SEQ_LENGTH * Z_SIZE
    elif novelty_mode == 'a_concat':
        BC_SIZE = BC_SEQ_LENGTH * A_SIZE
    else:
        BC_SIZE = 9  # dummy BC size; not used because the reward is the distance travelled.
    # Append the novelty mode and ns mode to the file name when they are set.
    if novelty_mode:
        filebase = filebase + '.' + novelty_mode
    if ns_mode:
        filebase = filebase + '.' + ns_mode
    model = make_model(model_name, load_model=True)
    num_params = model.param_count
    print("size of model", num_params)
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    # Each result entry is widened by BC_SIZE compared with the plain 4.
    RESULT_PACKET_SIZE = (4 + BC_SIZE) * num_worker_trial
    if optimizer == 'ses':
        ses = PEPG(num_params,
                   sigma_init=sigma_init,
                   sigma_decay=sigma_decay,
                   sigma_alpha=0.2,
                   sigma_limit=0.02,
                   elite_ratio=0.1,
                   weight_decay=0.005,
                   popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=0.005,
                      popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params,
                    sigma_init=sigma_init,
                    popsize=population)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_alpha=0.20,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    weight_decay=0.005,
                    popsize=population)
        es = pepg
    else:
        # Any other optimizer name falls back to OpenES.
        oes = OpenES(num_params,
                     sigma_init=sigma_init,
                     sigma_decay=sigma_decay,
                     sigma_limit=0.02,
                     learning_rate=0.01,
                     learning_rate_decay=1.0,
                     learning_rate_limit=0.01,
                     antithetic=antithetic,
                     weight_decay=0.005,
                     popsize=population)
        es = oes
def testRun():
    """Run PEPG, two PEPG variants, OpenES and CMA-ES and plot their histories.

    Calls ``config()``, prints the fitness of a random initial guess, runs
    ``test_solver`` on each solver and saves the comparison figure to
    ``./results/rose_<NPARAMS>d.svg``.
    """
    import os  # local import: only needed to create the output directory

    config()
    x = np.random.randn(NPARAMS)
    print("The fitness of initial guess", fit_func(x))

    # Settings shared by PEPG and both PEPGVariant runs.
    pepg_settings = dict(
        sigma_init=0.5,            # initial standard deviation
        learning_rate=0.1,
        learning_rate_decay=1.0,   # don't anneal the learning rate
        popsize=NPOPULATION,       # population size
        average_baseline=False,    # set baseline to average of batch
        weight_decay=0.00,         # weight decay coefficient
        rank_fitness=False,        # use rank rather than fitness numbers
        forget_best=False)         # don't keep the historical best solution

    pepg = PEPG(NPARAMS, **pepg_settings)
    pepg_history = test_solver(pepg)

    pepgV = PEPGVariant(NPARAMS, diversity_best=0.1, **pepg_settings)
    print("-----test PEPG vairant-----")
    pepgv_history = test_solver(pepgV)

    print("---test PEPG variant with different diversity-----")
    pepgV2 = PEPGVariant(NPARAMS, diversity_best=1, **pepg_settings)
    # This history is computed but not drawn on the figure below.
    pepgV2_history = test_solver(pepgV2)

    oes = OpenES(
        NPARAMS,                   # number of model parameters
        sigma_init=0.5,            # initial standard deviation
        # NOTE(review): the old comment here said "don't anneal", but the
        # factor is below 1.0 -- confirm the intended schedule.
        sigma_decay=0.999,
        learning_rate=0.1,
        learning_rate_decay=1.0,
        popsize=NPOPULATION,       # population size
        antithetic=False,          # whether to use antithetic sampling
        weight_decay=0.00,         # weight decay coefficient
        rank_fitness=False,        # use rank rather than fitness numbers
        forget_best=False)
    print("-----test oes--------------")
    oes_history = test_solver(oes)

    cmaes = CMAES(NPARAMS,
                  popsize=NPOPULATION,
                  weight_decay=0.0,
                  sigma_init=0.5)
    cma_history = test_solver(cmaes)

    # Reference line at zero, labelled as the global optimum.
    best_history = [0] * MAX_ITERATION

    plt.figure(figsize=(16, 8), dpi=150)
    optimum_line, = plt.plot(best_history, color="black", linewidth=0.5,
                             linestyle="-.", label='Global Optimum')
    pepgv_line, = plt.plot(pepgv_history, color="red", linewidth=1.0,
                           linestyle="-", label='PEPGV / NES')
    pepg_line, = plt.plot(pepg_history, color="blue", linewidth=1.0,
                          linestyle="-.", label='PEPG / NES')
    oes_line, = plt.plot(oes_history, color="orange", linewidth=1.0,
                         linestyle="-", label='OpenAI-ES')
    cma_line, = plt.plot(cma_history, color="green", linewidth=1.0,
                         linestyle="-", label='CMA-ES')
    # cma_line was previously plotted but missing from the legend.
    plt.legend(handles=[optimum_line, pepgv_line, pepg_line, oes_line,
                        cma_line],
               loc='best')
    plt.xlim(0, 100)
    plt.xlabel('generation')
    plt.ylabel('loss')

    # Make sure the output directory exists so the finished runs are not
    # lost to a missing-directory error at save time.
    os.makedirs("./results", exist_ok=True)
    plt.savefig("./results/rose_" + str(NPARAMS) + "d.svg")
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999, weight_decay=0.005):
    """Derive the run-wide settings and instantiate the selected solver.

    Reads the module globals ``num_worker``, ``num_worker_trial``,
    ``gamename``, ``optimizer``, ``num_episode`` and (for 'oes')
    ``antithetic``; publishes its results through the globals declared
    below.

    Args:
        sigma_init: forwarded as ``sigma_init`` to every solver.
        sigma_decay: forwarded as ``sigma_decay`` to the 'ses', 'ga',
            'pepg' and 'oes' solvers.
        weight_decay: forwarded as ``weight_decay`` to every solver.

    Raises:
        ValueError: if ``optimizer`` is neither a built-in name nor a key
            of ``ng.optimizers.registry``.
    """
    global population, filebase, game, model, num_params, es
    global PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE

    population = num_worker * num_worker_trial
    filebase = 'log/' + gamename + '.' + optimizer + '.' + str(
        num_episode) + '.' + str(population)
    game = config.games[gamename]
    model = make_model(game)
    num_params = model.param_count
    print("size of model", num_params)

    # The three PSO flavours differ only in their communication topology.
    pso_topologies = {
        'global_pso': 'global',
        'local_pso': 'local',
        'random_pso': 'random',
    }

    if optimizer == 'ses':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.2,
                  sigma_limit=0.02,
                  elite_ratio=0.1,
                  weight_decay=weight_decay,
                  popsize=population)
    elif optimizer == 'ga':
        es = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=weight_decay,
                      popsize=population)
    elif optimizer == 'cma':
        es = CMAES(num_params,
                   sigma_init=sigma_init,
                   popsize=population,
                   weight_decay=weight_decay)
    elif optimizer == 'pepg':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.20,
                  sigma_limit=0.02,
                  learning_rate=0.01,
                  learning_rate_decay=1.0,
                  learning_rate_limit=0.01,
                  weight_decay=weight_decay,
                  popsize=population)
    elif optimizer == 'oes':
        es = OpenES(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    antithetic=antithetic,
                    weight_decay=weight_decay,
                    popsize=population)
    elif optimizer in pso_topologies:
        es = Pyswarms(num_params,
                      sigma_init=sigma_init,
                      weight_decay=weight_decay,
                      popsize=population,
                      communication_topology=pso_topologies[optimizer])
    elif optimizer in ng.optimizers.registry:
        # Direct membership test; no need to build and sort a key list.
        es = Nevergrad(optimizer,
                       num_params,
                       sigma_init=sigma_init,
                       popsize=population,
                       weight_decay=weight_decay)
    else:
        raise ValueError('Could not find optimizer!')

    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial