def evolve():
    """Run the OpenES search loop on a worker pool until the reward target is hit.

    Every generation asks the solver for candidate solutions, scores them by
    mapping ``worker`` over a 4-worker pool, feeds the rewards back to the
    solver and prints the generation's max/mean/min reward.

    Returns:
        ``solver.result()[0]`` (the solver's best parameters) from the first
        generation whose maximum reward exceeds ``MY_REQUIRED_REWARD``.  If
        the run is interrupted with Ctrl-C, the value recorded after the last
        completed generation is returned instead; that is ``None`` when no
        generation has completed yet.
    """
    # The evaluator is instantiated only to read the number of weights the
    # solver has to optimise; it is released straight away.
    evaler = Evaluator()
    solver = OpenES(evaler.n_weights, popsize=200)
    del evaler

    pool = Pool(4, init_worker)
    best_solution_so_far = None
    try:
        for generation in count():
            # Ask the ES for a set of candidate solutions.
            solutions = solver.ask()

            # Score the candidates in parallel, handing them to the workers in
            # chunks of 10.
            rewards = pool.map(worker, solutions, 10)

            # Give the rewards back to the ES.
            solver.tell(rewards)

            # First element is the best parameters, second the best reward.
            reward_vector = solver.result()

            generation_max_reward = max(rewards)
            generation_mean_reward = sum(rewards) / len(rewards)
            generation_min_reward = min(rewards)
            print("gen: {},max:{},mean:{},min:{}".format(
                generation, generation_max_reward, generation_mean_reward,
                generation_min_reward))

            best_solution_so_far = reward_vector[0]
            if generation_max_reward > MY_REQUIRED_REWARD:
                return reward_vector[0]
    except KeyboardInterrupt:
        return best_solution_so_far
    finally:
        # Previously the pool was never shut down, leaving the workers alive
        # after this function returned.  All results have been collected (or
        # abandoned on Ctrl-C) by the time we get here, so stopping the
        # workers immediately is safe.
        # NOTE(review): assumes Pool is multiprocessing.Pool (or an
        # API-compatible pool exposing terminate()/join()) - confirm.
        pool.terminate()
        pool.join()
def train_controller_openes():
    """Train the RNN controller with OpenES on a Sonic retro environment.

    Steps:
      1. Create the retro environment and build the ConvVAE, LSTM and
         Controller_RNN models, loading their weight files when present.
      2. Seed an OpenES optimiser either from a previously saved evaluator
         ``.npz`` file (whose weights are also pushed into the controller) or
         from the controller's current weights, flattened into one vector.
      3. For each generation: ask for candidate solutions, score each one
         with ``evaluate``, report the fitness back, print the best fitness
         and save the best weights to a per-generation ``.npz`` file under
         ``weights/``.

    Returns:
        None.  Progress is printed and weights are written to disk.
    """
    # Alternative games used previously: "SonicTheHedgehog-Genesis",
    # "SonicAndKnuckles-Genesis", "SonicTheHedgehog3-Genesis",
    # "SonicAndKnuckles3-Genesis".
    env_name = "SonicTheHedgehog2-Genesis"

    # Grayscale model weights (1 input channel, 1024-wide latent vector).
    conv_vae_filename = "weights/conv_vae_gray.pkl"
    lstm_mdn_filename = "weights/lstm_mdn_gray.pkl"
    controller_filename = "weights/controller_rnn_1024_12.pkl"
    evaluator_filename = "weights/evaluator_openes_weights_20_0.499982.npz"

    population_size = 256
    generations = 5000
    # Total controller parameter count:
    # 524288 + 262144 + 512 + 512 + 6144 + 12 = 793612.
    num_params = 793612

    # Observation space is Box(224, 320, 3); action space is MultiBinary(12).
    env = retro.make(env_name)
    try:
        # The VAE and the LSTM are only used for forward passes here.
        conv_vae = ConvVAE((1, 128, 128), 1024)
        if os.path.exists(conv_vae_filename):
            print("loading conv vae weights")
            conv_vae.load_state_dict(torch.load(conv_vae_filename))

        lstm_mdn = LSTM(vector_size=1024)
        if os.path.exists(lstm_mdn_filename):
            print("loading lstm mdn weights")
            lstm_mdn.load_state_dict(torch.load(lstm_mdn_filename))

        controller = Controller_RNN(input_size=1024, batch_size=2)
        if os.path.exists(controller_filename):
            print("loading controller weights")
            controller.load_state_dict(torch.load(controller_filename))

        if os.path.exists(evaluator_filename):
            # Resume from a previously saved optimiser state.
            print("loading evaluator data")
            # Use the archive as a context manager so its file handle is
            # released once the array has been read into memory.
            with np.load(evaluator_filename) as data:
                weights = data["weights"]
            print("inserting weights into controller")
            controller.set_weights(weights)
            initial_weights = weights
        else:
            # Start from the controller's current parameters.
            print("extracting controller weights")
            state_dict = controller.state_dict()
            # Concatenation order defines the layout of the flat vector and
            # must not change.
            parameter_keys = (
                "rnn.weight_ih_l0",  # [512, 1024] -> 524288
                "rnn.weight_hh_l0",  # [512, 512]  -> 262144
                "rnn.bias_ih_l0",    # [512]
                "rnn.bias_hh_l0",    # [512]
                "dence.weight",      # [12, 512]   -> 6144
                "dence.bias",        # [12]
            )
            flattened_controller_weights = torch.cat(
                tuple(torch.flatten(state_dict[key]) for key in parameter_keys),
                dim=0)  # [793612]
            initial_weights = flattened_controller_weights.data.numpy()

        evaluator = OpenES(num_params=num_params,
                           popsize=population_size,
                           existing_weights=initial_weights)

        for generation in range(generations):
            # Presumably one row per candidate:
            # (population_size, num_params) - TODO confirm.
            solutions = evaluator.ask()
            fitness = np.zeros(population_size)
            for i in range(population_size):
                fitness[i] = evaluate(weights=solutions[i],
                                      conv_vae=conv_vae,
                                      lstm_mdn=lstm_mdn,
                                      controller=controller,
                                      env=env)
            evaluator.tell(fitness)

            # First element is the best solution, second the best fitness.
            result = evaluator.result()
            best_weights = result[0]
            best_fitness = result[1]
            print(generation, best_fitness)

            # Checkpoint the best weights after every generation.
            print("save evaluator data weights")
            evaluator_weights_filename = (
                "weights/evaluator_openes_weights_%s_%s.npz"
                % (generation, best_fitness))
            np.savez(evaluator_weights_filename, weights=best_weights)
    finally:
        # Previously the emulator was never closed; release it even when
        # training is interrupted or raises.
        env.close()