def get(self, rollout_names, img_resize=(64, 64), save_path=os.path.dirname(os.path.abspath(__file__))):
    """Collect one rollout per name with a fresh random policy and pickle it.

    Parameters
    ----------
    rollout_names : iterable
        One identifier per rollout; used as the output file name.
    img_resize : tuple of int, default (64, 64)
        Target (H, W) for the observation frames.
        BUG FIX: the original accepted this parameter but passed a
        hard-coded ``(64, 64)`` to ``runner.run``; it is now honored.
    save_path : str
        Directory under which ``<env_name>_dataset/<name>`` files are written.
        Defaults to this file's directory (evaluated once at import time).

    Side effects: creates the gym environment, writes one pickle file per
    rollout containing ``{"obs", "actions", "rewards"}`` as numpy arrays,
    and closes the environment when done.
    """
    env = gym.make(self.env_name)
    try:
        for name in rollout_names:
            # A different random policy for every rollout diversifies the
            # data seen by the VAE and the MDN-RNN.
            policy = World_Model("random vae", "random mdn rnn", 3, self.device, random=True)
            runner = Env_Runner(self.device)
            # Random start tile further enriches the VAE/MDN-RNN dataset.
            obs, actions, rewards = runner.run(env, policy, img_resize=img_resize, random_start=True)
            data = {
                "obs": np.array(obs),
                "actions": np.array(actions),
                "rewards": np.array(rewards),
            }
            # os.path.join replaces the original hard-coded "\\" separators,
            # which broke on non-Windows systems (identical path on Windows).
            out_file = os.path.join(save_path, self.env_name + "_dataset", str(name))
            # `with` guarantees the file handle is closed even if dump fails.
            with open(out_file, "wb") as fh:
                pickle.dump(data, fh)
    finally:
        # Close the env even when a rollout raises.
        env.close()
def worker(solutions, env):
    """Evaluate evolution-strategy candidate controllers on `env`.

    Parameters
    ----------
    solutions : list (or single) flat weight vector
        Each vector packs the controller weight matrix of shape
        ``(actions, hidden_size + latent_size)`` followed by the bias.
    env : gym environment
        Environment the rollouts are run in; closed after all candidates
        have been evaluated.

    Returns
    -------
    list of float
        One fitness per candidate: the mean over ``num_rollouts`` of the
        *negative* episode return (the ES minimizes its objective).

    NOTE(review): relies on module globals ``dirname``, ``actions``,
    ``hidden_size``, ``latent_size``, ``device``, ``num_rollouts``.
    """
    fitness_solutions = []
    # Accept a single weight vector as well as a population.
    if not isinstance(solutions, list):
        solutions = [solutions]
    # Number of entries belonging to the weight matrix (rest is the bias).
    n_weights = actions * (hidden_size + latent_size)
    for weights in solutions:
        wm = World_Model(dirname + "\\vae.pt", dirname + "\\mdn_rnn.pt", actions, device)
        # Split the flat parameter vector into weight matrix and bias,
        # then install them as the controller of the world model.
        w_mat = np.reshape(weights[:n_weights], (actions, hidden_size + latent_size))
        w = nn.Parameter(torch.tensor(w_mat).float().to(device))
        b = nn.Parameter(torch.tensor(weights[n_weights:]).float().to(device))
        wm.set_controller(w, b)
        fitness = []
        for _ in range(num_rollouts):
            runner = Env_Runner(device)
            wm.reset_rnn()
            _, _, rewards = runner.run(env, wm, img_resize=(64, 64))
            # Append the negative return, because ES will try to minimize it.
            fitness.append(-np.sum(np.array(rewards)))
        fitness_solutions.append(np.mean(np.array(fitness)))
    # BUG FIX: the original called env.close() inside the candidate loop,
    # so every solution after the first was rolled out on a closed env.
    env.close()
    return fitness_solutions
#RL and MLP Parameters rl_reward = [0.0, -1.0, 1.0] #rewards for : Draw, Win, Loose rl_beta = 2 #bot_RL_MLP so ca. 1 - 5 mlp_hidden = 10 #number of hidden neurons mlp_learning_rate = 0.1 #learning-rate of the MLP #Misc Parameters runs = 10000000 #the number of runs log_interval = 1000 #print status every x runs save_interval = 1000 save_filename = "b" + str(initial_stones) + ".dat" draw_graph = False world = World_Model (size_x, size_y, size_win, gravity, initial_stones = initial_stones) sensor = world.get_sensor_info() f = [0]*len(sensor) for i in range(len(sensor)): f[i] = sensor[i] #Choose Bots bot_RL = Bot_RL_MLP(size_x, size_y, rl_beta, mlp_hidden, mlp_learning_rate, rl_reward, initial_field = f, player_ID = 1) bot_train = Bot_Random.Bot_Random_Static(size_x, size_y) #bot_train = Bot_Random.Bot_Random_Dynamic(size_x, size_y) win = [[],[],[]] scale = [] winner = [0,0,0] for counter in range (runs):
# RL and MLP Parameters rl_reward = [0.0, -1.0, 1.0] # rewards for : Draw, Win, Loose rl_beta = 3 # bot_RL_MLP so ca. 1 - 5 mlp_hidden = 10 # number of hidden neurons mlp_learning_rate = 0.1 # learning-rate of the MLP # Misc Parameters runs = 10000000 # the number of runs log_interval = 1000 # print status every x runs save_interval = 1000 save_filename = "b" + str(initial_stones) + ".dat" draw_graph = False world = World_Model(size_x, size_y, size_win, gravity, initial_stones=initial_stones) sensor = world.get_sensor_info() f = [0] * len(sensor) for i in range(len(sensor)): f[i] = sensor[i] # Choose Bots bot_2 = Bot_RL_MLP(size_x, size_y, rl_beta, mlp_hidden, mlp_learning_rate, rl_reward, initial_field=f, player_ID=1) bot_1 = Bot_Random.Bot_Random_Static(size_x, size_y) # bot_1 = Bot_Random.Bot_Random_Dynamic(size_x, size_y) win = [[], [], []] scale = [] winner = [0, 0, 0] for counter in range(runs):