def get(self,
        rollout_names,
        img_resize=(64, 64),
        save_path=os.path.dirname(os.path.abspath(__file__))):
    """Collect one random-policy rollout per name and pickle it to disk.

    Parameters
    ----------
    rollout_names : iterable
        One dataset file is written per element; the element is used as
        the file name inside ``<save_path>/<env_name>_dataset/``.
    img_resize : tuple(int, int)
        Target (width, height) for observation frames.
    save_path : str
        Directory under which the dataset folder lives (defaults to the
        directory containing this file).
    """
    env = gym.make(self.env_name)

    for rollout_name in rollout_names:

        # A different random policy for every rollout diversifies the
        # data the VAE / MDN-RNN will later be trained on.
        policy = World_Model("random vae",
                             "random mdn rnn",
                             3,
                             self.device,
                             random=True)

        runner = Env_Runner(self.device)

        # Let the agent start at a random track tile to enrich the data.
        # BUG FIX: forward the caller-supplied img_resize instead of a
        # hard-coded (64, 64) that silently ignored the parameter.
        obs, actions, rewards = runner.run(env,
                                           policy,
                                           img_resize=img_resize,
                                           random_start=True)

        data = {
            "obs": np.array(obs),
            "actions": np.array(actions),
            "rewards": np.array(rewards)
        }

        # os.path.join is portable (the original hard-coded "\\", which
        # only works on Windows); the context manager guarantees the
        # file is closed even if pickling raises.
        out_path = os.path.join(save_path,
                                self.env_name + "_dataset",
                                str(rollout_name))
        with open(out_path, "wb") as out_file:
            pickle.dump(data, out_file)

    env.close()
def worker(solutions, env):
    """Evaluate candidate controller weight vectors on the environment.

    Each element of ``solutions`` is a flat vector holding the linear
    controller's weight matrix (first ``actions * (hidden_size +
    latent_size)`` entries) followed by its bias.  For every candidate the
    world model is rebuilt from the saved VAE / MDN-RNN checkpoints, the
    controller installed, and the mean NEGATIVE return over
    ``num_rollouts`` rollouts recorded (ES minimizes fitness).

    Relies on module globals: dirname, actions, hidden_size, latent_size,
    num_rollouts, device.

    Returns a list with one fitness value per candidate.
    """
    fitness_solutions = []

    # Allow a single candidate to be passed without wrapping it.
    if not isinstance(solutions, list):
        solutions = [solutions]

    # Boundary between the flattened weight matrix and the bias vector.
    split = actions * (hidden_size + latent_size)

    for weights in solutions:

        # os.path.join instead of hard-coded "\\" for portability.
        wm = World_Model(os.path.join(dirname, "vae.pt"),
                         os.path.join(dirname, "mdn_rnn.pt"),
                         actions, device)

        w = weights[:split]
        b = weights[split:]

        # .float() replaces the deprecated .type('torch.FloatTensor').
        w = nn.Parameter(
            torch.tensor(
                np.reshape(w, (actions, hidden_size + latent_size))
            ).float().to(device))
        b = nn.Parameter(torch.tensor(b).float().to(device))

        wm.set_controller(w, b)

        fitness = []
        for _ in range(num_rollouts):

            runner = Env_Runner(device)
            wm.reset_rnn()
            _, _, rewards = runner.run(env, wm, img_resize=(64, 64))

            # Append negative return, because ES will try to minimize it.
            fitness.append(-np.sum(np.array(rewards)))

        fitness_solutions.append(np.mean(np.array(fitness)))

    # BUG FIX: close the environment once after ALL candidates are
    # evaluated.  The original called env.close() inside the candidate
    # loop, so every candidate after the first ran on a closed env.
    env.close()

    return fitness_solutions
# --- Ejemplo n.º 3 (scrape artifact: example separator; commented out so the file parses) ---
#RL and MLP Parameters
rl_reward = [0.0, -1.0, 1.0]    #rewards for : Draw, Win, Loose
rl_beta = 2                     #Bot_RL_MLP beta, roughly 1 - 5
mlp_hidden = 10                 #number of hidden neurons
mlp_learning_rate = 0.1         #learning-rate of the MLP

#Misc Parameters
runs          = 10000000      #the number of runs
log_interval  = 1000          #print status every x runs
save_interval = 1000          #save state every x runs
save_filename = "b" + str(initial_stones) + ".dat"
draw_graph    = False


world = World_Model (size_x, size_y, size_win, gravity, initial_stones = initial_stones)
sensor = world.get_sensor_info()
# IDIOM FIX: copy the sensor readout in one step instead of the original
# pre-sized list + manual index loop (f holds the same elements).
f = list(sensor)

#Choose Bots: a learning RL-MLP agent (player 1) against a random opponent
bot_RL = Bot_RL_MLP(size_x, size_y, rl_beta, mlp_hidden, mlp_learning_rate, rl_reward, initial_field = f, player_ID = 1)
bot_train = Bot_Random.Bot_Random_Static(size_x, size_y)
#bot_train = Bot_Random.Bot_Random_Dynamic(size_x, size_y)

win    = [[],[],[]]    # per-outcome history samples (for logging/plotting)
scale  = []            # x-axis sample points matching `win`
winner = [0,0,0]       # running outcome counts, indexed like rl_reward
for counter in range (runs):
# --- Ejemplo n.º 4 (scrape artifact: example separator; commented out so the file parses) ---
# RL and MLP Parameters
rl_reward = [0.0, -1.0, 1.0]  # rewards for : Draw, Win, Loose
rl_beta = 3  # Bot_RL_MLP beta, roughly 1 - 5
mlp_hidden = 10  # number of hidden neurons
mlp_learning_rate = 0.1  # learning-rate of the MLP

# Misc Parameters
runs = 10000000  # the number of runs
log_interval = 1000  # print status every x runs
save_interval = 1000  # save state every x runs
save_filename = "b" + str(initial_stones) + ".dat"
draw_graph = False


world = World_Model(size_x, size_y, size_win, gravity, initial_stones=initial_stones)
sensor = world.get_sensor_info()
# IDIOM FIX: copy the sensor readout in one step instead of the original
# pre-sized list + manual index loop (f holds the same elements).
f = list(sensor)

# Choose Bots: a learning RL-MLP agent (player 1) against a random opponent
bot_2 = Bot_RL_MLP(size_x, size_y, rl_beta, mlp_hidden, mlp_learning_rate, rl_reward, initial_field=f, player_ID=1)
bot_1 = Bot_Random.Bot_Random_Static(size_x, size_y)
# bot_1 = Bot_Random.Bot_Random_Dynamic(size_x, size_y)

win = [[], [], []]  # per-outcome history samples (for logging/plotting)
scale = []  # x-axis sample points matching `win`
winner = [0, 0, 0]  # running outcome counts, indexed like rl_reward
for counter in range(runs):