    def get(self,
            rollout_names,
            img_resize=(64, 64),
            save_path=os.path.dirname(os.path.abspath(__file__))):
        """Collect one rollout per name with a fresh random policy and
        pickle its (obs, actions, rewards) arrays to
        <save_path>/<env_name>_dataset/<rollout name>."""

        env = gym.make(self.env_name)

        for name in rollout_names:

            # use a different random policy for every rollout; the two path
            # arguments are placeholder strings, unused when random=True
            policy = World_Model("random vae",
                                 "random mdn rnn",
                                 3,  # number of actions
                                 self.device,
                                 random=True)

            runner = Env_Runner(self.device)

            # let the agent start at a random track tile so the collected
            # frames cover more of the track for VAE and MDN-RNN training
            obs, actions, rewards = runner.run(env,
                                               policy,
                                               img_resize=img_resize,
                                               random_start=True)

            data = {
                "obs": np.array(obs),
                "actions": np.array(actions),
                "rewards": np.array(rewards)
            }
            # build the target path portably and make sure the dataset
            # directory exists before writing
            dataset_dir = os.path.join(save_path, self.env_name + "_dataset")
            os.makedirs(dataset_dir, exist_ok=True)
            with open(os.path.join(dataset_dir, str(name)), "wb") as file:
                pickle.dump(data, file)

        env.close()
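
# Example usage of the collector above (a minimal sketch: the collector
# class name Dataset_Collector, its constructor signature, and the Gym id
# are assumptions, since only the get() method is shown here):
#
#   collector = Dataset_Collector("CarRacing-v0", torch.device("cpu"))
#   collector.get([f"rollout_{n}" for n in range(100)])
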
def worker(solutions, env):
    """Evaluate a batch of flat controller parameter vectors in the world
    model and return one fitness value (negated mean return) per vector."""

    fitness_solutions = []

    # allow a single parameter vector to be passed without wrapping it
    if not isinstance(solutions, list):
        solutions = [solutions]

    for weights in solutions:

        # load the trained VAE and MDN-RNN; join paths portably instead of
        # hard-coding Windows separators
        wm = World_Model(os.path.join(dirname, "vae.pt"),
                         os.path.join(dirname, "mdn_rnn.pt"),
                         actions, device)

        # split the flat ES vector into the weight matrix and bias of the
        # linear controller that maps the concatenated RNN hidden state and
        # VAE latent to an action
        w = weights[:actions * (hidden_size + latent_size)]
        b = weights[actions * (hidden_size + latent_size):]

        w = nn.Parameter(
            torch.tensor(np.reshape(w, (actions, hidden_size + latent_size)),
                         dtype=torch.float32).to(device))
        b = nn.Parameter(torch.tensor(b, dtype=torch.float32).to(device))

        wm.set_controller(w, b)

        # average over several rollouts to reduce the noise of the
        # fitness estimate
        fitness = []
        for i in range(num_rollouts):

            runner = Env_Runner(device)
            wm.reset_rnn()  # fresh RNN hidden state for every rollout
            _, _, rewards = runner.run(env, wm, img_resize=(64, 64))

            # append the negative return, because the ES minimizes its
            # objective
            fitness.append(-np.sum(rewards))

        fitness_solutions.append(np.mean(fitness))

    # close the env only after all candidates have been evaluated; closing
    # it inside the solutions loop would break every evaluation after the
    # first candidate
    env.close()

    return fitness_solutions
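
# Minimal driver sketch for worker() above, assuming the `cma` package and
# the module-level names worker() relies on (dirname, actions, hidden_size,
# latent_size, num_rollouts, device); env_name is a hypothetical stand-in
# for the Gym id to train on. The flat vector holds the controller weights
# followed by the biases, matching the split inside worker():
#
#   import cma
#
#   n_params = actions * (hidden_size + latent_size) + actions
#   es = cma.CMAEvolutionStrategy(n_params * [0.0], 0.5)
#   while not es.stop():
#       env = gym.make(env_name)  # worker() closes the env when done
#       candidates = es.ask()
#       es.tell(candidates, worker(candidates, env))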