Example #1
import os
from itertools import count
from multiprocessing import Pool

import numpy as np
import retro
import torch

# Evaluator, OpenES, ConvVAE, LSTM, Controller_RNN, evaluate and
# MY_REQUIRED_REWARD are assumed to be defined elsewhere in the project;
# OpenES follows the estool-style ask/tell interface.


def evolve():
    # build an Evaluator only to read how many weights the ES must
    # optimize, then free it (each pool worker creates its own instance)
    evaler = Evaluator()
    solver = OpenES(evaler.n_weights, popsize=200)
    del evaler
    # evaluate candidates in parallel across 4 worker processes
    pool = Pool(4, init_worker)
    best_solution_so_far = None
    try:
        for generation in count():

            # ask the ES to give us a set of candidate solutions
            solutions = solver.ask()

            # score every candidate in parallel across the pool
            # (chunksize=10); this replaces the serial version:
            #   rewards = np.zeros(solver.popsize)
            #   for i in range(solver.popsize):
            #       rewards[i] = evaler.evaluate(solutions[i])
            rewards = pool.map(worker, solutions, 10)

            # give rewards back to ES
            solver.tell(rewards)

            # get the best parameters and reward seen so far; result()'s
            # first element is the best solution, the second its reward
            result = solver.result()

            generation_max_reward = max(rewards)
            generation_mean_reward = sum(rewards) / len(rewards)
            generation_min_reward = min(rewards)

            # print("gen: {},max:{},vector:{}".format(generation, generation_max_reward, reward_vector[1]))
            print("gen: {},max:{},mean:{},min:{}".format(
                generation, generation_max_reward, generation_mean_reward,
                generation_min_reward))
            best_solution_so_far = reward_vector[0]
            if generation_max_reward > MY_REQUIRED_REWARD:
                return reward_vector[0]
    except KeyboardInterrupt:
        return best_solution_so_far
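

# `init_worker` and `worker` are used by the Pool above but are not shown
# in this example. Below is a minimal sketch of what they might look like,
# assuming each pool process keeps one private Evaluator (the module-level
# `_evaler` name is illustrative, not from the source):

_evaler = None


def init_worker():
    # runs once per pool process: build a process-local Evaluator
    global _evaler
    _evaler = Evaluator()


def worker(solution):
    # score a single candidate parameter vector in this process
    return _evaler.evaluate(solution)
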
def train_controller_openes():

    # env_name = "SonicTheHedgehog-Genesis" # None
    env_name = "SonicTheHedgehog2-Genesis"
    # env_name = "SonicAndKnuckles-Genesis"
    # env_name = "SonicTheHedgehog3-Genesis"
    # env_name = "SonicAndKnuckles3-Genesis"

    env = retro.make(env_name)
    # print(env.observation_space) # Box(224, 320, 3)
    # print(env.action_space) # MultiBinary(12)
    # print(env.action_space.sample()) # [1 1 1 0 1 0 1 0 0 1 1 1]
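    # observations are presumably grayscaled and resized from
    # (224, 320, 3) to the (1, 128, 128) input the ConvVAE expects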

    # conv_vae_filename = "weights/conv_vae_SonicAndKnuckles.pkl" # 3, 4608
    # lstm_mdn_filename = "weights/lstm_mdn_SonicAndKnuckles.pkl" # 4608
    # controller_filename = "weights/controller_6656_12.pkl"

    conv_vae_filename = "weights/conv_vae_gray.pkl"  # 1, 1024
    lstm_mdn_filename = "weights/lstm_mdn_gray.pkl"  # 1024
    controller_filename = "weights/controller_rnn_1024_12.pkl"
    evaluator_filename = "weights/evaluator_openes_weights_20_0.499982.npz"

    population_size = 256
    generations = 5000

    # only forward pass
    conv_vae = ConvVAE((1, 128, 128), 1024)
    if os.path.exists(conv_vae_filename):
        print("loading conv vae weights")
        conv_vae.load_state_dict(torch.load(conv_vae_filename))

    # only forward pass
    lstm_mdn = LSTM(vector_size=1024)
    if os.path.exists(lstm_mdn_filename):
        print("loading lstm mdn weights")
        lstm_mdn.load_state_dict(torch.load(lstm_mdn_filename))

    controller = Controller_RNN(input_size=1024, batch_size=2)  # 6656
    if os.path.exists(controller_filename):
        print("loading controller weights")
        controller.load_state_dict(torch.load(controller_filename))
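    # each candidate solution is presumably loaded into this controller
    # (e.g. via set_weights) inside evaluate() during the training loop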

    # evaluator restore
    if os.path.exists(evaluator_filename):
        print("loading evaluator data")
        data = np.load(evaluator_filename)
        weights = data["weights"]
        print("inserting weights into controller")
        controller.set_weights(weights)

        # `existing_weights` presumably seeds the ES distribution mean with
        # the restored solution, so training resumes from the saved
        # controller rather than a fresh random initialization
        evaluator = OpenES(num_params=793612,
                           popsize=population_size,
                           existing_weights=weights)

    else:
        print("extracting controller weights")
        state_dict = controller.state_dict()  # 6
        rnn_weight_ih_l0 = state_dict[
            "rnn.weight_ih_l0"]  # [512, 1024]  524 288
        rnn_weight_hh_l0 = state_dict[
            "rnn.weight_hh_l0"]  # [512, 512]   262 144
        rnn_bias_ih_l0 = state_dict["rnn.bias_ih_l0"]  # [512]        512
        rnn_bias_hh_l0 = state_dict["rnn.bias_hh_l0"]  # [512]        512
        dence_weight = state_dict["dence.weight"]  # [12, 512]    6144
        dence_bias = state_dict["dence.bias"]  # [12]         12
        # 793 612

        rnn_weight_ih_l0 = torch.flatten(rnn_weight_ih_l0)  # [524288]
        rnn_weight_hh_l0 = torch.flatten(rnn_weight_hh_l0)  # [262144]
        dence_weight = torch.flatten(dence_weight)  # [6144]

        flattened_controller_weights = torch.cat(
            (rnn_weight_ih_l0, rnn_weight_hh_l0, rnn_bias_ih_l0,
             rnn_bias_hh_l0, dence_weight, dence_bias),
            dim=0)  # [793612]
        flattened_controller_weights = flattened_controller_weights.detach().numpy()
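
        # the concatenation order above must match however the controller
        # unflattens them again (see the set_weights sketch after this
        # listing)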

        evaluator = OpenES(num_params=793612,
                           popsize=population_size,
                           existing_weights=flattened_controller_weights)

    for generation in range(generations):

        solutions = evaluator.ask()  # (population_size, 793612)

        fitness = np.zeros(population_size)

        # score each candidate with one full environment rollout;
        # unlike evolve() above, candidates here are evaluated serially
        for i in range(population_size):
            fitness[i] = evaluate(weights=solutions[i],
                                  conv_vae=conv_vae,
                                  lstm_mdn=lstm_mdn,
                                  controller=controller,
                                  env=env)

        evaluator.tell(fitness)

        # result()'s first element is the best solution found so far,
        # the second is its fitness
        result = evaluator.result()
        best_weights = result[0]
        best_fitness = result[1]

        print(generation, best_fitness)

        ##############################
        ## save evaluator data weights
        print("save evaluator data weights")
        evaluator_weights_filename = "weights/evaluator_openes_weights_%s_%s.npz" % (
            generation, best_fitness)
        np.savez(evaluator_weights_filename, weights=best_weights)
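

# `controller.set_weights(weights)` is called above but not defined in this
# example. Below is a minimal sketch of the inverse of the flatten/cat step,
# assuming the same tensor order and shapes; it is an illustration, not the
# project's actual method.

import math


def set_weights(controller, flat_weights):
    # slice the 793,612-element vector back into the six tensors,
    # in the order they were concatenated above
    flat = torch.as_tensor(flat_weights, dtype=torch.float32)
    shapes = [
        ("rnn.weight_ih_l0", (512, 1024)),  # 524,288
        ("rnn.weight_hh_l0", (512, 512)),   # 262,144
        ("rnn.bias_ih_l0", (512,)),         # 512
        ("rnn.bias_hh_l0", (512,)),         # 512
        ("dence.weight", (12, 512)),        # 6,144
        ("dence.bias", (12,)),              # 12
    ]
    state_dict, offset = {}, 0
    for name, shape in shapes:
        n = math.prod(shape)
        state_dict[name] = flat[offset:offset + n].view(shape)
        offset += n
    controller.load_state_dict(state_dict)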