Beispiel #1
0
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    """Fill in the module-level training state and construct the solver.

    Reads the module-level names ``num_worker``, ``num_worker_trial``,
    ``gamename``, ``optimizer``, ``num_episode`` and ``config`` (plus
    ``antithetic`` when the fallback solver is built) and assigns the
    globals declared on the first line of the body.

    ``optimizer`` picks the solver: 'ses' and 'pepg' both build a PEPG
    (with different keyword sets), 'ga' builds SimpleGA, 'cma' builds
    CMAES, and every other value falls through to OpenES.

    Args:
        sigma_init: forwarded to the chosen solver as ``sigma_init``.
        sigma_decay: forwarded as ``sigma_decay`` to every solver except
            CMAES, which is not given it.
    """
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE

    population = num_worker * num_worker_trial
    filebase = 'log/' + '.'.join(
        (gamename, optimizer, str(num_episode), str(population)))

    game = config.games[gamename]
    model = make_model(game)
    num_params = model.param_count
    print("size of model", num_params)

    # Keyword arguments common to every solver that receives sigma_decay.
    decaying = dict(sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_limit=0.02,
                    weight_decay=0.005,
                    popsize=population)
    # The same set extended with the fixed learning-rate schedule.
    with_learning_rate = dict(decaying,
                              learning_rate=0.01,
                              learning_rate_decay=1.0,
                              learning_rate_limit=0.01)

    if optimizer == 'ses':
        es = PEPG(num_params, sigma_alpha=0.2, elite_ratio=0.1, **decaying)
    elif optimizer == 'ga':
        es = SimpleGA(num_params, elite_ratio=0.1, **decaying)
    elif optimizer == 'cma':
        es = CMAES(num_params, sigma_init=sigma_init, popsize=population)
    elif optimizer == 'pepg':
        es = PEPG(num_params, sigma_alpha=0.20, **with_learning_rate)
    else:
        es = OpenES(num_params, antithetic=antithetic, **with_learning_rate)

    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
Beispiel #2
0
def evolve():
    """Run OpenES until one generation's best reward beats MY_REQUIRED_REWARD.

    Each generation asks the solver for candidate solutions, scores them
    in parallel with ``worker`` on a pool of 4 processes (each started
    with ``init_worker``), feeds the rewards back to the solver and prints
    the max / mean / min reward.

    Returns:
        The first element of ``solver.result()`` for the generation whose
        maximum reward exceeded ``MY_REQUIRED_REWARD``. If interrupted with
        Ctrl-C, the first element of ``solver.result()`` from the last
        fully completed generation, or ``None`` if none completed.
    """
    evaler = Evaluator()
    solver = OpenES(evaler.n_weights, popsize=200)
    # The evaluator is only needed here to size the solver.
    del evaler

    pool = Pool(4, init_worker)
    best_solution_so_far = None
    try:
        for generation in count():
            # Ask the ES for a set of candidate solutions.
            solutions = solver.ask()

            # Score every candidate in the worker pool (chunks of 10).
            rewards = pool.map(worker, solutions, 10)

            # Give the rewards back to the ES.
            solver.tell(rewards)

            # Index 0 of the result is what this function returns as the
            # best solution.
            reward_vector = solver.result()

            generation_max_reward = max(rewards)
            generation_mean_reward = sum(rewards) / len(rewards)
            generation_min_reward = min(rewards)

            print("gen: {},max:{},mean:{},min:{}".format(
                generation, generation_max_reward, generation_mean_reward,
                generation_min_reward))
            best_solution_so_far = reward_vector[0]
            if generation_max_reward > MY_REQUIRED_REWARD:
                return reward_vector[0]
    except KeyboardInterrupt:
        return best_solution_so_far
    finally:
        # pool.map is synchronous, so no useful work is pending on either
        # exit path; shut the workers down instead of leaking them.
        pool.terminate()
        pool.join()
Beispiel #3
0
# Genetic-algorithm solver. ``forget_best`` comes from the command-line
# arguments; every other setting is fixed here.
ga = SimpleGA(NPARAMS,
              sigma_init=0.5,
              popsize=NPOPULATION,
              elite_ratio=0.1,
              forget_best=args.forget_best,
              weight_decay=0.0)

# OpenAI-ES solver.
# NOTE(review): sigma_decay is 0.999 and learning_rate_decay is 1.0;
# presumably a factor below 1.0 anneals and 1.0 does not -- confirm against
# the OpenES implementation.
oes = OpenES(NPARAMS,
             sigma_init=0.5,
             sigma_decay=0.999,
             learning_rate=0.1,
             learning_rate_decay=1.0,
             popsize=NPOPULATION,
             antithetic=False,
             weight_decay=0.0,
             rank_fitness=False,
             forget_best=False)

# CMA-ES solver.
cmaes = CMAES(NPARAMS,
              popsize=NPOPULATION,
              weight_decay=0.0,
              sigma_init=0.5)

# Report the CPU count, then create a pool with one process per CPU.
print(mp.cpu_count())
pool = mp.Pool(mp.cpu_count())

# Fitness function used by the rest of the script.
fit_func = evluate_func


# defines a function to use solver to solve fit_func
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999, init_opt = ''):
  """Set up the module-level training state and build or restore the solver.

  Reads the module-level names ``num_worker``, ``num_worker_trial``,
  ``env_name``, ``optimizer`` and ``num_episode`` (plus ``antithetic`` when
  the fallback solver is built) and assigns the globals declared below.

  Args:
    sigma_init: forwarded to a newly built solver as ``sigma_init``.
    sigma_decay: forwarded as ``sigma_decay`` to every newly built solver
      except CMAES, which is not given it.
    init_opt: path of a pickled solver to resume from. When empty (the
      default) a fresh solver is built according to ``optimizer``:
      'ses' / 'pepg' -> PEPG, 'ga' -> SimpleGA, 'cma' -> CMAES, anything
      else -> OpenES.
  """
  global population, filebase, controller_filebase, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
  population = num_worker * num_worker_trial
  filebase = './log/'+env_name+'.'+optimizer+'.'+str(num_episode)+'.'+str(population)
  controller_filebase = './controller/'+env_name+'.'+optimizer+'.'+str(num_episode)+'.'+str(population)

  model = make_model()

  num_params = model.param_count
  #print("size of model", num_params)

  if init_opt:
    # NOTE(security): unpickling can execute arbitrary code; only pass
    # checkpoint files that come from a trusted source.
    # The context manager closes the file handle, which the previous
    # pickle.load(open(...)) form left open.
    with open(init_opt, 'rb') as checkpoint:
      es = pickle.load(checkpoint)
  elif optimizer == 'ses':
    es = PEPG(num_params,
      sigma_init=sigma_init,
      sigma_decay=sigma_decay,
      sigma_alpha=0.2,
      sigma_limit=0.02,
      elite_ratio=0.1,
      weight_decay=0.005,
      popsize=population)
  elif optimizer == 'ga':
    es = SimpleGA(num_params,
      sigma_init=sigma_init,
      sigma_decay=sigma_decay,
      sigma_limit=0.02,
      elite_ratio=0.1,
      weight_decay=0.005,
      popsize=population)
  elif optimizer == 'cma':
    es = CMAES(num_params,
      sigma_init=sigma_init,
      popsize=population)
  elif optimizer == 'pepg':
    es = PEPG(num_params,
      sigma_init=sigma_init,
      sigma_decay=sigma_decay,
      sigma_alpha=0.20,
      sigma_limit=0.02,
      learning_rate=0.01,
      learning_rate_decay=1.0,
      learning_rate_limit=0.01,
      weight_decay=0.005,
      popsize=population)
  else:
    # Any other optimizer name falls back to OpenES.
    es = OpenES(num_params,
      sigma_init=sigma_init,
      sigma_decay=sigma_decay,
      sigma_limit=0.02,
      learning_rate=0.01,
      learning_rate_decay=1.0,
      learning_rate_limit=0.01,
      antithetic=antithetic,
      weight_decay=0.005,
      popsize=population)

  PRECISION = 10000
  SOLUTION_PACKET_SIZE = (4+num_params)*num_worker_trial
  RESULT_PACKET_SIZE = 4*num_worker_trial
Beispiel #5
0
def _flatten_controller_weights(controller):
    """Return the controller's parameters as one flat numpy vector.

    The tensors are concatenated in this fixed order: rnn.weight_ih_l0,
    rnn.weight_hh_l0, rnn.bias_ih_l0, rnn.bias_hh_l0, dence.weight,
    dence.bias. Per the original author's notes this yields 793612 values.
    """
    state_dict = controller.state_dict()
    ordered_keys = (
        "rnn.weight_ih_l0",
        "rnn.weight_hh_l0",
        "rnn.bias_ih_l0",
        "rnn.bias_hh_l0",
        "dence.weight",
        "dence.bias",
    )
    flat = torch.cat(
        [torch.flatten(state_dict[key]) for key in ordered_keys], dim=0)
    return flat.data.numpy()


def train_controller_openes():
    """Train the RNN controller on Sonic 2 with OpenES.

    Loads the ConvVAE, LSTM-MDN and controller weights from ``weights/``
    when the files exist. The search starts from a previously saved
    evaluator weight vector if present, otherwise from the controller's
    current weights. Each generation evaluates every candidate with
    ``evaluate`` and saves the best weights reported by the solver to a new
    ``.npz`` file.
    """
    # Other environments tried by the original author:
    # "SonicTheHedgehog-Genesis", "SonicAndKnuckles-Genesis",
    # "SonicTheHedgehog3-Genesis", "SonicAndKnuckles3-Genesis".
    env_name = "SonicTheHedgehog2-Genesis"

    conv_vae_filename = "weights/conv_vae_gray.pkl"  # 1, 1024
    lstm_mdn_filename = "weights/lstm_mdn_gray.pkl"  # 1024
    controller_filename = "weights/controller_rnn_1024_12.pkl"
    evaluator_filename = "weights/evaluator_openes_weights_20_0.499982.npz"

    population_size = 256
    generations = 5000
    # Length of the flattened controller weight vector handed to OpenES.
    num_params = 793612

    env = retro.make(env_name)
    try:
        # only forward pass
        conv_vae = ConvVAE((1, 128, 128), 1024)
        if os.path.exists(conv_vae_filename):
            print("loading conv vae weights")
            conv_vae.load_state_dict(torch.load(conv_vae_filename))

        # only forward pass
        lstm_mdn = LSTM(vector_size=1024)
        if os.path.exists(lstm_mdn_filename):
            print("loading lstm mdn weights")
            lstm_mdn.load_state_dict(torch.load(lstm_mdn_filename))

        controller = Controller_RNN(input_size=1024, batch_size=2)
        if os.path.exists(controller_filename):
            print("loading controller weights")
            controller.load_state_dict(torch.load(controller_filename))

        if os.path.exists(evaluator_filename):
            # Resume from a previously saved weight vector.
            print("loading evaluator data")
            # The context manager closes the .npz archive once the array
            # has been read into memory.
            with np.load(evaluator_filename) as data:
                initial_weights = data["weights"]
            print("inserting weights into controller")
            controller.set_weights(initial_weights)
        else:
            # Start from whatever weights the controller currently holds.
            print("extracting controller weights")
            initial_weights = _flatten_controller_weights(controller)

        evaluator = OpenES(num_params=num_params,
                           popsize=population_size,
                           existing_weights=initial_weights)

        for generation in range(generations):
            # One candidate weight vector per population member.
            solutions = evaluator.ask()

            fitness = np.zeros(population_size)
            for i in range(population_size):
                fitness[i] = evaluate(weights=solutions[i],
                                      conv_vae=conv_vae,
                                      lstm_mdn=lstm_mdn,
                                      controller=controller,
                                      env=env)

            evaluator.tell(fitness)

            # first element is the best solution, second element is the
            # best fitness
            result = evaluator.result()
            best_weights = result[0]
            best_fitness = result[1]

            print(generation, best_fitness)

            # Save the best weights; the file name carries the generation
            # index and fitness, so each generation writes a new file.
            print("save evaluator data weights")
            evaluator_weights_filename = "weights/evaluator_openes_weights_%s_%s.npz" % (
                generation, best_fitness)
            np.savez(evaluator_weights_filename, weights=best_weights)
    finally:
        # Release the emulator even if training is interrupted or fails.
        env.close()
Beispiel #6
0
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    """Set up the module-level training state and construct the solver.

    Reads the module-level names ``num_worker``, ``num_worker_trial``,
    ``exp_name``, ``env_name``, ``optimizer``, ``num_episode`` and
    ``config_args`` (plus ``antithetic`` when the fallback solver is
    built), creates the log directory ``results/<exp_name>/<env_name>/log/``
    and assigns the globals declared below.

    ``optimizer`` picks the solver: 'ses' / 'pepg' -> PEPG, 'ga' ->
    SimpleGA, 'cma' -> CMAES, anything else -> OpenES.

    Args:
        sigma_init: forwarded to the chosen solver as ``sigma_init``.
        sigma_decay: forwarded as ``sigma_decay`` to every solver except
            CMAES, which is not given it.
    """
    global population, filebase, game, controller, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filedir = 'results/{}/{}/log/'.format(exp_name, env_name)
    # exist_ok avoids the check-then-create race of
    # "if not exists: makedirs" when several processes start together.
    os.makedirs(filedir, exist_ok=True)
    filebase = filedir + env_name + '.' + optimizer + '.' + str(
        num_episode) + '.' + str(population)
    controller = make_controller(args=config_args)

    num_params = controller.param_count
    print("size of model", num_params)

    if optimizer == 'ses':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.2,
                  sigma_limit=0.02,
                  elite_ratio=0.1,
                  weight_decay=0.005,
                  popsize=population)
    elif optimizer == 'ga':
        es = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=0.005,
                      popsize=population)
    elif optimizer == 'cma':
        es = CMAES(num_params, sigma_init=sigma_init, popsize=population)
    elif optimizer == 'pepg':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.20,
                  sigma_limit=0.02,
                  learning_rate=0.01,
                  learning_rate_decay=1.0,
                  learning_rate_limit=0.01,
                  weight_decay=0.005,
                  popsize=population)
    else:
        # Any other optimizer name falls back to OpenES.
        es = OpenES(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    antithetic=antithetic,
                    weight_decay=0.005,
                    popsize=population)

    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
Beispiel #7
0
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
  """Set up module-level state for a (possibly novelty-search) training run.

  Creates ``<ROOT>/log``, derives the log file base name from the game,
  optimizer, model name, episode count, population size and unique id
  (with optional '.novelty', novelty-mode and ns-mode suffixes), chooses
  ``BC_SIZE`` from ``novelty_mode``, loads the model, computes the packet
  sizes and finally constructs the solver selected by ``optimizer``
  ('ses' / 'pepg' -> PEPG, 'ga' -> SimpleGA, 'cma' -> CMAES, anything
  else -> OpenES).

  Args:
    sigma_init: forwarded to the chosen solver as ``sigma_init``.
    sigma_decay: forwarded as ``sigma_decay`` to every solver except
      CMAES, which is not given it.
  """
  global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE, model_name, novelty_search, unique_id, novelty_mode, BC_SIZE, ns_mode

  population = num_worker * num_worker_trial

  os.makedirs(os.path.join(ROOT, 'log'), exist_ok=True)
  filebase = os.path.join(
    ROOT,
    'log',
    '.'.join((gamename, optimizer, model_name, str(num_episode), str(population))),
  ) + '.' + unique_id
  if novelty_search:
    filebase += '.novelty'

  # Size of the behaviour characterisation for the active novelty mode.
  if novelty_mode == 'h':
    BC_SIZE = H_SIZE
  elif novelty_mode == 'z':
    BC_SIZE = Z_SIZE
  elif novelty_mode == 'h_concat':
    BC_SIZE = BC_SEQ_LENGTH * H_SIZE
  elif novelty_mode == 'z_concat':
    BC_SIZE = BC_SEQ_LENGTH * Z_SIZE
  elif novelty_mode == 'a_concat':
    BC_SIZE = BC_SEQ_LENGTH * A_SIZE
  else:
    # Dummy size; per the original note it is unused because the reward
    # is the distance travelled.
    BC_SIZE = 9

  if novelty_mode:
    filebase += '.' + novelty_mode
  if ns_mode:
    filebase += '.' + ns_mode

  model = make_model(model_name, load_model=True)
  num_params = model.param_count
  print("size of model", num_params)

  PRECISION = 10000
  SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
  RESULT_PACKET_SIZE = (4 + BC_SIZE) * num_worker_trial

  # Keyword arguments common to every solver that receives sigma_decay.
  decaying = dict(
    sigma_init=sigma_init,
    sigma_decay=sigma_decay,
    sigma_limit=0.02,
    weight_decay=0.005,
    popsize=population)
  # The same set extended with the fixed learning-rate schedule.
  with_learning_rate = dict(
    decaying,
    learning_rate=0.01,
    learning_rate_decay=1.0,
    learning_rate_limit=0.01)

  if optimizer == 'ses':
    es = PEPG(num_params, sigma_alpha=0.2, elite_ratio=0.1, **decaying)
  elif optimizer == 'ga':
    es = SimpleGA(num_params, elite_ratio=0.1, **decaying)
  elif optimizer == 'cma':
    es = CMAES(num_params, sigma_init=sigma_init, popsize=population)
  elif optimizer == 'pepg':
    es = PEPG(num_params, sigma_alpha=0.20, **with_learning_rate)
  else:
    es = OpenES(num_params, antithetic=antithetic, **with_learning_rate)
Beispiel #8
0
def testRun():
    """Benchmark several solvers on ``fit_func`` and plot their histories.

    Runs ``test_solver`` on PEPG, two PEPGVariant configurations
    (``diversity_best`` 0.1 and 1), OpenES and CMA-ES, then saves a
    comparison plot to ``./results/rose_<NPARAMS>d.svg``. The history of
    the second PEPGVariant run is computed but not plotted.
    """
    import os  # local import: only needed to create the output directory

    config()
    x = np.random.randn(NPARAMS)
    print("The fitness of initial guess", fit_func(x))

    # Settings shared by the PEPG baseline and both PEPGVariant runs.
    pepg_settings = dict(
        sigma_init=0.5,
        learning_rate=0.1,
        learning_rate_decay=1.0,
        popsize=NPOPULATION,
        average_baseline=False,
        weight_decay=0.00,
        rank_fitness=False,
        forget_best=False)

    pepg = PEPG(NPARAMS, **pepg_settings)
    pepg_history = test_solver(pepg)

    pepgV = PEPGVariant(NPARAMS, diversity_best=0.1, **pepg_settings)
    print("-----test PEPG vairant-----")
    pepgv_history = test_solver(pepgV)

    print("---test PEPG variant with different diversity-----")
    pepgV2 = PEPGVariant(NPARAMS, diversity_best=1, **pepg_settings)
    pepgV2_history = test_solver(pepgV2)

    oes = OpenES(
        NPARAMS,
        sigma_init=0.5,
        sigma_decay=0.999,
        learning_rate=0.1,
        learning_rate_decay=1.0,
        popsize=NPOPULATION,
        antithetic=False,
        weight_decay=0.00,
        rank_fitness=False,
        forget_best=False)

    print("-----test oes--------------")
    oes_history = test_solver(oes)

    cmaes = CMAES(NPARAMS,
                  popsize=NPOPULATION,
                  weight_decay=0.0,
                  sigma_init=0.5)
    cma_history = test_solver(cmaes)

    # Flat zero line drawn as the "Global Optimum" reference.
    best_history = [0] * MAX_ITERATION
    plt.figure(figsize=(16, 8), dpi=150)

    # (history, color, linewidth, linestyle, label) for every plotted line.
    series = [
        (best_history, "black", 0.5, "-.", 'Global Optimum'),
        (pepgv_history, "red", 1.0, "-", 'PEPGV / NES'),
        (pepg_history, "blue", 1.0, "-.", 'PEPG / NES'),
        (oes_history, "orange", 1.0, "-", 'OpenAI-ES'),
        (cma_history, "green", 1.0, "-", 'CMA-ES'),
    ]
    handles = []
    for history, color, linewidth, linestyle, label in series:
        line, = plt.plot(history,
                         color=color,
                         linewidth=linewidth,
                         linestyle=linestyle,
                         label=label)
        handles.append(line)

    # Every plotted line is listed, including CMA-ES, which the previous
    # handle list left out of the legend.
    plt.legend(handles=handles, loc='best')

    plt.xlim(0, 100)

    plt.xlabel('generation')
    plt.ylabel('loss')

    # savefig does not create missing directories.
    os.makedirs("./results", exist_ok=True)
    plt.savefig("./results/rose_" + str(NPARAMS) + "d.svg")
Beispiel #9
0
def initialize_settings(sigma_init=0.1,
                        sigma_decay=0.9999,
                        weight_decay=0.005):
    """Set up the module-level training state and construct the solver.

    Reads the module-level names ``num_worker``, ``num_worker_trial``,
    ``gamename``, ``optimizer``, ``num_episode`` and ``config`` (plus
    ``antithetic`` for OpenES) and assigns the globals declared below.

    ``optimizer`` picks the solver:
      * 'ses' / 'pepg'  -> PEPG (different keyword sets)
      * 'ga'            -> SimpleGA
      * 'cma'           -> CMAES
      * 'oes'           -> OpenES
      * 'global_pso' / 'local_pso' / 'random_pso' -> Pyswarms with the
        matching ``communication_topology``
      * any key of ``ng.optimizers.registry`` -> Nevergrad wrapper

    Args:
        sigma_init: forwarded to the chosen solver as ``sigma_init``.
        sigma_decay: forwarded as ``sigma_decay`` to PEPG, SimpleGA and
            OpenES only.
        weight_decay: forwarded to the chosen solver as ``weight_decay``.

    Raises:
        ValueError: if ``optimizer`` matches none of the above.
    """
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filebase = 'log/' + gamename + '.' + optimizer + '.' + str(
        num_episode) + '.' + str(population)
    game = config.games[gamename]
    model = make_model(game)
    num_params = model.param_count
    print("size of model", num_params)

    # Optimizer name -> Pyswarms communication topology.
    pso_topologies = {
        'global_pso': 'global',
        'local_pso': 'local',
        'random_pso': 'random',
    }

    if optimizer == 'ses':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.2,
                  sigma_limit=0.02,
                  elite_ratio=0.1,
                  weight_decay=weight_decay,
                  popsize=population)
    elif optimizer == 'ga':
        es = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=weight_decay,
                      popsize=population)
    elif optimizer == 'cma':
        es = CMAES(num_params,
                   sigma_init=sigma_init,
                   popsize=population,
                   weight_decay=weight_decay)
    elif optimizer == 'pepg':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.20,
                  sigma_limit=0.02,
                  learning_rate=0.01,
                  learning_rate_decay=1.0,
                  learning_rate_limit=0.01,
                  weight_decay=weight_decay,
                  popsize=population)
    elif optimizer == 'oes':
        es = OpenES(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    antithetic=antithetic,
                    weight_decay=weight_decay,
                    popsize=population)
    elif optimizer in pso_topologies:
        es = Pyswarms(num_params,
                      sigma_init=sigma_init,
                      weight_decay=weight_decay,
                      popsize=population,
                      communication_topology=pso_topologies[optimizer])
    elif optimizer in ng.optimizers.registry:
        # Direct membership test on the registry; no need to build and
        # sort a list of its keys first.
        es = Nevergrad(optimizer,
                       num_params,
                       sigma_init=sigma_init,
                       popsize=population,
                       weight_decay=weight_decay)
    else:
        raise ValueError(
            'Could not find optimizer! Got: %r' % (optimizer,))

    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial