# Code example #1
def run(args):
    """Train (or replay) a neural-network agent using an adaptive
    differential-evolution optimizer from the ``devo`` package.

    Flow:
      1. Build the gym environment, the agent, and the network.
      2. If ``args.load_model``: restore saved weights and replay forever.
      3. Otherwise: evolve the network's flattened weight vector with DE,
         load the best individual back into the network, optionally test
         it against the recorded best fitness, and optionally save it.

    Args:
        args: parsed CLI namespace; fields used: env_name, steps, from_file,
            recycle_population, batch_size, maximize, reward_reducer,
            log_level, load_model, scaling_factor, crossover_rate,
            population_size, episodes, optimizer_name, should_test,
            save_model.
    """
    env = gym.make(args.env_name)
    observation = env.reset()
    steps_per_episode = args.steps
    model_file_output_path = args.from_file
    should_recycle_population = args.recycle_population

    # N is episode steps length; D_in is input observation dimension;
    # H is hidden layer dimension; D_out is output action space dimension.
    N, D_in, H, D_out = args.batch_size, observation.shape[0], 40, 1

    agent = Agent(env, steps_per_episode, args.maximize, args.reward_reducer)
    model = Net(N, D_in, H, D_out, agent)

    # set log level
    agent.log_level = args.log_level

    if args.load_model:
        model.load_state_dict(torch.load(model_file_output_path))
        model.eval()

    # Connect the network to the agent before any episodes are run.
    agent.attach_model(model)

    if args.load_model:
        # Replay mode: run episodes indefinitely with the restored weights.
        # Was `exit()`, which raised SystemExit and skipped env.close();
        # `return` keeps the function usable as a library entry point and
        # the `finally` guarantees the environment is released.
        try:
            agent.run_forever(steps_per_episode)
        finally:
            env.close()
        return

    # Training mode from here on.
    model.train()

    scaling_factor = args.scaling_factor
    crossover_rate = args.crossover_rate
    population_size = args.population_size
    episodes_num = args.episodes  # total fitness-evaluation budget

    # Flatten network weights into a single vector; its length is the
    # dimensionality of the DE search space.
    model.flatten()

    problem_size = model.flattened.shape[0]

    print("problem_size: ", problem_size)

    # Initial population, Fitness values
    x = torch.randn(population_size, problem_size, dtype=torch.float64)
    y = torch.randn(population_size, D_out, dtype=torch.float64)

    # Expose the tensor buffers as C double pointers for the devo C API.
    # NOTE(review): .numpy() shares memory with the tensor, so the data
    # stays alive only while x/y are in scope — keep them referenced.
    x_c = x.detach().numpy().ctypes.data_as(c.POINTER(
        c.c_double))  # c pointer init population
    y_c = y.detach().numpy().ctypes.data_as(c.POINTER(
        c.c_double))  # c pointer init fitness values

    agent.out_population = x.detach().numpy()
    agent.out_fitnesses = y.detach().numpy()

    # TODO: make these adjustable
    optimizer = getattr(devo, args.optimizer_name)

    generations = episodes_num // population_size

    # Runs 1 generation of DE at a time, using previous run's out population
    # as an input for the next one
    if should_recycle_population:
        for _ in range(generations):
            # # Using Adaptive-DEs
            optimizer.run(
                population_size,  # evaluation budget for this generation
                population_size,  # population size
                scaling_factor,  # scaling factor
                crossover_rate,  # crossover rate
                agent.objective_func,
                problem_size,  # problem size
                -100,  # unused value
                100,  # unused value
                x_c,
                y_c,
                agent.results_callback  # no results callback needed
            )

            # Feed this generation's output back in as the next input.
            x_c = agent.out_population.ctypes.data_as(c.POINTER(c.c_double))
            y_c = agent.out_fitnesses.ctypes.data_as(c.POINTER(c.c_double))
    else:
        # Single optimizer call covering the whole evaluation budget.
        # # Using Adaptive-DEs
        optimizer.run(
            episodes_num,
            population_size,  # population size
            scaling_factor,  # scaling factor
            crossover_rate,  # crossover rate
            agent.objective_func,
            problem_size,  # problem size
            -100,  # unused value
            100,  # unused value
            x_c,
            y_c,
            agent.results_callback  # no results callback needed
        )

    # Get mins - inverted in output
    print("min_fitness: ", agent.min_reward)

    model.update_weights_from_vec(agent.min_weights)

    # Replay one episode with the best weights; negate to undo the
    # fitness inversion noted above.
    result = -agent.run_episode(agent.steps_per_episode, True)

    if args.should_test:
        print("test_run(expected: {}, actual: {})".format(
            agent.min_reward, result))

    env.close()

    if args.save_model:
        print("model_file: ", model_file_output_path)
        # save model
        torch.save(model.state_dict(), model_file_output_path)