def run(args):
    """Train (or replay) a neural-network policy on a gym environment using
    a differential-evolution optimizer from the ``devo`` package.

    ``args`` is an argparse-style namespace; the fields read here are:
    env_name, steps, from_file, recycle_population, batch_size, maximize,
    reward_reducer, log_level, load_model, scaling_factor, crossover_rate,
    population_size, episodes, optimizer_name, should_test, save_model.

    Side effects: creates and steps a gym environment, optionally loads or
    saves the model state dict at ``args.from_file``, and prints progress.
    When ``args.load_model`` is set the process replays forever and exits.
    """
    env = gym.make(args.env_name)
    observation = env.reset()
    steps_per_episode = args.steps
    model_file_output_path = args.from_file

    # N is episode steps length; D_in is input observation dimension;
    # H is hidden layer dimension; D_out is output action space dimension.
    N, D_in, H, D_out = args.batch_size, observation.shape[0], 40, 1

    agent = Agent(env, steps_per_episode, args.maximize, args.reward_reducer)
    model = Net(N, D_in, H, D_out, agent)
    agent.log_level = args.log_level

    if args.load_model:
        # Replay mode: restore saved weights, attach, run indefinitely, quit.
        # (Consolidated from two separate `if args.load_model` checks.)
        model.load_state_dict(torch.load(model_file_output_path))
        model.eval()
        agent.attach_model(model)
        agent.run_forever(steps_per_episode)
        exit()

    # Training mode: connect the network to the agent and optimize.
    agent.attach_model(model)
    model.train()

    scaling_factor = args.scaling_factor
    crossover_rate = args.crossover_rate
    population_size = args.population_size
    episodes_num = args.episodes  # total number of episodes (fitness evals)

    # Flatten the network weights into a single parameter vector; its
    # length is the dimensionality of the DE search space.
    model.flatten()
    problem_size = model.flattened.shape[0]
    print("problem_size: ", problem_size)

    # Initial population and fitness values (float64, as devo expects).
    x = torch.randn(population_size, problem_size, dtype=torch.float64)
    y = torch.randn(population_size, D_out, dtype=torch.float64)

    # Bind the numpy views to locals so the arrays backing the C pointers
    # stay alive — taking .ctypes on a temporary risks a dangling pointer.
    x_np = x.detach().numpy()
    y_np = y.detach().numpy()
    x_c = x_np.ctypes.data_as(c.POINTER(c.c_double))  # c pointer init population
    y_c = y_np.ctypes.data_as(c.POINTER(c.c_double))  # c pointer init fitness values
    agent.out_population = x_np
    agent.out_fitnesses = y_np

    # TODO: make these adjustable
    optimizer = getattr(devo, args.optimizer_name)

    def _run_optimizer(num_evals, pop_ptr, fit_ptr):
        # One call into the Adaptive-DEs C API; the -100/100 bounds are
        # unused by this integration.
        optimizer.run(
            num_evals,
            population_size,        # population size
            scaling_factor,         # scaling factor
            crossover_rate,         # crossover rate
            agent.objective_func,
            problem_size,           # problem size
            -100,                   # unused value
            100,                    # unused value
            pop_ptr,
            fit_ptr,
            agent.results_callback  # no results callback needed
        )

    if args.recycle_population:
        # Run one generation of DE at a time, feeding each run's output
        # population back in as the next run's input.
        generations = episodes_num // population_size
        for _ in range(generations):
            _run_optimizer(population_size, x_c, y_c)
            x_c = agent.out_population.ctypes.data_as(c.POINTER(c.c_double))
            y_c = agent.out_fitnesses.ctypes.data_as(c.POINTER(c.c_double))
    else:
        # Single uninterrupted run over the full evaluation budget.
        _run_optimizer(episodes_num, x_c, y_c)

    # Get mins — fitness is inverted (negated reward) in the output, so the
    # minimum fitness corresponds to the best episode.
    print("min_fitness: ", agent.min_reward)
    model.update_weights_from_vec(agent.min_weights)
    result = -agent.run_episode(agent.steps_per_episode, True)
    if args.should_test:
        print("test_run(expected: {}, actual: {})".format(
            agent.min_reward, result))
    env.close()

    if args.save_model:
        print("model_file: ", model_file_output_path)
        # save model
        torch.save(model.state_dict(), model_file_output_path)