def evaluate(net, args, replay_memory, dict_all_returns, key, store_transition=True):
    total_reward = 0.0
    env = utils.NormalizedActions(gym.make(env_tag))
    state = env.reset()
    num_frames = 0
    state = utils.to_tensor(state).unsqueeze(0)
    if args.is_cuda:
        state = state.cuda()

    done = False
    while not done:
        if store_transition:
            num_frames += 1
        action = net.forward(state)
        action = action.clamp(-1, 1)  # clamp() is out-of-place; without reassignment the bound is silently dropped
        action = utils.to_numpy(action.cpu())
        next_state, reward, done, info = env.step(action.flatten())  # simulate one step in the environment
        next_state = utils.to_tensor(next_state).unsqueeze(0)
        if args.is_cuda:
            next_state = next_state.cuda()
        total_reward += reward
        if store_transition:
            add_experience(state, action, next_state, reward, done, replay_memory, args)
        state = next_state
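# A minimal sketch of the add_experience() helper called above -- the real
# implementation lives elsewhere in this repo. Assumptions (not confirmed by
# this file): replay_memory exposes a push() method in the common PyTorch
# ReplayMemory pattern, and rewards/masks follow the (1, 1) tensor
# conventions used in evaluate().
def add_experience_sketch(state, action, next_state, reward, done, replay_memory, args):
    reward = utils.to_tensor(np.array([reward])).unsqueeze(0)                     # shape (1, 1)
    done_mask = utils.to_tensor(np.array([0.0 if done else 1.0])).unsqueeze(0)    # 0.0 stops bootstrapping
    action = utils.to_tensor(action)
    if args.is_cuda:
        reward, done_mask, action = reward.cuda(), done_mask.cuda(), action.cuda()
    replay_memory.push(state, action, next_state, reward, done_mask)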
# print("ddpg time:", (time.time()-time_evolution)/3600) return best_train_fitness, test_score, elite_index if __name__ == "__main__": num_processes = 4 parameters = Parameters() # Create the Parameters class tracker = utils.Tracker(parameters, ['erl'], '_score.csv') # Initiate tracker frame_tracker = utils.Tracker(parameters, ['frame_erl'], '_score.csv') # Initiate tracker time_tracker = utils.Tracker(parameters, ['time_erl'], '_score.csv') #Create Env env = utils.NormalizedActions(gym.make(env_tag)) parameters.action_dim = env.action_space.shape[0] parameters.state_dim = env.observation_space.shape[0] #Seed env.seed(parameters.seed) torch.manual_seed(parameters.seed) np.random.seed(parameters.seed) random.seed(parameters.seed) #Create Agent agent = Agent(parameters, env) print('Running', env_tag, ' State_dim:', parameters.state_dim, ' Action_dim:', parameters.action_dim) next_save = 100
if __name__ == "__main__":
    parameters = Parameters(parser)  # Inject the command-line arguments into the parameters object
    tracker = utils.Tracker(parameters, ['erl'], '_score.csv')  # Initiate score tracker
    frame_tracker = utils.Tracker(parameters, ['frame_erl'], '_score.csv')  # Initiate frame tracker
    time_tracker = utils.Tracker(parameters, ['time_erl'], '_score.csv')
    ddpg_tracker = utils.Tracker(parameters, ['ddpg'], '_score.csv')
    selection_tracker = utils.Tracker(parameters, ['elite', 'selected', 'discarded'], '_selection.csv')

    # Create Env
    env = utils.NormalizedActions(gym.make(parameters.env_name))
    parameters.action_dim = env.action_space.shape[0]
    parameters.state_dim = env.observation_space.shape[0]

    # Write the parameters to the info file and print them
    parameters.write_params(stdout=True)

    # Seed
    env.seed(parameters.seed)
    torch.manual_seed(parameters.seed)
    np.random.seed(parameters.seed)
    random.seed(parameters.seed)

    # Test the variation operators on networks saved earlier with -save_periodic
    if parameters.test_operators:
        operator_runner = OperatorRunner(parameters, env)
        results.append(total_reward)
    print("Reward:", np.mean(results))


def load_genetic_agent(args):
    actor_path = args.model_path  # os.path.join() with a single argument is a no-op
    agent = GeneticAgent(args)
    agent.actor.load_state_dict(torch.load(actor_path))
    return agent


if __name__ == "__main__":
    env = utils.NormalizedActions(gym.make(args.env))

    parameters = Parameters(None, init=False)
    parameters.individual_bs = 0
    parameters.action_dim = env.action_space.shape[0]
    parameters.state_dim = env.observation_space.shape[0]
    parameters.use_ln = True
    parameters.device = torch.device('cuda')
    parameters.model_path = args.model_path  # plain attribute assignment is clearer than setattr()

    # Seed
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
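    # Hedged usage sketch: load the actor and roll it out for a few episodes,
    # mirroring the results/print fragment at the top of this file. The
    # episode count of 10 is hypothetical, and args is assumed to be an
    # argparse namespace carrying env, model_path and seed.
    agent = load_genetic_agent(parameters)
    results = []
    for _ in range(10):
        state, done, total_reward = env.reset(), False, 0.0
        while not done:
            obs = utils.to_tensor(state).unsqueeze(0).to(parameters.device)
            action = agent.actor.forward(obs)
            state, reward, done, _ = env.step(utils.to_numpy(action.cpu()).flatten())
            total_reward += reward
        results.append(total_reward)
    print("Reward:", np.mean(results))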