Beispiel #1
0
def evaluate(net, args, replay_memory, dict_all_returns, key, store_transition=True):
    """Run one episode of policy `net` in a freshly created environment.

    Accumulates the episode reward in `total_reward` and, when
    `store_transition` is True, counts frames and pushes every
    (state, action, next_state, reward, done) transition into
    `replay_memory` via `add_experience`.

    NOTE(review): this snippet is truncated after `state = next_state`,
    so the episode-end bookkeeping and return value are not visible.
    `dict_all_returns` and `key` are unused in the visible code --
    presumably consumed by the truncated tail; confirm.
    """
    total_reward = 0.0
    # NormalizedActions wrapper rescales agent actions into the env's range.
    env = utils.NormalizedActions(gym.make(env_tag))
    state = env.reset()
    num_frames = 0
    state = utils.to_tensor(state).unsqueeze(0)  # add batch dim -> shape (1, state_dim)
    # replay_buffer = replay_memory.ReplayMemory(args.buffer_size)
    # replay_memory[key] = replay_memory

    if args.is_cuda: state = state.cuda()
    done = False
    while not done:
        # Frames are only counted when transitions are actually stored.
        if store_transition: num_frames += 1
        # if render and is_render: env.render()
        action = net.forward(state)
        # NOTE(review): Tensor.clamp() is NOT in-place and its result is
        # discarded here -- the action is passed on unclamped.  Probably
        # intended `action = action.clamp(-1, 1)` or `action.clamp_(-1, 1)`.
        action.clamp(-1, 1)
        action = utils.to_numpy(action.cpu())  # move to host and convert for env.step
        # if is_action_noise: action += self.ounoise.noise()
        # print("1")

        next_state, reward, done, info = env.step(action.flatten())  # Simulate one step in environment
        next_state = utils.to_tensor(next_state).unsqueeze(0)
        if args.is_cuda:
            next_state = next_state.cuda()
        total_reward += reward

        if store_transition:
            add_experience(state, action, next_state, reward, done, replay_memory, args)
            # replay_memory[key] = replay_memory

            # if len(replay_buffer) > args.batch_size:
            #     transitions = replay_buffer.sample(args.batch_size)
            #     batch = replay_memory.Transition(*zip(*transitions))
            #     replay_queue.put(batch)
        state = next_state
Beispiel #2
0
        # print("ddpg time:", (time.time()-time_evolution)/3600)

        return best_train_fitness, test_score, elite_index


if __name__ == "__main__":
    # Entry point: build score trackers, the environment, and the ERL agent.
    # NOTE(review): snippet is truncated after `next_save = 100`; the
    # training loop that presumably follows is not visible here.
    num_processes = 4  # NOTE(review): unused in the visible code -- confirm against the truncated tail
    parameters = Parameters()  # Create the Parameters class
    tracker = utils.Tracker(parameters, ['erl'],
                            '_score.csv')  # Initiate tracker
    frame_tracker = utils.Tracker(parameters, ['frame_erl'],
                                  '_score.csv')  # Initiate tracker
    time_tracker = utils.Tracker(parameters, ['time_erl'], '_score.csv')

    #Create Env (action/state dims are read off the env and stored on parameters)
    env = utils.NormalizedActions(gym.make(env_tag))
    parameters.action_dim = env.action_space.shape[0]
    parameters.state_dim = env.observation_space.shape[0]

    #Seed every RNG source (env, torch, numpy, random) for reproducibility
    env.seed(parameters.seed)
    torch.manual_seed(parameters.seed)
    np.random.seed(parameters.seed)
    random.seed(parameters.seed)

    #Create Agent
    agent = Agent(parameters, env)
    print('Running', env_tag, ' State_dim:', parameters.state_dim,
          ' Action_dim:', parameters.action_dim)

    next_save = 100
Beispiel #3
0
if __name__ == "__main__":
    # Entry point: build parameters from CLI args, trackers, the env, and
    # optionally run the variation-operator tests.
    # NOTE(review): snippet is truncated right after `operator_runner` is
    # constructed; the call that uses it is not visible here.
    parameters = Parameters(
        parser)  # Inject the cla arguments in the parameters object
    tracker = utils.Tracker(parameters, ['erl'],
                            '_score.csv')  # Initiate tracker
    frame_tracker = utils.Tracker(parameters, ['frame_erl'],
                                  '_score.csv')  # Initiate tracker
    time_tracker = utils.Tracker(parameters, ['time_erl'], '_score.csv')
    ddpg_tracker = utils.Tracker(parameters, ['ddpg'], '_score.csv')
    selection_tracker = utils.Tracker(parameters,
                                      ['elite', 'selected', 'discarded'],
                                      '_selection.csv')

    # Create Env (action/state dims are read off the env and stored on parameters)
    env = utils.NormalizedActions(gym.make(parameters.env_name))
    parameters.action_dim = env.action_space.shape[0]
    parameters.state_dim = env.observation_space.shape[0]

    # Write the parameters to a the info file and print them
    parameters.write_params(stdout=True)

    # Seed every RNG source (env, torch, numpy, random) for reproducibility
    env.seed(parameters.seed)
    torch.manual_seed(parameters.seed)
    np.random.seed(parameters.seed)
    random.seed(parameters.seed)

    # Tests the variation operators after that is saved first with -save_periodic
    if parameters.test_operators:
        operator_runner = OperatorRunner(parameters, env)
Beispiel #4
0
        results.append(total_reward)

    print("Reward:", np.mean(results))


def load_genetic_agent(args):
    """Build a GeneticAgent from `args` and restore its actor weights.

    The checkpoint is read from `args.model_path` with `torch.load` and
    loaded into the agent's actor network via `load_state_dict`.
    """
    agent = GeneticAgent(args)
    checkpoint_file = os.path.join(args.model_path)
    state_dict = torch.load(checkpoint_file)
    agent.actor.load_state_dict(state_dict)
    return agent


if __name__ == "__main__":
    # Entry point: set up an env and a minimal Parameters shell for
    # evaluating a saved genetic-agent actor.
    # NOTE(review): relies on a module-level `args` (presumably an argparse
    # result) that is not visible in this snippet; the snippet is also
    # truncated after seeding, so the evaluation call is not shown.
    env = utils.NormalizedActions(gym.make(args.env))

    parameters = Parameters(None, init=False)  # empty shell; fields are filled in manually below
    parameters.individual_bs = 0
    parameters.action_dim = env.action_space.shape[0]
    parameters.state_dim = env.observation_space.shape[0]
    parameters.use_ln = True  # presumably a layer-norm toggle -- confirm against Parameters
    parameters.device = torch.device('cuda')  # NOTE(review): hard-codes CUDA; fails on CPU-only hosts
    setattr(parameters, 'model_path', args.model_path)

    #Seed every RNG source (env, torch, numpy, random) for reproducibility
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)