Ejemplo n.º 1
0
    # Exploration-noise helper built from the action dimensionality.
    # NOTE(review): presumably an Ornstein-Uhlenbeck process, judging by the
    # name -- confirm against the OUNoise definition.
    noise = OUNoise(action_dim)
    done = False
    early_stop = False
    train_epoch = 0
    rewards = 0  # running sum of every reward received in the loop below

    # NOTE(review): nothing in this excerpt sets early_stop or reacts to
    # `done`; confirm that both are handled further down in the loop body.
    while not early_stop:

        # Sample trajectories
        for step in range(NB_STEP):

            action = model.actor.get_action(
                torch.FloatTensor(state)
            ) * ACTION_BOUND  # get the agent's action and scale it by the action range
            action = noise.get_action(
                action.detach().numpy(),
                step)  # add noise for exploration
            next_state, reward, done, _ = env.step(
                action)  # advance the environment by one step

            # Record the data for this time step
            fifo.push(state, action, reward, next_state, done)
            rewards += reward
            state = next_state

            # Only start sampling once the buffer holds more than BATCH_SIZE
            # entries.
            if len(fifo) > BATCH_SIZE:

                states_batch, actions_batch, rewards_batch, next_states_batch, done_batch = fifo.sample(
                    BATCH_SIZE)

                # Convert the sampled states to a float tensor.
                states_batch = torch.FloatTensor(states_batch)
Ejemplo n.º 2
0
# Frame budget for the outer training loop (`while frame_idx < max_frames`);
# frame_idx advances by NUM_PROCESSES on every environment step.
max_frames = NUM_PROCESSES * 12000
# Cap on steps per rollout, and the minibatch size handed to the updater
# once the replay buffer holds more than that many entries.
max_steps, batch_size = 500, 128
# Mutable bookkeeping updated by the training loop.
frame_idx, episode_rewards = 0, []

if __name__ == "__main__":
    # Start from the initial state
    while frame_idx < max_frames:
        state = envs.reset()
        ou_noise.reset()
        episode_reward = 0

        for step in range(max_steps):
            action = policy_net.get_action(state)
            action = ou_noise.get_action(action, step)
            next_state, reward, done, _ = envs.step(action)

            replay_buffer.push(state, action, reward, next_state, done)
            if len(replay_buffer) > batch_size:
                ddpg.update(batch_size, replay_buffer)

            state = next_state
            episode_reward += reward
            frame_idx += NUM_PROCESSES

            if frame_idx % (NUM_PROCESSES * 100) == 0:
                # plot(frame_idx, rewards)
                # rewards_tmp = np.array(rewards)
                if episode_rewards:
                    print("finished frames {}, {:.1f}".format(