예제 #1
0
def collect_episode(bc, env):
    """
    Roll out one complete episode, choosing every action with ``bc``.

    The environment is reset, then stepped repeatedly until ``env.step``
    reports ``done``. Each step is stored as a ``Transition`` in an
    ``Episode`` created from ``env.discount``.

    :param bc: BehaviorCloning; must provide ``single_action(state)``
        returning ``(action, action_prob)``
    :param env: OpenAI gym environment; must also expose ``discount``
    :return: the ``Episode`` holding every transition, in step order
    """
    observation = env.reset()
    rollout = Episode(env.discount)

    while True:
        chosen, chosen_prob = bc.single_action(observation)
        following, payoff, finished, _info = env.step(chosen)
        rollout.insert(
            Transition(observation, chosen, chosen_prob, payoff, following,
                       finished)
        )
        if finished:
            return rollout
        # The state we just reached is the starting point of the next step.
        observation = following
def collect_episode(dqn, env, eps):
    """
    Roll out one complete episode, choosing every action with ``dqn``.

    The environment is reset, then stepped repeatedly until ``env.step``
    reports ``done``. Each step is stored as a ``Transition`` in an
    ``Episode`` created from ``env.discount``.

    NOTE(review): a ``collect_episode(bc, env)`` with a different signature
    appears earlier in this listing; if both live in the same module, this
    definition replaces it -- confirm they belong to separate files.

    :param dqn: DQN; must provide ``single_action(state, eps)`` returning
        ``(action, action_prob)``
    :param env: OpenAI gym environment; must also expose ``discount``
    :param eps: rate of epsilon greedy exploration, forwarded unchanged to
        ``dqn.single_action``
    :return: the ``Episode`` holding every transition, in step order
    """
    observation = env.reset()
    rollout = Episode(env.discount)

    while True:
        chosen, chosen_prob = dqn.single_action(observation, eps)
        following, payoff, finished, _info = env.step(chosen)
        rollout.insert(
            Transition(observation, chosen, chosen_prob, payoff, following,
                       finished)
        )
        if finished:
            return rollout
        # The state we just reached is the starting point of the next step.
        observation = following