Example #1
import random

import numpy as np

def episode_encoded(AE, env_name='BreakoutDeterministic-v3', minimum_score=0, onehot=True, video=False):
    # `envs`, `crop_state`, `onehot_encode`, and `flat2list` are helpers from
    # the surrounding project (a sketch follows this example); the episode is
    # re-run until a random policy reaches at least `minimum_score`.
    env = envs.Atari(env_name)
    action_space = env.action_space.n
    cumulative_reward = 0
    ep_output = []  # Defined up front so the return below is safe if the loop never runs

    while cumulative_reward <= minimum_score:
        cumulative_reward = 0
        frame_counter = 0

        # Reset the environment and get the initial state
        state = env.reset()

        # Get encoded features
        preprocessed_state = np.expand_dims(np.asarray(crop_state(state)), axis=0)
        encoded_state = AE.flat_encode(preprocessed_state)

        reward = 0
        done = False

        # Start episode
        ep_output = []
        while not done:
            frame_counter += 1

            # Select an action
            action = random.randrange(0, action_space)
            # Execute the action, get next state and reward
            next_state, reward, done, info = env.step(action)
            cumulative_reward += reward

            # Get encoded features
            preprocessed_next_state = np.expand_dims(crop_state(next_state), axis=0)
            encoded_next_state = AE.flat_encode(preprocessed_next_state)

            # Append the SARS tuple to the dataset
            actions_to_append = onehot_encode(action, action_space) if onehot else action
            # The terminal flag is stored twice, matching the two end-of-episode
            # columns the downstream dataset format appears to expect
            sars_list = [encoded_state, actions_to_append, reward, encoded_next_state, [1 if done else 0] * 2]
            ep_output.append(flat2list(sars_list, as_tuple=True))

            # Render environment
            if video:
                env.render()

            # Update state
            state = next_state
            encoded_state = encoded_next_state

    return ep_output
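All three examples rely on small project helpers that are not shown here. Below is a minimal sketch of plausible implementations, assuming 210x160 RGB Atari frames; the names match the calls above, but the bodies (in particular the crop box) are assumptions rather than the project's actual code.

import numpy as np

def crop_state(state):
    # Assumed helper: convert an RGB Atari frame to grayscale and crop away
    # the score banner; the exact crop box is a guess.
    gray = np.mean(np.asarray(state), axis=2).astype(np.uint8)
    return gray[32:196, 8:152]

def onehot_encode(action, action_space):
    # Assumed helper: one-hot list of length `action_space`.
    encoded = [0] * action_space
    encoded[action] = 1
    return encoded

def flat2list(nested, as_tuple=False):
    # Assumed helper: flatten a mixed list of arrays, lists, and scalars
    # into a single flat sequence.
    flat = []
    for item in nested:
        if isinstance(item, np.ndarray):
            flat.extend(item.ravel().tolist())
        elif isinstance(item, (list, tuple)):
            flat.extend(item)
        else:
            flat.append(item)
    return tuple(flat) if as_tuple else flat

With these in place, a dataset for one qualifying episode could be collected as dataset = episode_encoded(AE, minimum_score=10), where AE is any object exposing a flat_encode method.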
Example #2
import numpy as np

def _eval_with_FE(mdp, policy, AE, metric, selected_states=None, max_ep_len=np.inf, render=False):
    # `crop_state` and `filter_state_with_RFS` are project helpers; `metric`
    # selects 'discounted', 'average', or plain cumulative reward.
    gamma = mdp.gamma if metric == 'discounted' else 1
    ep_performance = 0.0
    df = 1.0  # Discount factor

    # Start episode
    reward = 0
    done = False
    state = mdp.reset()
    # Get encoded features
    preprocessed_state = np.expand_dims(np.asarray(crop_state(state)), axis=0)
    encoded_state = AE.flat_encode(preprocessed_state)
    if selected_states is not None:
        filtered_state = filter_state_with_RFS(encoded_state, selected_states)
    else:
        filtered_state = encoded_state

    frame_counter = 0
    while not done and frame_counter <= max_ep_len:
        frame_counter += 1

        # Select an action
        action = policy.draw_action(filtered_state, done, evaluation=True)
        # Execute the action, get next state and reward
        next_state, reward, done, info = mdp.step(action)
        ep_performance += df * reward  # Update performance
        df *= gamma  # Update discount factor

        # Get encoded features
        preprocessed_next_state = np.expand_dims(crop_state(next_state), axis=0)
        encoded_next_state = AE.flat_encode(preprocessed_next_state)
        if selected_states is not None:
            filtered_next_state = filter_state_with_RFS(encoded_next_state, selected_states)
        else:
            filtered_next_state = encoded_next_state

        # Render environment
        if render:
            mdp.render(mode='human')

        # Update state
        state = next_state
        filtered_state = filtered_next_state

    if metric == 'average':
        ep_performance /= frame_counter

    return ep_performance, frame_counter
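A hedged usage sketch: `mdp`, `policy`, and `AE` come from the surrounding project, with the policy exposing draw_action and the autoencoder exposing flat_encode as called above.

# Hypothetical call; all three objects are assumed to be constructed elsewhere.
score, n_frames = _eval_with_FE(mdp, policy, AE, metric='discounted',
                                max_ep_len=10000, render=False)
print('Discounted return %.3f over %d frames' % (score, n_frames))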
Example #3
import random

import numpy as np

def episode_images(episode_id, logger, env_name='BreakoutDeterministic-v3', video=False):
    # `envs` and `crop_state` are project helpers; `logger` is expected to
    # expose a `path` string to which file names are appended.
    env = envs.Atari(env_name)
    action_space = env.action_space.n
    frame_counter = 0

    # Reset the environment and get the initial state
    state = env.reset()

    # Save image of state
    state_id = '%04d_%d' % (episode_id, frame_counter)
    np.save(logger.path + state_id, crop_state(state))

    reward = 0
    done = False

    # Start episode
    ep_output = []
    while not done:
        frame_counter += 1

        # Select an action
        action = random.randrange(0, action_space)
        # Execute the action, get next state and reward
        next_state, reward, done, info = env.step(action)

        # Save image of state
        next_state_id = '%04d_%d' % (episode_id, frame_counter)
        np.save(logger.path + next_state_id, crop_state(next_state))
        ep_output.append([state_id, action, reward, next_state_id])

        # Render environment
        if video:
            env.render()

        # Update state
        state = next_state
        state_id = next_state_id

    return ep_output
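A hedged usage sketch for collecting several episodes of frame images; as the calls above imply, `logger` is assumed to expose a `path` string ending in a path separator.

# Hypothetical loop over episode ids; each call saves one .npy file per frame
# and returns the (state_id, action, reward, next_state_id) transitions.
transitions = []
for ep_id in range(5):
    transitions.extend(episode_images(ep_id, logger))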