コード例 #1
0
def episode_encoded(AE, env_name='BreakoutDeterministic-v3', minimum_score=0, onehot=True, video=False):
    """
    Play episodes with uniformly random actions until one of them scores
    strictly more than ``minimum_score``, and return that episode's samples.

    :param AE: object exposing ``flat_encode``; it is called on each cropped
        state after a leading axis of size 1 has been added.
    :param env_name: name passed to ``envs.Atari`` to build the environment.
    :param minimum_score: an episode whose cumulative reward is less than or
        equal to this value is discarded and a new episode is played.
    :param onehot: if True the action is stored as
        ``onehot_encode(action, n_actions)``, otherwise as the raw index.
    :param video: if True, ``env.render()`` is called after every step.
    :return: list with one element per step of the accepted episode, each
        element being ``flat2list([encoded_state, action, reward,
        encoded_next_state, [done_flag, done_flag]], as_tuple=True)``.
    """
    env = envs.Atari(env_name)
    action_space = env.action_space.n

    # Always play at least one episode. Guarding the first iteration with
    # `cumulative_reward <= minimum_score` skipped the loop entirely for a
    # negative minimum_score and left ep_output unbound at the return.
    while True:
        cumulative_reward = 0

        # Get current state and its encoded features
        state = env.reset()
        preprocessed_state = np.expand_dims(np.asarray(crop_state(state)), axis=0)
        encoded_state = AE.flat_encode(preprocessed_state)

        # Start episode
        done = False
        ep_output = []
        while not done:
            # Select a random action
            action = random.randrange(0, action_space)

            # Execute the action, get next state and reward
            next_state, reward, done, _ = env.step(action)
            cumulative_reward += reward

            # Get encoded features of the next state
            preprocessed_next_state = np.expand_dims(np.asarray(crop_state(next_state)), axis=0)
            encoded_next_state = AE.flat_encode(preprocessed_next_state)

            # Append sars tuple to dataset; the terminal flag is stored twice
            # (NOTE(review): presumably one "absorbing" and one "done" column
            # -- confirm against the consumer of this dataset).
            actions_to_append = onehot_encode(action, action_space) if onehot else action
            terminal_flag = 1 if done else 0
            sars_list = [encoded_state, actions_to_append, reward, encoded_next_state, [terminal_flag] * 2]
            ep_output.append(flat2list(sars_list, as_tuple=True))

            # Render environment
            if video:
                env.render()

            # Update state
            encoded_state = encoded_next_state

        # Accept the episode only if it beat the required score
        if cumulative_reward > minimum_score:
            return ep_output
コード例 #2
0
def collect_images_dataset(logger, episodes=100, env_name='BreakoutDeterministic-v3', header=None, video=False, n_jobs=-1):
    """
    Collect ``episodes`` episodes through ``episode_images`` and return them
    as a single array.

    :param logger: forwarded as the second positional argument to
        ``episode_images``.
    :param episodes: number of episodes to collect; episode ids run from 0 to
        ``episodes - 1``.
    :param env_name: forwarded to ``episode_images`` as ``env_name``.
    :param header: if not None, it is prepended as the first row of the
        returned array.
    :param video: forwarded to ``episode_images`` as ``video``.
    :param n_jobs: forwarded to ``Parallel`` (NOTE(review): presumably
        joblib, where -1 means "use all cores" -- confirm against the
        file's imports).
    :return: array built from the flattened list of episodes, with the
        header row prepended when one is given.
    """
    # Parameters for the episode function
    ep_params = {
        'env_name': env_name,
        'video': video,
    }

    # Collect episodes in parallel. `range` replaces `xrange`, which does not
    # exist on Python 3; on Python 2 it behaves the same here.
    run_episodes = Parallel(n_jobs=n_jobs)
    dataset = run_episodes(
        delayed(episode_images)(eid, logger, **ep_params) for eid in tqdm(range(episodes))
    )

    # Each episode is in a list, so the dataset needs to be flattened
    dataset = np.asarray(flat2list(dataset))

    # Return dataset, with the header row first when requested
    if header is not None:
        return np.append([header], dataset, axis=0)
    return dataset