def episode_encoded(AE, env_name='BreakoutDeterministic-v3', minimum_score=0, onehot=True, video=False):
    """Play random-policy episodes and return one as encoded SARS' tuples.

    Episodes are replayed until one achieves a cumulative reward strictly
    greater than ``minimum_score``; only that final episode's transitions
    are returned.

    Args:
        AE: autoencoder exposing ``flat_encode`` to embed preprocessed frames.
        env_name (str): Gym Atari environment id.
        minimum_score (int): episodes scoring <= this are discarded and replayed.
        onehot (bool): if True, store the action one-hot encoded; else as an int.
        video (bool): if True, render the environment each step.

    Returns:
        list: flattened (state, action, reward, next_state, done-flags) tuples
        for every step of the accepted episode.
    """
    env = envs.Atari(env_name)
    n_actions = env.action_space.n

    cumulative_reward = 0
    # Keep replaying until an episode clears the score threshold
    while cumulative_reward <= minimum_score:
        cumulative_reward = 0
        frame_counter = 0

        # Reset and encode the initial frame
        state = env.reset()
        first_frame = np.expand_dims(np.asarray(crop_state(state)), axis=0)
        encoded_state = AE.flat_encode(first_frame)

        done = False
        transitions = []
        while not done:
            frame_counter += 1

            # Uniform random policy
            action = random.randrange(0, n_actions)
            next_state, reward, done, info = env.step(action)
            cumulative_reward += reward

            # Encode the successor frame
            next_frame = np.expand_dims(crop_state(next_state), axis=0)
            encoded_next_state = AE.flat_encode(next_frame)

            # Build and store the flattened SARS' tuple
            action_repr = onehot_encode(action, n_actions) if onehot else action
            done_flags = [1 if done else 0] * 2
            transitions.append(flat2list([encoded_state,
                                          action_repr,
                                          reward,
                                          encoded_next_state,
                                          done_flags], as_tuple=True))

            if video:
                env.render()

            # Advance to the next step
            state = next_state
            encoded_state = encoded_next_state

    return transitions
def collect_images_dataset(logger, episodes=100, env_name='BreakoutDeterministic-v3', header=None, video=False, n_jobs=-1):
    """Collect image-episode tuples from many parallel random-policy runs.

    Args:
        logger: project logger passed through to ``episode_images``.
        episodes (int): number of episodes to collect.
        env_name (str): Gym Atari environment id.
        header: optional header row to prepend to the returned dataset.
        video (bool): if True, render each environment while playing.
        n_jobs (int): joblib parallelism (-1 uses all cores).

    Returns:
        numpy.ndarray: the flattened dataset, with ``header`` as row 0 when
        provided.
    """
    # Parameters forwarded to each episode worker
    ep_params = {
        'env_name': env_name,
        'video': video
    }

    # Collect episodes in parallel.
    # NOTE: `range` (not the Python-2-only `xrange`) keeps this runnable on
    # both Python 2 and 3; joblib consumes the sequence fully either way.
    dataset = Parallel(n_jobs=n_jobs)(
        delayed(episode_images)(eid, logger, **ep_params)
        for eid in tqdm(range(episodes))
    )
    # Each episode is a list of tuples, so flatten one level
    dataset = np.asarray(flat2list(dataset))

    if header is not None:
        return np.append([header], dataset, axis=0)
    return dataset