def rollout(self,
            env,
            agent,
            directory,
            record_video=False,
            timeout=8,
            num_episode=10,
            record_trajectory=False):
  """Rolls out the agent in the environment and saves the results.

  Args:
    env: the RL environment
    agent: the RL agent
    directory: directory where the output of the rollout is saved
    record_video: whether to record a video of the rollout
    timeout: number of steps after which a goal attempt times out
    num_episode: number of rollout episodes
    record_trajectory: whether to record the ground-truth trajectory

  Returns:
    Average number of successful goals per episode during this rollout.
  """
  print('#######################################')
  print('Rolling out...')
  print('#######################################')
  all_frames = []
  ep_observation, ep_action, ep_agn = [], [], []
  black_frame = pad_image(env.render(mode='rgb_array')) * 0.0
  goal_sampled = 0
  timeout_count, success = 0, 0
  for ep in range(num_episode):
    s = self.reset(env, agent, self.cfg.diverse_scene_content)
    all_frames += [black_frame] * 10  # black frames separate episodes
    g_text, p = env.sample_goal()
    if env.all_goals_satisfied:
      # Nothing left to achieve in this scene; reset and resample.
      s = self.reset(env, agent, True)
      g_text, p = env.sample_goal()
    goal_sampled += 1
    g = np.squeeze(self.encode_fn(g_text))
    current_goal_repetition = 0
    for t in range(self.cfg.max_episode_length):
      # Only explore when collecting ground-truth trajectories.
      prob = self.epsilon if record_trajectory else 0.0
      action = agent.step(s, g, env, explore_prob=prob)
      s_tp1, r, _, _ = env.step(
          action,
          record_achieved_goal=True,
          goal=p,
          atomic_goal=self.cfg.record_atomic_instruction)
      ag = env.get_achieved_goals()
      s = s_tp1
      all_frames.append(
          add_text(pad_image(env.render(mode='rgb_array')), g_text))
      current_goal_repetition += 1
      if record_trajectory:
        ep_observation.append(env.get_direct_obs().tolist())
        ep_action.append(action)
        ep_agn.append(len(ag))

      sample_new_goal = False
      if r > env.shape_val:
        # Goal achieved: hold a green-annotated frame for a few steps.
        for _ in range(5):
          all_frames.append(
              add_text(
                  pad_image(env.render(mode='rgb_array')),
                  g_text,
                  color='green'))
        success += 1
        sample_new_goal = True

      if current_goal_repetition >= timeout:
        all_frames.append(
            add_text(pad_image(env.render(mode='rgb_array')), 'time out :('))
        timeout_count += 1
        sample_new_goal = True

      if sample_new_goal:
        g_text, p = env.sample_goal()
        if env.all_goals_satisfied:
          break
        g = np.squeeze(self.encode_fn(g_text))
        current_goal_repetition = 0
        goal_sampled += 1

  print('Rollout finished')
  print('{} instructions tried'.format(goal_sampled))
  print('{} instructions timed out'.format(timeout_count))
  if record_video:
    save_video(np.uint8(all_frames), directory, fps=5)
    print('Video saved...')
  if record_trajectory:
    print('Recording trajectory...')
    datum = {
        'obs': ep_observation,
        'action': ep_action,
        'achieved goal': ep_agn,
    }
    save_json(datum, directory[:-4] + '_trajectory.json')
  return success / float(num_episode)
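# --- Illustrative sketch (assumptions, not part of this repo) ---------------
# `pad_image` and `add_text` above are assumed to be repo-level helpers that
# pad a rendered frame and burn the current instruction into it. The
# hypothetical stand-ins below sketch that behavior with numpy and PIL; the
# real helpers may differ.
def _example_pad_image(frame, pad=24):
  """Pads an HxWx3 frame with a black border (hypothetical stand-in)."""
  h, w, c = frame.shape
  out = np.zeros((h + 2 * pad, w + 2 * pad, c), dtype=frame.dtype)
  out[pad:pad + h, pad:pad + w] = frame
  return out


def _example_add_text(frame, text, color='white'):
  """Draws the instruction text onto a frame (hypothetical stand-in)."""
  from PIL import Image, ImageDraw  # local import keeps the sketch self-contained
  img = Image.fromarray(np.uint8(frame))
  ImageDraw.Draw(img).text((8, 8), text, fill=color)
  return np.array(img)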
def rollout(self,
            env,
            agent,
            directory,
            record_video=False,
            timeout=8,
            num_episode=10,
            record_trajectory=False):
  """Rolls out the agent with paraphrased goals and saves the results.

  Args:
    env: the RL environment
    agent: the RL agent
    directory: directory where the output of the rollout is saved
    record_video: whether to record a video of the rollout
    timeout: number of steps after which a goal attempt times out
    num_episode: number of rollout episodes
    record_trajectory: whether to record the ground-truth trajectory

  Returns:
    Fraction of sampled instructions completed before timing out.
  """
  print('\n#######################################')
  print('Rolling out...')
  print('#######################################')
  # Randomly perturb a subset of the word embeddings for this rollout.
  if self._use_synonym_for_rollout and self.cfg.embedding_type == 'random':
    original_embedding = agent.randomize_partial_word_embedding(10)
  all_frames = []
  # ep_agn stays empty in this variant (achieved goals are not recorded).
  ep_observation, ep_action, ep_agn = [], [], []
  black_frame = pad_image(env.render(mode='rgb_array')) * 0.0
  goal_sampled = 0
  timeout_count, success = 0, 0
  for ep in range(num_episode):
    s = env.reset(self.cfg.diverse_scene_content)
    all_frames += [black_frame] * 10  # black frames separate episodes
    g_text, p = env.sample_goal()
    if env.all_goals_satisfied:
      # Nothing left to achieve in this scene; reset and resample.
      s = env.reset(True)
      g_text, p = env.sample_goal()
    goal_sampled += 1
    g = self.encode_fn(g_text)
    g = np.squeeze(pad_to_max_length([g], self.cfg.max_sequence_length)[0])
    if (self._use_synonym_for_rollout and
        self.cfg.embedding_type != 'random'):
      # Use unseen lexicons at test time.
      g = paraphrase_sentence(
          self.decode_fn(g), synonym_tables=_SYNONYM_TABLES)
    current_goal_repetition = 0
    for t in range(self.cfg.max_episode_length):
      # Only explore when collecting ground-truth trajectories.
      prob = self.epsilon if record_trajectory else 0.0
      action = agent.step(s, g, env, explore_prob=prob)
      s_tp1, r, _, _ = env.step(
          action,
          record_achieved_goal=False,
          goal=p,
          atomic_goal=self.cfg.record_atomic_instruction)
      s = s_tp1
      all_frames.append(
          add_text(pad_image(env.render(mode='rgb_array')), g_text))
      current_goal_repetition += 1
      if record_trajectory:
        ep_observation.append(env.get_direct_obs().tolist())
        ep_action.append(action)

      sample_new_goal = False
      if r > env.shape_val:
        # Goal achieved: hold a green-annotated frame for a few steps.
        img = pad_image(env.render(mode='rgb_array'))
        for _ in range(5):
          all_frames.append(add_text(img, g_text, color='green'))
        success += 1
        sample_new_goal = True

      if current_goal_repetition >= timeout:
        all_frames.append(
            add_text(pad_image(env.render(mode='rgb_array')), 'time out :('))
        timeout_count += 1
        sample_new_goal = True

      if sample_new_goal:
        g_text, p = env.sample_goal()
        if env.all_goals_satisfied:
          break
        g = self.encode_fn(g_text)
        g = np.squeeze(
            pad_to_max_length([g], self.cfg.max_sequence_length)[0])
        if (self._use_synonym_for_rollout and
            self.cfg.embedding_type != 'random'):
          g = paraphrase_sentence(
              self.decode_fn(g), synonym_tables=_SYNONYM_TABLES)
        current_goal_repetition = 0
        goal_sampled += 1

  # Restore the original embedding.
  if self._use_synonym_for_rollout and self.cfg.embedding_type == 'random':
    agent.set_embedding(original_embedding)

  print('Rollout finished')
  print('{} instructions tried'.format(goal_sampled))
  print('{} instructions timed out'.format(timeout_count))
  print('{} success rate\n'.format(1 - float(timeout_count) / goal_sampled))
  if record_video:
    save_video(np.uint8(all_frames), directory, fps=5)
    print('Video saved...')
  if record_trajectory:
    print('Recording trajectory...')
    datum = {
        'obs': ep_observation,
        'action': ep_action,
        'achieved goal': ep_agn,
    }
    save_json(datum, directory[:-4] + '_trajectory.json')
  return 1 - float(timeout_count) / goal_sampled
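# --- Illustrative sketch (assumptions, not part of this repo) ---------------
# The synonym-rollout variant above leans on two helpers: `pad_to_max_length`,
# which right-pads encoded instruction sequences to a fixed length, and
# `paraphrase_sentence`, which swaps words for synonyms from a lookup table so
# the agent is evaluated on unseen lexicons. The stand-ins below are minimal
# hypothetical sketches of that behavior, not the repo's implementations.
def _example_pad_to_max_length(sequences, max_length, pad_id=0):
  """Right-pads (or truncates) each token-id sequence to `max_length`."""
  return [list(seq[:max_length]) + [pad_id] * max(0, max_length - len(seq))
          for seq in sequences]


def _example_paraphrase_sentence(sentence, synonym_table):
  """Replaces each word that has an entry in the synonym table."""
  return ' '.join(synonym_table.get(w, w) for w in sentence.split())


# e.g. _example_paraphrase_sentence(
#     'push the red ball', {'push': 'shove', 'ball': 'sphere'})
# -> 'shove the red sphere'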