class BasicEpisode(Episode):
    """ Episode for Navigation. """

    def __init__(self, args, gpu_id, strict_done=False):
        super(BasicEpisode, self).__init__()

        self._env = None
        self.gpu_id = gpu_id
        self.strict_done = strict_done
        self.task_data = None
        self.glove_embedding = None
        self.actions = get_actions(args)
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self._last_action_embedding_idx = 0
        self.target_object = None
        self.prev_frame = None
        self.current_frame = None
        self.current_objs = None

        self.scene_states = []
        self.partial_reward = args.partial_reward
        self.seen_list = []
        if args.eval:
            random.seed(args.seed)
        self.room = None

    @property
    def environment(self):
        return self._env

    @property
    def actions_list(self):
        return [{"action": a} for a in self.actions]

    def reset(self):
        self.done_count = 0
        self.duplicate_count = 0
        self._env.back_to_start()

    def state_for_agent(self):
        return self.environment.current_frame

    def objstate_for_agent(self):
        return self.environment.current_objs

    def current_agent_position(self):
        """ Get the current position of the agent in the scene. """
        return self.environment.current_agent_position

    def step(self, action_as_int):
        action = self.actions_list[action_as_int]
        # NOTE: `args` is not defined in this scope; it is assumed to be
        # available at module level.
        if args.vis:
            print(action)
        if action["action"] != DONE:
            self.environment.step(action)

        reward, terminal, action_was_successful, next_done = self.judge(action)
        return reward, terminal, action_was_successful, next_done

    def judge(self, action):
        """ Judge the last event. """
        reward = STEP_PENALTY

        # Thresholding replaced with simple look up for efficiency.
        if self.environment.controller.state in self.scene_states:
            if action["action"] != DONE:
                if self.environment.last_action_success:
                    self.duplicate_count += 1
                else:
                    self.failed_action_count += 1
            # added partial reward
            if self.partial_reward:
                reward = self.get_partial_reward()
        else:
            self.scene_states.append(self.environment.controller.state)

        done = False
        next_done = False

        if action["action"] == DONE:
            self.done_count = 1
            action_was_successful = False
            for id_ in self.task_data:
                if self.environment.object_is_visible(id_):
                    reward = GOAL_SUCCESS_REWARD
                    done = True
                    action_was_successful = True
                    if self.partial_reward:
                        self.seen_list = []
                        reward += self.get_partial_reward()
                    break
            self.seen_list = []
        else:
            action_was_successful = self.environment.last_action_success
            for id_ in self.task_data:
                if self.environment.object_is_visible(id_):
                    next_done = True
                    break

        if done:
            self.seen_list = []
            self.done_count = 1

        return reward, done, action_was_successful, next_done

    # Set the target index.
    @property
    def target_object_index(self):
        """ Return the index which corresponds to the target object. """
        return self._target_object_index

    @target_object_index.setter
    def target_object_index(self, target_object_index):
        """ Set the target object by specifying the index. """
        self._target_object_index = gpuify(
            torch.LongTensor([target_object_index]), self.gpu_id)

    def get_partial_reward(self):
        reward = STEP_PENALTY
        reward_dict = {}

        if self.target_parents is not None:
            for parent_type in self.target_parents:
                parent_ids = self.environment.find_id(parent_type)
                for parent_id in parent_ids:
                    if self.environment.object_is_visible(
                            parent_id) and parent_id not in self.seen_list:
                        reward_dict[parent_id] = self.target_parents[parent_type]

        if len(reward_dict) != 0:
            v = list(reward_dict.values())
            k = list(reward_dict.keys())
            reward = max(v)
            self.seen_list.append(k[v.index(reward)])

        return reward

    def _new_episode(self,
                     args,
                     scenes,
                     possible_targets,
                     targets=None,
                     room=None,
                     keep_obj=False,
                     glove=None):
        """ New navigation episode. """
        scene = random.choice(scenes)
        self.room = room

        if self._env is None:
            self._env = Environment(
                offline_data_dir=args.offline_data_dir,
                use_offline_controller=True,
                grid_size=0.25,
                images_file_name=args.images_file_name,
                local_executable_path=args.local_executable_path,
            )
            self._env.start(scene)
        else:
            self._env.reset(scene)

        # Randomize the start location.
        start_state = self._env.randomize_agent_location()
        objects = self._env.all_objects()
        visible_objects = [obj.split("|")[0] for obj in objects]
        intersection = [obj for obj in visible_objects if obj in targets]

        self.task_data = []

        idx = random.randint(0, len(intersection) - 1)
        goal_object_type = intersection[idx]
        self.target_object = goal_object_type

        for id_ in objects:
            type_ = id_.split("|")[0]
            if goal_object_type == type_:
                self.task_data.append(id_)

        child_object = self.task_data[0].split("|")[0]
        # print('room is ', self.room)
        try:
            self.target_parents = c2p_prob[self.room][child_object]
        except KeyError:  # no parent-object priors for this (room, object) pair
            self.target_parents = None

        if args.verbose:
            print("Scene", scene, "Navigating towards:", goal_object_type)

        self.glove_embedding = None
        self.glove_embedding = toFloatTensor(
            glove.glove_embeddings[goal_object_type][:], self.gpu_id)

    def new_episode(
        self,
        args,
        scenes,
        possible_targets=None,
        targets=None,
        rooms=None,
        keep_obj=False,
        glove=None,
    ):
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self.prev_frame = None
        self.current_frame = None
        self.current_objs = None
        self._new_episode(args, scenes, possible_targets, targets, rooms,
                          keep_obj, glove)

class BasicEpisode(Episode):
    """ Episode for Navigation. """

    def __init__(self, args, gpu_id, strict_done=False):
        super(BasicEpisode, self).__init__()

        self._env = None
        self.gpu_id = gpu_id
        self.strict_done = strict_done
        self.task_data = None
        self.glove_embedding = None
        self.actions = get_actions(args)
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self._last_action_embedding_idx = 0
        self.target_object = None
        self.prev_frame = None
        self.current_frame = None
        self.scene = None

        self.scene_states = []
        if args.eval:
            random.seed(args.seed)

        self._episode_times = 0
        self.seen_percentage = 0

        self.state_reps = []
        self.state_memory = []
        self.action_memory = []
        self.obs_reps = []

        self.episode_length = 0
        self.target_object_detected = False

        # tools
        self.states = []
        self.actions_record = []
        self.action_outputs = []
        self.detection_results = []

        # imitation learning
        self.imitation_learning = args.imitation_learning
        self.action_failed_il = False

        self.action_probs = []

        self.meta_learning = args.update_meta_network
        self.meta_predictions = []

        self.visual_infos = {}
        self.match_score = []
        self.indices_topk = []

    @property
    def environment(self):
        return self._env

    @property
    def actions_list(self):
        return [{"action": a} for a in self.actions]

    @property
    def episode_times(self):
        return self._episode_times

    @episode_times.setter
    def episode_times(self, times):
        self._episode_times = times

    def reset(self):
        self.done_count = 0
        self.duplicate_count = 0
        self._env.back_to_start()

    def state_for_agent(self):
        return self.environment.current_frame

    def current_detection_feature(self):
        return self.environment.current_detection_feature

    def current_depth(self):
        return self.environment.current_depth

    def current_agent_position(self):
        """ Get the current position of the agent in the scene. """
        return self.environment.current_agent_position

    def step(self, action_as_int):
        action = self.actions_list[action_as_int]
        if action["action"] != DONE:
            self.environment.step(action)
        else:
            self.done_count += 1

        reward, terminal, action_was_successful = self.judge(action)
        return reward, terminal, action_was_successful

    def judge(self, action):
        """ Judge the last event. """
        reward = STEP_PENALTY

        # Thresholding replaced with simple look up for efficiency.
        if self.environment.controller.state in self.scene_states:
            if action["action"] != DONE:
                if self.environment.last_action_success:
                    self.duplicate_count += 1
                else:
                    self.failed_action_count += 1
        else:
            self.scene_states.append(self.environment.controller.state)

        done = False

        if action["action"] == DONE:
            action_was_successful = False
            for id_ in self.task_data:
                if self.environment.object_is_visible(id_):
                    reward = GOAL_SUCCESS_REWARD
                    done = True
                    action_was_successful = True
                    break
        else:
            action_was_successful = self.environment.last_action_success

        return reward, done, action_was_successful

    # Set the target index.
    @property
    def target_object_index(self):
        """ Return the index which corresponds to the target object. """
        return self._target_object_index

    @target_object_index.setter
    def target_object_index(self, target_object_index):
        """ Set the target object by specifying the index. """
        self._target_object_index = gpuify(
            torch.LongTensor([target_object_index]), self.gpu_id)

    def _new_episode(self, args, scenes, targets):
        """ New navigation episode. """
        scene = random.choice(scenes)
        self.scene = scene

        if self._env is None:
            self._env = Environment(
                offline_data_dir=args.data_dir,
                use_offline_controller=True,
                grid_size=0.25,
                detection_feature_file_name=args.detection_feature_file_name,
                images_file_name=args.images_file_name,
                visible_object_map_file_name=args.visible_map_file_name,
                local_executable_path=args.local_executable_path,
                optimal_action_file_name=args.optimal_action_file_name,
            )
            self._env.start(scene)
        else:
            self._env.reset(scene)

        # Randomize the start location.
        self._env.randomize_agent_location()

        self.task_data = []
        objects = self._env.all_objects()
        visible_objects = [obj.split("|")[0] for obj in objects]
        intersection = [obj for obj in visible_objects if obj in targets]

        idx = random.randint(0, len(intersection) - 1)
        goal_object_type = intersection[idx]
        self.target_object = goal_object_type

        for id_ in objects:
            type_ = id_.split("|")[0]
            if goal_object_type == type_:
                self.task_data.append(id_)

        if args.verbose:
            print("Scene", scene, "Navigating towards:", goal_object_type)

    def new_episode(self, args, scenes, targets):
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self.episode_length = 0
        self.prev_frame = None
        self.current_frame = None
        self.scene_states = []
        self.state_reps = []
        self.state_memory = []
        self.action_memory = []
        self.target_object_detected = False
        self.episode_times += 1

        self.states = []
        self.actions_record = []
        self.action_outputs = []
        self.detection_results = []
        self.obs_reps = []

        self.action_failed_il = False

        self.action_probs = []
        self.meta_predictions = []

        self.visual_infos = {}
        self.match_score = []
        self.indices_topk = []

        self._new_episode(args, scenes, targets)

class BasicEpisode(Episode):
    """ Episode for Navigation. """

    def __init__(self, args, gpu_id, strict_done=False):
        super(BasicEpisode, self).__init__()

        self._env = None
        self.gpu_id = gpu_id
        self.strict_done = strict_done
        self.task_data = None
        self.glove_embedding = None
        self.actions = get_actions(args)
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self._last_action_embedding_idx = 0
        self.target_object = None
        self.prev_frame = None
        self.current_frame = None
        self.det_frame = None
        self.last_det = False
        self.current_det = False
        self.det_gt = None
        self.optimal_actions = None

        self.scene_states = []
        self.detections = []
        if args.eval:
            random.seed(args.seed)

    @property
    def environment(self):
        return self._env

    @property
    def actions_list(self):
        return [{"action": a} for a in self.actions]

    def reset(self):
        self.done_count = 0
        self.duplicate_count = 0
        self._env.back_to_start()

    def state_for_agent(self):
        return self.environment.current_frame

    def current_agent_position(self):
        """ Get the current position of the agent in the scene. """
        return self.environment.current_agent_position

    def step(self, action_as_int, arrive):
        self.last_det = self.current_det
        action = self.actions_list[action_as_int]
        if action["action"] != DONE:
            self.environment.step(action)
        else:
            self.done_count += 1

        reward, terminal, action_was_successful, arrive = self.judge(
            action, arrive)
        return reward, terminal, action_was_successful, arrive

    def judge(self, action, arrive):
        """ Judge the last event. """
        reward = STEP_PENALTY

        # Thresholding replaced with simple look up for efficiency.
        if self.environment.controller.state in self.scene_states:
            if action["action"] != DONE:
                if self.environment.last_action_success:
                    self.duplicate_count += 1
                else:
                    self.failed_action_count += 1
        else:
            self.scene_states.append(self.environment.controller.state)

        done = False

        if action["action"] == DONE:
            action_was_successful = False
            for id_ in self.task_data:
                if self.environment.object_is_visible(id_):
                    reward = GOAL_SUCCESS_REWARD
                    done = True
                    action_was_successful = True
                    break
        else:
            # test for 100% accuracy of target detection
            for id_ in self.task_data:
                if self.environment.object_is_visible(id_):
                    arrive = True
                    reward = GOAL_SUCCESS_REWARD
                    done = True
                    action_was_successful = True
                    break
            action_was_successful = self.environment.last_action_success

        return reward, done, action_was_successful, arrive

    # Set the target index.
    @property
    def target_object_index(self):
        """ Return the index which corresponds to the target object. """
        return self._target_object_index

    @target_object_index.setter
    def target_object_index(self, target_object_index):
        """ Set the target object by specifying the index. """
        self._target_object_index = gpuify(
            torch.LongTensor([target_object_index]), self.gpu_id)

    def _new_episode(
        self,
        args,
        scenes,
        possible_targets,
        targets=None,
        keep_obj=False,
        optimal_act=None,
        glove=None,
        det_gt=None,
    ):
        """ New navigation episode. """
        scene = random.choice(scenes)

        img_file_scene = args.images_file_name

        if self._env is None:
            self._env = Environment(
                offline_data_dir=args.offline_data_dir,
                use_offline_controller=True,
                grid_size=0.25,
                # images_file_name=args.images_file_name,
                images_file_name=img_file_scene,
                local_executable_path=args.local_executable_path,
                total_images_file=None)
            self._env.start(scene)
        else:
            self._env.reset(scene)

        # Randomize the start location.
        self._env.randomize_agent_location()

        objects = self._env.all_objects()
        visible_objects = [obj.split("|")[0] for obj in objects]
        intersection = [obj for obj in visible_objects if obj in targets]

        self.task_data = []

        idx = random.randint(0, len(intersection) - 1)
        goal_object_type = intersection[idx]
        self.target_object = goal_object_type

        for id_ in objects:
            type_ = id_.split("|")[0]
            if goal_object_type == type_:
                self.task_data.append(id_)

        if args.verbose:
            print("Scene", scene, "Navigating towards:", goal_object_type)

        # glove = Glove(os.path.join(args.glove_dir, self.environment.controller.scene_name, 'det_feature.hdf5'))
        glove = glove[self.environment.controller.scene_name]

        if optimal_act is not None:
            self.optimal_actions = optimal_act[
                self.environment.controller.scene_name][self.task_data[0]]
        else:
            self.optimal_actions = None

        self.glove_embedding = None
        init_pos = '{}|{}|{}|{}'.format(
            # self.environment.controller.scene_name,
            self.environment.controller.state.position()['x'],
            self.environment.controller.state.position()['z'],
            self.environment.controller.state.rotation,
            self.environment.controller.state.horizon)

        target_embedding_array = np.zeros((len(CLASSES), 1))
        target_embedding_array[CLASSES.index(self.target_object)] = 1
        # glove_embedding_tensor = np.concatenate((glove.glove_embeddings[init_pos][()], target_embedding_array), axis=1)
        glove_embedding_tensor = np.concatenate(
            (glove[init_pos], target_embedding_array), axis=1)

        self.glove_embedding = toFloatTensor(glove_embedding_tensor,
                                             self.gpu_id)
        # self.glove_reader = glove.glove_embeddings
        self.glove_reader = glove

        # self.det_gt = det_gt[self.environment.controller.scene_name]

        # self.glove_embedding = toFloatTensor(
        #     glove.glove_embeddings[goal_object_type][:], self.gpu_id
        # )

    def new_episode(
        self,
        args,
        scenes,
        possible_targets=None,
        targets=None,
        keep_obj=False,
        optimal_act=None,
        glove=None,
        # det_gt=None
    ):
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self.prev_frame = None
        self.current_frame = None
        # self.last_det = False
        # self.current_det = False
        self.det_frame = None
        self.detections = []
        self._new_episode(args, scenes, possible_targets, targets, keep_obj,
                          optimal_act=optimal_act, glove=glove)

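
# --- Illustrative sketch (not part of the original class) -------------------
# In the variant above, the "glove" argument is a per-scene table of features
# keyed by the agent state string "x|z|rotation|horizon", and the target is
# encoded by appending a one-hot column over CLASSES to that table. The
# helper below shows the key construction and the resulting shape with dummy
# numpy arrays; the class list and feature size used in the example are
# assumptions, not values taken from the repository.

import numpy as np


def _sketch_target_feature(features_for_state, classes, target):
    """Append a one-hot target column to a (len(classes), feat_dim) array,
    mirroring the target_embedding_array concatenation above."""
    one_hot = np.zeros((len(classes), 1))
    one_hot[classes.index(target)] = 1
    return np.concatenate((features_for_state, one_hot), axis=1)

# Example (hypothetical values):
#   key = '{}|{}|{}|{}'.format(1.25, -2.50, 90, 0)   # "1.25|-2.5|90|0"
#   feat = _sketch_target_feature(np.zeros((22, 264)), classes_22, "Mug")
#   feat.shape  ->  (22, 265)
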
class BasicEpisode(Episode):
    """ Episode for Navigation. """

    def __init__(self, args, gpu_id, strict_done=False):
        super(BasicEpisode, self).__init__()

        self._env = None
        self.gpu_id = gpu_id
        self.strict_done = strict_done
        self.task_data = None
        self.glove_embedding = None
        self.prototype = None
        self.actions = get_actions(args)
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self._last_action_embedding_idx = 0
        self.target_object = None
        self.prev_frame = None
        self.current_frame = None
        self.grid_size = args.grid_size
        self.goal_success_reward = args.goal_success_reward
        self.step_penalty = args.step_penalty
        self.step_penalty_table = []
        self.episode_id = ""

        # Precompute the per-stage step penalty, decayed once per curriculum stage.
        step_penalty = args.step_penalty
        for _ in range(0, args.max_ep, args.num_ep_per_stage):
            self.step_penalty_table.append(step_penalty)
            step_penalty = step_penalty * args.penalty_decay

        self.scene_states = []
        self.episode_trajectories = []
        self.actions_taken = []
        if args.eval:
            random.seed(args.seed)

    @property
    def environment(self):
        return self._env

    @property
    def actions_list(self):
        return [{"action": a} for a in self.actions]

    def reset(self):
        self.done_count = 0
        self.duplicate_count = 0
        self._env.back_to_start()

    def state_for_agent(self):
        return self.environment.current_frame

    def current_agent_position(self):
        """ Get the current position of the agent in the scene. """
        return self.environment.current_agent_position

    def step(self, action_as_int):
        action = self.actions_list[action_as_int]
        if action["action"] != DONE:
            self.environment.step(action)
        else:
            self.done_count += 1

        reward, terminal, action_was_successful = self.judge(action)
        return reward, terminal, action_was_successful

    def judge(self, action):
        """ Judge the last event. """
        reward = self.step_penalty

        # Thresholding replaced with simple look up for efficiency.
        if self.environment.controller.state in self.scene_states:
            if action["action"] != DONE:
                if self.environment.last_action_success:
                    self.duplicate_count += 1
                else:
                    self.failed_action_count += 1
        else:
            self.scene_states.append(self.environment.controller.state)

        self.episode_trajectories.append(self.environment.controller.state)

        done = False

        if action["action"] == DONE:
            action_was_successful = False
            for id_ in self.task_data:
                if self.environment.object_is_visible(id_):
                    reward = self.goal_success_reward
                    done = True
                    action_was_successful = True
                    break
        else:
            action_was_successful = self.environment.last_action_success

        return reward, done, action_was_successful

    # Set the target index.
    @property
    def target_object_index(self):
        """ Return the index which corresponds to the target object. """
        return self._target_object_index

    @target_object_index.setter
    def target_object_index(self, target_object_index):
        """ Set the target object by specifying the index. """
        self._target_object_index = gpuify(
            torch.LongTensor([target_object_index]), self.gpu_id)

    def _new_random_episode(self,
                            args,
                            scenes,
                            possible_targets,
                            targets=None,
                            keep_obj=False,
                            glove=None,
                            protos=None,
                            pre_metadata=None):
        """ New navigation episode. """
        # random episode
        scene = None
        retry = 0
        while scene not in os.listdir(args.offline_data_dir):
            scene = random.choice(scenes)
            retry += 1
            if retry >= 1000:
                raise Exception("No scenes found in {}".format(
                    args.offline_data_dir))

        if self._env is None:
            self._env = Environment(
                offline_data_dir=args.offline_data_dir,
                use_offline_controller=True,
                grid_size=self.grid_size,
                images_file_name=args.images_file_name,
                local_executable_path=args.local_executable_path,
                rotate_by=args.rotate_by,
                state_decimal=args.state_decimal,
                pinned_scene=args.pinned_scene,
                pre_metadata=pre_metadata,
                actions=self.actions)
            self._env.start(scene)
        else:
            self._env.reset(scene)

        # Randomize the start location.
        self._env.randomize_agent_location()
        objects = self._env.all_objects()
        visible_objects = [obj.split("|")[0] for obj in objects]
        intersection = [obj for obj in visible_objects if obj in targets]

        self.task_data = []

        idx = random.randint(0, len(intersection) - 1)
        object_type = intersection[idx]
        self.target_object = object_type

        for id_ in objects:
            type_ = id_.split("|")[0]
            if object_type == type_:
                self.task_data.append(id_)

        if args.verbose:
            print("Scene", scene, "Navigating towards:", object_type)

        self.episode_trajectories = []
        self.actions_taken = []

        if args.glove_file != "":
            self.glove_embedding = toFloatTensor(
                glove.glove_embeddings[object_type][:], self.gpu_id)
        if args.proto_file != "":
            self.prototype = toFloatTensor(
                protos.protos[object_type.lower()][:], self.gpu_id)

        return scene

    # curriculum_meta: episodes indexed by scene, difficulty, object_type in order
    def _new_curriculum_episode(self,
                                args,
                                scenes,
                                possible_targets,
                                targets=None,
                                keep_obj=False,
                                glove=None,
                                protos=None,
                                pre_metadata=None,
                                curriculum_meta=None,
                                total_ep=0):
        """ New navigation episode. """
        # choose a scene
        scene = None
        retry = 0
        flag_episode_valid = False
        while not flag_episode_valid:
            # choose a scene
            valid_scenes = os.listdir(args.offline_data_dir)
            intersection_scenes = [
                scene for scene in scenes if scene in valid_scenes
            ]
            scene = random.choice(intersection_scenes)

            # TODO: choose difficulty
            try:
                diff = round(total_ep // args.num_ep_per_stage) + 1
                diff_idx = random.choice(range(diff))
                # if total_ep < args.difficulty_upgrade_step:
                #     diff = DIFFICULTY[0]
                # elif total_ep < 2 * args.difficulty_upgrade_step:
                #     diff = random.choice(DIFFICULTY[:2])
                # else:
                #     diff = random.choice(DIFFICULTY[:3])

                # choose object
                # visible_objects = curriculum_meta[scene][diff].keys()
                # intersection_objs = [obj for obj in visible_objects if obj in targets]
                # object_type = random.choice(intersection_objs)
                episode = random.choice(curriculum_meta[scene][diff_idx])
                object_type = episode['object_type'].replace(" ", "")
                if object_type not in targets:
                    continue

                # to plot trajectory by xiaodong
                # state_pattern: x, z, rotation_degree, horizon_degree
                state_pattern = "{:0." + str(
                    args.state_decimal) + "f}|{:0." + str(
                        args.state_decimal) + "f}|{:d}|{:d}"
                self.init_pos_str = state_pattern.format(
                    episode['initial_position']['x'],
                    episode['initial_position']['z'],
                    episode['initial_orientation'], 0)
                self.target_pos_str = state_pattern.format(
                    episode['target_position']['x'],
                    episode['target_position']['z'], 0, 0)
                self.object_type = object_type
            except Exception:  # originally a bare except; kept broad to skip malformed episodes
                continue

            # TODO: Present validity checking method breaks the principle of tiered-design and decoupling
            # TODO: Find a better way to check the validity of an episode by junting, 2020-04-10
            state = ThorAgentState(**episode['initial_position'],
                                   rotation=episode['initial_orientation'],
                                   horizon=0,
                                   state_decimal=args.state_decimal)
            if str(state) in pre_metadata[scene]['all_states']:
                flag_episode_valid = True
            else:
                print(
                    "Episode ID {} is not valid: its initial state is missing from all_states"
                    .format(episode['id']))

        if self._env is None:
            self._env = Environment(
                offline_data_dir=args.offline_data_dir,
                use_offline_controller=True,
                grid_size=self.grid_size,
                images_file_name=args.images_file_name,
                local_executable_path=args.local_executable_path,
                rotate_by=args.rotate_by,
                state_decimal=args.state_decimal,
                pinned_scene=args.pinned_scene,
                pre_metadata=pre_metadata,
                actions=self.actions)
            self._env.start(scene)
        else:
            self._env.reset(scene)

        # Initialize the start location.
        self._env.initialize_agent_location(
            **episode['initial_position'],
            rotation=episode['initial_orientation'],
            horizon=0)

        self.task_data = []
        self.target_object = object_type
        self.task_data.append(episode['object_id'])
        self.episode_id = episode['id']

        self.episode_trajectories = []
        self.actions_taken = []

        if args.verbose:
            print("Episode: Scene ", scene, " Difficulty ", diff,
                  " Navigating towards: ", object_type)

        if args.glove_file != "":
            self.glove_embedding = toFloatTensor(
                glove.glove_embeddings[object_type][:], self.gpu_id)
        if args.proto_file != "":
            self.prototype = toFloatTensor(
                protos.protos[object_type.lower()][:], self.gpu_id)

        return scene

    def new_episode(
        self,
        args,
        scenes,
        possible_targets=None,
        targets=None,
        keep_obj=False,
        glove=None,
        protos=None,
        pre_metadata=None,
        curriculum_meta=None,
        total_ep=0,
    ):
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self.prev_frame = None
        self.current_frame = None

        if args.curriculum_learning:
            # set penalty decay for the current curriculum stage
            diff = round(total_ep // args.num_ep_per_stage) + 1
            self.step_penalty = self.step_penalty_table[diff - 1]
            return self._new_curriculum_episode(args, scenes,
                                                possible_targets, targets,
                                                keep_obj, glove, protos,
                                                pre_metadata, curriculum_meta,
                                                total_ep)

        return self._new_random_episode(args, scenes, possible_targets,
                                        targets, keep_obj, glove, protos,
                                        pre_metadata)

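
# --- Illustrative sketch (not part of the original class) -------------------
# In the curriculum variant above, episodes are grouped into stages of
# num_ep_per_stage episodes: the stage index bounds which difficulty levels
# can be sampled, and the step penalty for that stage comes from
# step_penalty_table, decayed by penalty_decay once per stage. The helper
# below reproduces that schedule as a pure function; the numeric values in
# the example are assumptions for illustration only.

def _sketch_curriculum_schedule(total_ep, num_ep_per_stage, step_penalty,
                                penalty_decay):
    """Return (stage, step_penalty_for_stage) for a given episode count,
    matching diff = total_ep // num_ep_per_stage + 1 and
    step_penalty_table[diff - 1] above."""
    stage = total_ep // num_ep_per_stage + 1  # 1-based stage index
    return stage, step_penalty * penalty_decay ** (stage - 1)

# Example (hypothetical values): with num_ep_per_stage=100000,
# step_penalty=-0.01 and penalty_decay=0.5,
#   total_ep = 0       -> stage 1, penalty -0.01
#   total_ep = 150000  -> stage 2, penalty -0.005
#   total_ep = 250000  -> stage 3, penalty -0.0025
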