Code Example #1
class BasicEpisode(Episode):
    """ Episode for Navigation. """
    def __init__(self, args, gpu_id, strict_done=False):
        super(BasicEpisode, self).__init__()

        self._env = None
        self.gpu_id = gpu_id
        self.strict_done = strict_done
        self.task_data = None
        self.glove_embedding = None
        self.actions = get_actions(args)
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self._last_action_embedding_idx = 0
        self.target_object = None
        self.prev_frame = None
        self.current_frame = None
        self.current_objs = None

        self.scene_states = []
        self.partial_reward = args.partial_reward
        self.seen_list = []
        self.target_parents = None
        # cache the flag here; args is not in scope inside step()
        self.vis = args.vis
        if args.eval:
            random.seed(args.seed)
        self.room = None

    @property
    def environment(self):
        return self._env

    @property
    def actions_list(self):
        return [{"action": a} for a in self.actions]

    def reset(self):
        self.done_count = 0
        self.duplicate_count = 0
        self._env.back_to_start()

    def state_for_agent(self):
        return self.environment.current_frame

    def objstate_for_agent(self):
        return self.environment.current_objs

    def current_agent_position(self):
        """ Get the current position of the agent in the scene. """
        return self.environment.current_agent_position

    def step(self, action_as_int):

        action = self.actions_list[action_as_int]

        if self.vis:
            print(action)

        if action["action"] != DONE:
            self.environment.step(action)

        reward, terminal, action_was_successful, next_done = self.judge(action)

        return reward, terminal, action_was_successful, next_done

    def judge(self, action):
        """ Judge the last event. """
        reward = STEP_PENALTY

        # Thresholding replaced with simple look up for efficiency.
        if self.environment.controller.state in self.scene_states:
            if action["action"] != DONE:
                if self.environment.last_action_success:
                    self.duplicate_count += 1
                else:
                    self.failed_action_count += 1

                # added partial reward
                if self.partial_reward:
                    reward = self.get_partial_reward()
        else:
            self.scene_states.append(self.environment.controller.state)

        done = False
        next_done = False

        if action["action"] == DONE:
            self.done_count = 1
            action_was_successful = False
            for id_ in self.task_data:
                if self.environment.object_is_visible(id_):
                    reward = GOAL_SUCCESS_REWARD
                    done = True
                    action_was_successful = True
                    if self.partial_reward:
                        self.seen_list = []
                        reward += self.get_partial_reward()
                    break
            self.seen_list = []
        else:
            action_was_successful = self.environment.last_action_success
            for id_ in self.task_data:
                if self.environment.object_is_visible(id_):
                    next_done = True
                    break
        if done:
            self.seen_list = []
            self.done_count = 1
        return reward, done, action_was_successful, next_done

    # Set the target index.
    @property
    def target_object_index(self):
        """ Return the index which corresponds to the target object. """
        return self._target_object_index

    @target_object_index.setter
    def target_object_index(self, target_object_index):
        """ Set the target object by specifying the index. """
        self._target_object_index = gpuify(
            torch.LongTensor([target_object_index]), self.gpu_id)

    def get_partial_reward(self):
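        """ Return the highest partial reward among parent objects of the target
        that are currently visible and not yet in seen_list; the rewarded parent
        id is appended to seen_list so it is only rewarded once. Defaults to
        STEP_PENALTY when no new parent object is visible. """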
        reward = STEP_PENALTY
        reward_dict = {}
        if self.target_parents is not None:
            for parent_type in self.target_parents:
                parent_ids = self.environment.find_id(parent_type)
                for parent_id in parent_ids:
                    if self.environment.object_is_visible(
                            parent_id) and parent_id not in self.seen_list:
                        reward_dict[parent_id] = self.target_parents[
                            parent_type]
        if len(reward_dict) != 0:
            v = list(reward_dict.values())
            k = list(reward_dict.keys())
            reward = max(v)
            self.seen_list.append(k[v.index(reward)])
        return reward

    def _new_episode(self,
                     args,
                     scenes,
                     possible_targets,
                     targets=None,
                     room=None,
                     keep_obj=False,
                     glove=None):
        """ New navigation episode. """
        scene = random.choice(scenes)
        self.room = room

        if self._env is None:
            self._env = Environment(
                offline_data_dir=args.offline_data_dir,
                use_offline_controller=True,
                grid_size=0.25,
                images_file_name=args.images_file_name,
                local_executable_path=args.local_executable_path,
            )
            self._env.start(scene)
        else:
            self._env.reset(scene)

        # Randomize the start location.
        start_state = self._env.randomize_agent_location()
        objects = self._env.all_objects()

        visible_objects = [obj.split("|")[0] for obj in objects]
        intersection = [obj for obj in visible_objects if obj in targets]

        self.task_data = []

        idx = random.randint(0, len(intersection) - 1)
        goal_object_type = intersection[idx]
        self.target_object = goal_object_type

        for id_ in objects:
            type_ = id_.split("|")[0]
            if goal_object_type == type_:
                self.task_data.append(id_)

        child_object = self.task_data[0].split("|")[0]
        #print('room is ', self.room)
        try:
            self.target_parents = c2p_prob[self.room][child_object]
        except KeyError:
            self.target_parents = None

        if args.verbose:
            print("Scene", scene, "Navigating towards:", goal_object_type)

        self.glove_embedding = None
        self.glove_embedding = toFloatTensor(
            glove.glove_embeddings[goal_object_type][:], self.gpu_id)

    def new_episode(
        self,
        args,
        scenes,
        possible_targets=None,
        targets=None,
        rooms=None,
        keep_obj=False,
        glove=None,
    ):
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self.prev_frame = None
        self.current_frame = None
        self.current_objs = None
        self._new_episode(args, scenes, possible_targets, targets, rooms,
                          keep_obj, glove)
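
A minimal, hypothetical driver loop for the class above (not part of the original snippet): args, scenes, targets, room, glove, agent, and MAX_STEPS are placeholders for objects the surrounding training code would construct.

# Usage sketch under the assumptions stated above.
episode = BasicEpisode(args, gpu_id=0)
episode.new_episode(args, scenes, possible_targets=targets, targets=targets,
                    rooms=room, glove=glove)

for _ in range(MAX_STEPS):
    state = episode.state_for_agent()
    action_as_int = agent.act(state, episode.glove_embedding)  # placeholder policy
    reward, terminal, ok, next_done = episode.step(action_as_int)
    if terminal:
        break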
Code Example #2
File: basic_episode.py, Project: zebrajack/ECCV-VN
class BasicEpisode(Episode):
    """ Episode for Navigation. """
    def __init__(self, args, gpu_id, strict_done=False):
        super(BasicEpisode, self).__init__()

        self._env = None

        self.gpu_id = gpu_id
        self.strict_done = strict_done
        self.task_data = None
        self.glove_embedding = None
        self.actions = get_actions(args)
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self._last_action_embedding_idx = 0
        self.target_object = None
        self.prev_frame = None
        self.current_frame = None
        self.scene = None

        self.scene_states = []
        if args.eval:
            random.seed(args.seed)

        self._episode_times = 0
        self.seen_percentage = 0

        self.state_reps = []
        self.state_memory = []
        self.action_memory = []
        self.obs_reps = []

        self.episode_length = 0
        self.target_object_detected = False

        # tools
        self.states = []
        self.actions_record = []
        self.action_outputs = []
        self.detection_results = []

        # imitation learning
        self.imitation_learning = args.imitation_learning
        self.action_failed_il = False

        self.action_probs = []

        self.meta_learning = args.update_meta_network
        self.meta_predictions = []

        self.visual_infos = {}
        self.match_score = []
        self.indices_topk = []

    @property
    def environment(self):
        return self._env

    @property
    def actions_list(self):
        return [{"action": a} for a in self.actions]

    @property
    def episode_times(self):
        return self._episode_times

    @episode_times.setter
    def episode_times(self, times):
        self._episode_times = times

    def reset(self):
        self.done_count = 0
        self.duplicate_count = 0
        self._env.back_to_start()

    def state_for_agent(self):
        return self.environment.current_frame

    def current_detection_feature(self):
        return self.environment.current_detection_feature

    def current_depth(self):
        return self.environment.current_depth

    def current_agent_position(self):
        """ Get the current position of the agent in the scene. """
        return self.environment.current_agent_position

    def step(self, action_as_int):

        action = self.actions_list[action_as_int]

        if action["action"] != DONE:
            self.environment.step(action)
        else:
            self.done_count += 1

        reward, terminal, action_was_successful = self.judge(action)
        return reward, terminal, action_was_successful

    def judge(self, action):
        """ Judge the last event. """
        reward = STEP_PENALTY
        # Thresholding replaced with simple look up for efficiency.
        if self.environment.controller.state in self.scene_states:
            if action["action"] != DONE:
                if self.environment.last_action_success:
                    self.duplicate_count += 1
                else:
                    self.failed_action_count += 1
        else:
            self.scene_states.append(self.environment.controller.state)

        done = False

        if action["action"] == DONE:
            action_was_successful = False
            for id_ in self.task_data:
                if self.environment.object_is_visible(id_):
                    reward = GOAL_SUCCESS_REWARD
                    done = True
                    action_was_successful = True
                    break
        else:
            action_was_successful = self.environment.last_action_success

        return reward, done, action_was_successful

    # Set the target index.
    @property
    def target_object_index(self):
        """ Return the index which corresponds to the target object. """
        return self._target_object_index

    @target_object_index.setter
    def target_object_index(self, target_object_index):
        """ Set the target object by specifying the index. """
        self._target_object_index = gpuify(
            torch.LongTensor([target_object_index]), self.gpu_id)

    def _new_episode(self, args, scenes, targets):
        """ New navigation episode. """
        scene = random.choice(scenes)
        self.scene = scene

        if self._env is None:
            self._env = Environment(
                offline_data_dir=args.data_dir,
                use_offline_controller=True,
                grid_size=0.25,
                detection_feature_file_name=args.detection_feature_file_name,
                images_file_name=args.images_file_name,
                visible_object_map_file_name=args.visible_map_file_name,
                local_executable_path=args.local_executable_path,
                optimal_action_file_name=args.optimal_action_file_name,
            )
            self._env.start(scene)
        else:
            self._env.reset(scene)

        # Randomize the start location.
        self._env.randomize_agent_location()

        self.task_data = []

        objects = self._env.all_objects()

        visible_objects = [obj.split("|")[0] for obj in objects]
        intersection = [obj for obj in visible_objects if obj in targets]

        idx = random.randint(0, len(intersection) - 1)
        goal_object_type = intersection[idx]
        self.target_object = goal_object_type

        for id_ in objects:
            type_ = id_.split("|")[0]
            if goal_object_type == type_:
                self.task_data.append(id_)

        if args.verbose:
            print("Scene", scene, "Navigating towards:", goal_object_type)

    def new_episode(self, args, scenes, targets):
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self.episode_length = 0
        self.prev_frame = None
        self.current_frame = None
        self.scene_states = []

        self.state_reps = []
        self.state_memory = []
        self.action_memory = []

        self.target_object_detected = False

        self.episode_times += 1

        self.states = []
        self.actions_record = []
        self.action_outputs = []
        self.detection_results = []
        self.obs_reps = []

        self.action_failed_il = False

        self.action_probs = []
        self.meta_predictions = []
        self.visual_infos = {}
        self.match_score = []
        self.indices_topk = []

        self._new_episode(args, scenes, targets)
Code Example #3
File: basic_episode.py, Project: xiaobaishu0097/savn
class BasicEpisode(Episode):
    """ Episode for Navigation. """
    def __init__(self, args, gpu_id, strict_done=False):
        super(BasicEpisode, self).__init__()

        self._env = None

        self.gpu_id = gpu_id
        self.strict_done = strict_done
        self.task_data = None
        self.glove_embedding = None
        self.actions = get_actions(args)
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self._last_action_embedding_idx = 0
        self.target_object = None
        self.prev_frame = None
        self.current_frame = None
        self.det_frame = None

        self.last_det = False
        self.current_det = False
        self.det_gt = None
        self.optimal_actions = None

        self.scene_states = []
        self.detections = []
        if args.eval:
            random.seed(args.seed)

    @property
    def environment(self):
        return self._env

    @property
    def actions_list(self):
        return [{"action": a} for a in self.actions]

    def reset(self):
        self.done_count = 0
        self.duplicate_count = 0
        self._env.back_to_start()

    def state_for_agent(self):
        return self.environment.current_frame

    def current_agent_position(self):
        """ Get the current position of the agent in the scene. """
        return self.environment.current_agent_position

    def step(self, action_as_int, arrive):

        self.last_det = self.current_det
        action = self.actions_list[action_as_int]

        if action["action"] != DONE:
            self.environment.step(action)
        else:
            self.done_count += 1

        reward, terminal, action_was_successful, arrive = self.judge(
            action, arrive)
        return reward, terminal, action_was_successful, arrive

    def judge(self, action, arrive):
        """ Judge the last event. """
        reward = STEP_PENALTY

        # Thresholding replaced with simple look up for efficiency.
        if self.environment.controller.state in self.scene_states:
            if action["action"] != DONE:
                if self.environment.last_action_success:
                    self.duplicate_count += 1
                else:
                    self.failed_action_count += 1
        else:
            self.scene_states.append(self.environment.controller.state)

        done = False

        if action["action"] == DONE:
            action_was_successful = False
            for id_ in self.task_data:
                if self.environment.object_is_visible(id_):
                    reward = GOAL_SUCCESS_REWARD
                    done = True
                    action_was_successful = True
                    break
        else:
            # test for 100% accuracy of target detection
            for id_ in self.task_data:
                if self.environment.object_is_visible(id_):
                    arrive = True
                    reward = GOAL_SUCCESS_REWARD
                    done = True
                    action_was_successful = True
                    break
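            # note: the flag set inside the loop above is overwritten below by
            # the environment's last-action success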
            action_was_successful = self.environment.last_action_success

        return reward, done, action_was_successful, arrive

    # Set the target index.
    @property
    def target_object_index(self):
        """ Return the index which corresponds to the target object. """
        return self._target_object_index

    @target_object_index.setter
    def target_object_index(self, target_object_index):
        """ Set the target object by specifying the index. """
        self._target_object_index = gpuify(
            torch.LongTensor([target_object_index]), self.gpu_id)

    def _new_episode(
        self,
        args,
        scenes,
        possible_targets,
        targets=None,
        keep_obj=False,
        optimal_act=None,
        glove=None,
        det_gt=None,
    ):
        """ New navigation episode. """
        scene = random.choice(scenes)

        img_file_scene = args.images_file_name

        if self._env is None:
            self._env = Environment(
                offline_data_dir=args.offline_data_dir,
                use_offline_controller=True,
                grid_size=0.25,
                # images_file_name=args.images_file_name,
                images_file_name=img_file_scene,
                local_executable_path=args.local_executable_path,
                total_images_file=None)
            self._env.start(scene)
        else:
            self._env.reset(scene)

        # Randomize the start location.
        self._env.randomize_agent_location()
        objects = self._env.all_objects()

        visible_objects = [obj.split("|")[0] for obj in objects]
        intersection = [obj for obj in visible_objects if obj in targets]

        self.task_data = []

        idx = random.randint(0, len(intersection) - 1)
        goal_object_type = intersection[idx]
        self.target_object = goal_object_type

        for id_ in objects:
            type_ = id_.split("|")[0]
            if goal_object_type == type_:
                self.task_data.append(id_)

        if args.verbose:
            print("Scene", scene, "Navigating towards:", goal_object_type)

        # glove = Glove(os.path.join(args.glove_dir, self.environment.controller.scene_name, 'det_feature.hdf5'))
        glove = glove[self.environment.controller.scene_name]
        if optimal_act is not None:
            self.optimal_actions = optimal_act[
                self.environment.controller.scene_name][self.task_data[0]]
        else:
            self.optimal_actions = None

        self.glove_embedding = None

        init_pos = '{}|{}|{}|{}'.format(
            # self.environment.controller.scene_name,
            self.environment.controller.state.position()['x'],
            self.environment.controller.state.position()['z'],
            self.environment.controller.state.rotation,
            self.environment.controller.state.horizon)

        target_embedding_array = np.zeros((len(CLASSES), 1))
        target_embedding_array[CLASSES.index(self.target_object)] = 1
        # glove_embedding_tensor = np.concatenate((glove.glove_embeddings[init_pos][()], target_embedding_array), axis=1)
        glove_embedding_tensor = np.concatenate(
            (glove[init_pos], target_embedding_array), axis=1)

        self.glove_embedding = toFloatTensor(glove_embedding_tensor,
                                             self.gpu_id)
        # self.glove_reader = glove.glove_embeddings
        self.glove_reader = glove
        # self.det_gt = det_gt[self.environment.controller.scene_name]

        # self.glove_embedding = toFloatTensor(
        #     glove.glove_embeddings[goal_object_type][:], self.gpu_id
        # )

    def new_episode(
        self,
        args,
        scenes,
        possible_targets=None,
        targets=None,
        keep_obj=False,
        optimal_act=None,
        glove=None,
        # det_gt=None
    ):
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self.prev_frame = None
        self.current_frame = None
        # self.last_det = False
        # self.current_det = False
        self.det_frame = None
        self.detections = []
        self._new_episode(args,
                          scenes,
                          possible_targets,
                          targets,
                          keep_obj,
                          optimal_act=optimal_act,
                          glove=glove)
Code Example #4
class BasicEpisode(Episode):
    """ Episode for Navigation. """
    def __init__(self, args, gpu_id, strict_done=False):
        super(BasicEpisode, self).__init__()

        self._env = None

        self.gpu_id = gpu_id
        self.strict_done = strict_done
        self.task_data = None
        self.glove_embedding = None
        self.prototype = None
        self.actions = get_actions(args)
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self._last_action_embedding_idx = 0
        self.target_object = None
        self.prev_frame = None
        self.current_frame = None
        self.grid_size = args.grid_size
        self.goal_success_reward = args.goal_success_reward
        self.step_penalty = args.step_penalty
        self.step_penalty_table = []
        self.episode_id = ""

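        # Precompute a per-stage step penalty: the penalty decays by
        # args.penalty_decay once every args.num_ep_per_stage episodes and is
        # looked up from step_penalty_table when curriculum learning is on.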
        step_penalty = args.step_penalty
        for _ in range(0, args.max_ep, args.num_ep_per_stage):
            self.step_penalty_table.append(step_penalty)
            step_penalty = step_penalty * args.penalty_decay

        self.scene_states = []
        self.episode_trajectories = []
        self.actions_taken = []
        if args.eval:
            random.seed(args.seed)

    @property
    def environment(self):
        return self._env

    @property
    def actions_list(self):
        return [{"action": a} for a in self.actions]

    def reset(self):
        self.done_count = 0
        self.duplicate_count = 0
        self._env.back_to_start()

    def state_for_agent(self):
        return self.environment.current_frame

    def current_agent_position(self):
        """ Get the current position of the agent in the scene. """
        return self.environment.current_agent_position

    def step(self, action_as_int):

        action = self.actions_list[action_as_int]

        if action["action"] != DONE:
            self.environment.step(action)
        else:
            self.done_count += 1

        reward, terminal, action_was_successful = self.judge(action)
        return reward, terminal, action_was_successful

    def judge(self, action):
        """ Judge the last event. """
        reward = self.step_penalty

        # Thresholding replaced with simple look up for efficiency.
        if self.environment.controller.state in self.scene_states:
            if action["action"] != DONE:
                if self.environment.last_action_success:
                    self.duplicate_count += 1
                else:
                    self.failed_action_count += 1
        else:
            self.scene_states.append(self.environment.controller.state)

        self.episode_trajectories.append(self.environment.controller.state)
        done = False

        if action["action"] == DONE:
            action_was_successful = False
            for id_ in self.task_data:
                if self.environment.object_is_visible(id_):
                    reward = self.goal_success_reward
                    done = True
                    action_was_successful = True
                    break
        else:
            action_was_successful = self.environment.last_action_success

        return reward, done, action_was_successful

    # Set the target index.
    @property
    def target_object_index(self):
        """ Return the index which corresponds to the target object. """
        return self._target_object_index

    @target_object_index.setter
    def target_object_index(self, target_object_index):
        """ Set the target object by specifying the index. """
        self._target_object_index = gpuify(
            torch.LongTensor([target_object_index]), self.gpu_id)

    def _new_random_episode(self,
                            args,
                            scenes,
                            possible_targets,
                            targets=None,
                            keep_obj=False,
                            glove=None,
                            protos=None,
                            pre_metadata=None):
        """ New navigation episode. """
        #random episode
        scene = None
        retry = 0
        while scene not in os.listdir(args.offline_data_dir):
            scene = random.choice(scenes)
            retry += 1
            if retry >= 1000:
                raise Exception("No scenes found in {}".format(
                    args.offline_data_dir))

        if self._env is None:
            self._env = Environment(
                offline_data_dir=args.offline_data_dir,
                use_offline_controller=True,
                grid_size=self.grid_size,
                images_file_name=args.images_file_name,
                local_executable_path=args.local_executable_path,
                rotate_by=args.rotate_by,
                state_decimal=args.state_decimal,
                pinned_scene=args.pinned_scene,
                pre_metadata=pre_metadata,
                actions=self.actions)
            self._env.start(scene)
        else:
            self._env.reset(scene)

        # Randomize the start location.
        self._env.randomize_agent_location()
        objects = self._env.all_objects()

        visible_objects = [obj.split("|")[0] for obj in objects]
        intersection = [obj for obj in visible_objects if obj in targets]

        self.task_data = []

        idx = random.randint(0, len(intersection) - 1)
        object_type = intersection[idx]
        self.target_object = object_type

        for id_ in objects:
            type_ = id_.split("|")[0]
            if object_type == type_:
                self.task_data.append(id_)

        if args.verbose:
            print("Scene", scene, "Navigating towards:", object_type)
        self.episode_trajectories = []
        self.actions_taken = []

        if args.glove_file != "":
            self.glove_embedding = toFloatTensor(
                glove.glove_embeddings[object_type][:], self.gpu_id)
        if args.proto_file != "":
            self.prototype = toFloatTensor(
                protos.protos[object_type.lower()][:], self.gpu_id)
        return scene

    # curriculum_meta: episodes indexed by scene, difficulty, object_type in order
    def _new_curriculum_episode(self,
                                args,
                                scenes,
                                possible_targets,
                                targets=None,
                                keep_obj=False,
                                glove=None,
                                protos=None,
                                pre_metadata=None,
                                curriculum_meta=None,
                                total_ep=0):
        """ New navigation episode. """
        # choose a scene
        scene = None
        retry = 0

        flag_episode_valid = False
        while not flag_episode_valid:
            # choose a scene
            valid_scenes = os.listdir(args.offline_data_dir)
            intersection_scenes = [
                scene for scene in scenes if scene in valid_scenes
            ]
            scene = random.choice(intersection_scenes)
            # TODO: choose difficulty
            try:
                diff = round(total_ep // args.num_ep_per_stage) + 1
                diff_idx = random.choice(range(diff))
                # if total_ep < args.difficulty_upgrade_step:
                #     diff = DIFFICULTY[0]
                # elif total_ep < 2 * args.difficulty_upgrade_step:
                #     diff = random.choice(DIFFICULTY[:2])
                # else:
                #     diff = random.choice(DIFFICULTY[:3])

                # choose object
                # visible_objects = curriculum_meta[scene][diff].keys()
                # intersection_objs = [obj for obj in visible_objects if obj in targets]
                # object_type = random.choice(intersection_objs)

                episode = random.choice(curriculum_meta[scene][diff_idx])
                object_type = episode['object_type'].replace(" ", "")
                if object_type not in targets:
                    continue

                # to plot trajectory by xiaodong
                # state_pattern: x, z, rotation_degree, horizon_degree
                state_pattern = "{:0." + str(
                    args.state_decimal) + "f}|{:0." + str(
                        args.state_decimal) + "f}|{:d}|{:d}"
                self.init_pos_str = state_pattern.format(
                    episode['initial_position']['x'],
                    episode['initial_position']['z'],
                    episode['initial_orientation'], 0)
                self.target_pos_str = state_pattern.format(
                    episode['target_position']['x'],
                    episode['target_position']['z'], 0, 0)
                self.object_type = object_type

            except (KeyError, IndexError):
                continue

            # TODO: Present validity checking method breaks the principle of tiered-design and decoupling
            # TODO: Find a better way to check the validity of an episode  by junting, 2020-04-10

            state = ThorAgentState(**episode['initial_position'],
                                   rotation=episode['initial_orientation'],
                                   horizon=0,
                                   state_decimal=args.state_decimal)
            if str(state) in pre_metadata[scene]['all_states']:
                flag_episode_valid = True
            else:
                print(
                    "Episode ID {} not valid for its initial state missing from all_states"
                    .format(episode['id']))

        if self._env is None:
            self._env = Environment(
                offline_data_dir=args.offline_data_dir,
                use_offline_controller=True,
                grid_size=self.grid_size,
                images_file_name=args.images_file_name,
                local_executable_path=args.local_executable_path,
                rotate_by=args.rotate_by,
                state_decimal=args.state_decimal,
                pinned_scene=args.pinned_scene,
                pre_metadata=pre_metadata,
                actions=self.actions)
            self._env.start(scene)
        else:
            self._env.reset(scene)

        # initialize the start location.

        self._env.initialize_agent_location(
            **episode['initial_position'],
            rotation=episode['initial_orientation'],
            horizon=0)
        self.task_data = []
        self.target_object = object_type
        self.task_data.append(episode['object_id'])
        self.episode_id = episode['id']
        self.episode_trajectories = []
        self.actions_taken = []

        if args.verbose:
            print("Episode: Scene ", scene, " Difficulty ", diff,
                  " Navigating towards: ", object_type)

        if args.glove_file != "":
            self.glove_embedding = toFloatTensor(
                glove.glove_embeddings[object_type][:], self.gpu_id)
        if args.proto_file != "":
            self.prototype = toFloatTensor(
                protos.protos[object_type.lower()][:], self.gpu_id)
        return scene

    def new_episode(
        self,
        args,
        scenes,
        possible_targets=None,
        targets=None,
        keep_obj=False,
        glove=None,
        protos=None,
        pre_metadata=None,
        curriculum_meta=None,
        total_ep=0,
    ):
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self.prev_frame = None
        self.current_frame = None

        if args.curriculum_learning:
            diff = round(total_ep // args.num_ep_per_stage) + 1
            self.step_penalty = self.step_penalty_table[diff - 1]

            return self._new_curriculum_episode(args, scenes, possible_targets,
                                                targets, keep_obj, glove,
                                                protos, pre_metadata,
                                                curriculum_meta, total_ep)
            # set penalty decay

        return self._new_random_episode(args, scenes, possible_targets,
                                        targets, keep_obj, glove, protos,
                                        pre_metadata)