Ejemplo n.º 1
0
    def __init__(self, run_name=""):

        super(ModelTrajectoryToAction, self).__init__()
        self.model_name = "lsvd_action"
        self.run_name = run_name
        self.writer = LoggingSummaryWriter(log_dir="runs/" + run_name)

        self.params = get_current_parameters()["ModelPVN"]
        self.aux_weights = get_current_parameters()["AuxWeights"]

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

        # Common
        # --------------------------------------------------------------------------------------------------------------
        self.map_transform_w_to_s = MapTransformerBase(
            source_map_size=self.params["global_map_size"],
            dest_map_size=self.params["local_map_size"],
            world_size=self.params["world_size_px"])

        self.map_transform_r_to_w = MapTransformerBase(
            source_map_size=self.params["local_map_size"],
            dest_map_size=self.params["global_map_size"],
            world_size=self.params["world_size_px"])

        # Output an action given the global semantic map
        if self.params["map_to_action"] == "downsample2":
            self.map_to_action = EgoMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"])

        elif self.params["map_to_action"] == "cropped":
            self.map_to_action = CroppedMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                manual=self.params["manual_rule"],
                path_only=self.params["action_in_path_only"],
                recurrence=self.params["action_recurrence"])

        self.spatialsoftmax = SpatialSoftmax2d()
        self.gt_fill_missing = MapBatchFillMissing(
            self.params["local_map_size"], self.params["world_size_px"])

        # Don't freeze the trajectory to action weights, because it will be pre-trained during path-prediction training
        # and finetuned on all timesteps end-to-end
        enable_weight_saving(self.map_to_action,
                             "map_to_action",
                             alwaysfreeze=False,
                             neverfreeze=True)

        self.action_loss = ActionLoss()

        self.env_id = None
        self.seg_idx = None
        self.prev_instruction = None
        self.seq_step = 0
        self.get_act_start_pose = None
        self.gt_labels = None
Ejemplo n.º 2
0
    def __init__(self, source_map_size, world_in_map_size, lamda=0.2):
        super(LeakyIntegratorMap, self).__init__(source_map_size, world_in_map_size)
        self.map_size = source_map_size
        self.world_size = world_in_map_size
        self.child_transformer = MapTransformerBase(source_map_size, world_in_map_size)
        self.lamda = lamda

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.map_memory = MapTransformerBase(source_map_size, world_in_map_size)

        self.dbg_t = None
        self.seq = 0
Ejemplo n.º 3
0
    def __init__(self, source_map_size, world_in_map_size):
        super(MapBatchFillMissing, self).__init__(source_map_size, world_in_map_size)
        self.map_size = source_map_size
        self.world_size = world_in_map_size
        self.child_transformer = MapTransformerBase(source_map_size, world_in_map_size)

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.map_memory = MapTransformerBase(source_map_size, world_in_map_size)

        self.last_observation = None

        self.dbg_t = None
        self.seq = 0
Ejemplo n.º 4
0
    def __init__(self, source_map_size, world_size_px, world_size_m):
        super(IdentityIntegratorMap, self).__init__(source_map_size, world_size_px, world_size_m)
        self.map_size = source_map_size
        self.world_size = world_size_px
        self.world_size_m = world_size_m
        self.child_transformer = MapTransformerBase(source_map_size, world_size_px, world_size_m)

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.map_memory = MapTransformerBase(source_map_size, world_size_px, world_size_m)

        self.last_observation = None

        self.dbg_t = None
        self.seq = 0
Ejemplo n.º 5
0
    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        #TODO: Absorb corpus within model
        :return:
        """
        prof = SimpleProfiler(print=True)
        prof.tick(".")
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state
        state = Variable(none_padded_seq_to_tensor([state_np]))

        #print("Act: " + debug_untokenize_instruction(instruction))

        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction
        if first_step:
            self.get_act_start_pose = self.cam_poses_from_states(state[0:1])

        self.seq_step += 1

        # This is for training the policy to mimic the ground-truth state distribution with oracle actions
        # b_traj_gt_w_select = b_traj_ground_truth[b_plan_mask_t[:, np.newaxis, np.newaxis, np.newaxis].expand_as(b_traj_ground_truth)].view([-1] + gtsize)
        traj_gt_w = Variable(self.gt_labels)
        b_poses = self.cam_poses_from_states(state)
        # TODO: These source and dest should go as arguments to get_maps (in forward pass not params)
        transformer = MapTransformerBase(
            source_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            dest_map_size=self.params["local_map_size"],
            world_size_m=self.params["world_size_m"])
        self.maybe_cuda(transformer)
        transformer.set_maps(traj_gt_w, None)
        traj_gt_r, _ = transformer.get_maps(b_poses)
        self.clear_inputs("traj_gt_r_select")
        self.clear_inputs("traj_gt_w_select")
        self.keep_inputs("traj_gt_r_select", traj_gt_r)
        self.keep_inputs("traj_gt_w_select", traj_gt_w)

        action = self(traj_gt_r, firstseg=[self.seq_step == 1])

        output_action = action.squeeze().data.cpu().numpy()

        stop_prob = output_action[3]
        output_stop = 1 if stop_prob > self.params["stop_threshold"] else 0
        output_action[3] = output_stop

        return output_action
Ejemplo n.º 6
0
    def __init__(self, source_map_size, world_in_map_size, lamda=0.2):
        super(DrawStartPosOnGlobalMap, self).__init__(source_map_size, world_in_map_size)
        self.map_size = source_map_size
        self.world_size = world_in_map_size
        self.child_transformer = MapTransformerBase(source_map_size, world_in_map_size)

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.start_pose = None
        self.last_emb = None

        self.dbg_t = None
        self.seq = 0
Ejemplo n.º 7
0
    def dbg_write_extra(self, map, pose):
        if DebugWriter().should_write():
            self.seq += 1
            # Initialize a transformer module
            if self.dbg_t is None:
                self.dbg_t = MapTransformerBase(self.map_size, self.world_size)
                if self.is_cuda:
                    self.dbg_t.cuda(self.cuda_device)

            # Transform the prediction to the global frame and write out to disk.
            self.dbg_t.set_map(map, pose)
            map_global, _ = self.dbg_t.get_map(None)
            DebugWriter().write_img(map_global[0], "gif_overlaid", args={"world_size": self.world_size, "name": "pathpred"})
Ejemplo n.º 8
0
    def dbg_write_extra(self, map, pose):
        if DebugWriter().should_write():
            map = map[0:1, 0:3]
            self.seq += 1
            # Initialize a transformer module
            if pose is not None:
                if self.dbg_t is None:
                    self.dbg_t = MapTransformerBase(self.map_size, self.world_size, self.world_size_m).to(map.device)

                # Transform the prediction to the global frame and write out to disk.
                self.dbg_t.set_map(map, pose)
                map_global, _ = self.dbg_t.get_map(None)
            else:
                map_global = map
            DebugWriter().write_img(map_global[0], "gif_overlaid", args={"world_size": self.world_size, "name": "identity_integrator"})
Ejemplo n.º 9
0
    def __init__(self,
                 source_map_size,
                 world_size_px,
                 world_size_m,
                 lamda=0.2):
        super(LeakyIntegratorGlobalMap,
              self).__init__(source_map_size, world_size_px, world_size_m)
        self.map_size_px = source_map_size
        self.world_size_px = world_size_px
        self.world_size_m = world_size_m
        self.child_transformer = MapTransformerBase(source_map_size,
                                                    world_size_px,
                                                    world_size_m)
        self.lamda = lamda

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.map_memory = []
        self.coverage_memory = []

        self.dbg_t = None
        self.seq = 0
Ejemplo n.º 10
0
    def sup_loss_on_batch(self, batch, eval):
        self.prof.tick("out")

        action_loss_total = Variable(
            empty_float_tensor([1], self.is_cuda, self.cuda_device))

        if batch is None:
            print("Skipping None Batch")
            return action_loss_total

        actions = self.maybe_cuda(batch["actions"])
        states = self.maybe_cuda(batch["states"])

        firstseg_mask = batch["firstseg_mask"]

        # Auxiliary labels
        traj_ground_truth_select = self.maybe_cuda(batch["traj_ground_truth"])
        # stops = self.maybe_cuda(batch["stops"])
        metadata = batch["md"]
        batch_size = actions.size(0)
        count = 0

        # Loop thru batch
        for b in range(batch_size):
            seg_idx = -1

            self.reset()

            self.prof.tick("out")
            b_seq_len = len_until_nones(metadata[b])

            # TODO: Generalize this
            # Slice the data according to the sequence length
            b_metadata = metadata[b][:b_seq_len]
            b_actions = actions[b][:b_seq_len]
            b_traj_ground_truth_select = traj_ground_truth_select[b]
            b_states = states[b][:b_seq_len]

            self.keep_inputs("traj_gt_global_select",
                             b_traj_ground_truth_select)

            #b_firstseg = get_obs_mask_segstart(b_metadata)
            b_firstseg = firstseg_mask[b][:b_seq_len]

            # ----------------------------------------------------------------------------
            # Optional Auxiliary Inputs
            # ----------------------------------------------------------------------------
            gtsize = list(b_traj_ground_truth_select.size())[1:]
            b_poses = self.cam_poses_from_states(b_states)
            # TODO: These source and dest should go as arguments to get_maps (in forward pass not params)
            transformer = MapTransformerBase(
                source_map_size=self.params["global_map_size"],
                world_size=self.params["world_size_px"],
                dest_map_size=self.params["local_map_size"])
            self.maybe_cuda(transformer)
            transformer.set_maps(b_traj_ground_truth_select, None)
            traj_gt_local_select, _ = transformer.get_maps(b_poses)
            self.keep_inputs("traj_gt_r_select", traj_gt_local_select)
            self.keep_inputs("traj_gt_w_select", b_traj_ground_truth_select)

            # ----------------------------------------------------------------------------

            self.prof.tick("inputs")

            actions = self(traj_gt_local_select, firstseg=b_firstseg)
            action_losses, _ = self.action_loss(b_actions,
                                                actions,
                                                batchreduce=False)
            action_losses = self.action_loss.batch_reduce_loss(action_losses)
            action_loss = self.action_loss.reduce_loss(action_losses)
            action_loss_total = action_loss
            count += b_seq_len

            self.prof.tick("loss")

        action_loss_avg = action_loss_total / (count + 1e-9)
        prefix = self.model_name + ("/eval" if eval else "/train")
        self.writer.add_scalar(prefix + "/action_loss",
                               action_loss_avg.data.cpu()[0], self.get_iter())

        self.prof.tick("out")

        prefix = self.model_name + ("/eval" if eval else "/train")
        self.writer.add_dict(prefix, get_current_meters(), self.get_iter())

        self.inc_iter()

        self.prof.tick("summaries")
        self.prof.loop()
        self.prof.print_stats(1)

        return action_loss_avg
Ejemplo n.º 11
0
    def __init__(self, run_name="", model_class=MODEL_RSS,
                 aux_class_features=False, aux_grounding_features=False,
                 aux_class_map=False, aux_grounding_map=False, aux_goal_map=False,
                 aux_lang=False, aux_traj=False, rot_noise=False, pos_noise=False):

        super(ModelTrajectoryTopDown, self).__init__()
        self.model_name = "sm_trajectory" + str(model_class)
        self.model_class = model_class
        print("Init model of type: ", str(model_class))
        self.run_name = run_name
        self.writer = LoggingSummaryWriter(log_dir="runs/" + run_name)

        self.params = get_current_parameters()["Model"]
        self.aux_weights = get_current_parameters()["AuxWeights"]

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

        # Auxiliary Objectives
        self.use_aux_class_features = aux_class_features
        self.use_aux_grounding_features = aux_grounding_features
        self.use_aux_class_on_map = aux_class_map
        self.use_aux_grounding_on_map = aux_grounding_map
        self.use_aux_goal_on_map = aux_goal_map
        self.use_aux_lang = aux_lang
        self.use_aux_traj_on_map = aux_traj
        self.use_aux_reg_map = self.aux_weights["regularize_map"]

        self.use_rot_noise = rot_noise
        self.use_pos_noise = pos_noise


        # Path-pred FPV model definition
        # --------------------------------------------------------------------------------------------------------------

        self.img_to_features_w = FPVToGlobalMap(
            source_map_size=self.params["global_map_size"], world_size_px=self.params["world_size_px"], world_size=self.params["world_size_m"],
            res_channels=self.params["resnet_channels"], map_channels=self.params["feature_channels"],
            img_w=self.params["img_w"], img_h=self.params["img_h"], img_dbg=IMG_DBG)

        self.map_accumulator_w = LeakyIntegratorGlobalMap(source_map_size=self.params["global_map_size"], world_in_map_size=self.params["world_size_px"])

        # Pre-process the accumulated map to do language grounding if necessary - in the world reference frame
        if self.use_aux_grounding_on_map and not self.use_aux_grounding_features:
            self.map_processor_a_w = LangFilterMapProcessor(
                source_map_size=self.params["global_map_size"],
                world_size=self.params["world_size_px"],
                embed_size=self.params["emb_size"],
                in_channels=self.params["feature_channels"],
                out_channels=self.params["relevance_channels"],
                spatial=False, cat_out=True)
        else:
            self.map_processor_a_w = IdentityMapProcessor(source_map_size=self.params["global_map_size"], world_size=self.params["world_size_px"])

        if self.use_aux_goal_on_map:
            self.map_processor_b_r = LangFilterMapProcessor(source_map_size=self.params["local_map_size"],
                                                            world_size=self.params["world_size_px"],
                                                            embed_size=self.params["emb_size"],
                                                            in_channels=self.params["relevance_channels"],
                                                            out_channels=self.params["goal_channels"],
                                                            spatial=True, cat_out=True)
        else:
            self.map_processor_b_r = IdentityMapProcessor(source_map_size=self.params["local_map_size"],
                                                          world_size=self.params["world_size_px"])

        pred_channels = self.params["goal_channels"] + self.params["relevance_channels"]

        # Common
        # --------------------------------------------------------------------------------------------------------------

        # Sentence Embedding
        self.sentence_embedding = SentenceEmbeddingSimple(
            self.params["word_emb_size"], self.params["emb_size"], self.params["emb_layers"])

        self.map_transform_w_to_r = MapTransformerBase(source_map_size=self.params["global_map_size"],
                                                       dest_map_size=self.params["local_map_size"],
                                                       world_size=self.params["world_size_px"])
        self.map_transform_r_to_w = MapTransformerBase(source_map_size=self.params["local_map_size"],
                                                       dest_map_size=self.params["global_map_size"],
                                                       world_size=self.params["world_size_px"])

        # Batch select is used to drop and forget semantic maps at those timestaps that we're not planning in
        self.batch_select = MapBatchSelect()
        # Since we only have path predictions for some timesteps (the ones not dropped above), we use this to fill
        # in the missing pieces by reorienting the past trajectory prediction into the frame of the current timestep
        self.map_batch_fill_missing = MapBatchFillMissing(self.params["local_map_size"], self.params["world_size_px"])

        # Passing true to freeze will freeze these weights regardless of whether they've been explicitly reloaded or not
        enable_weight_saving(self.sentence_embedding, "sentence_embedding", alwaysfreeze=False)

        # Output an action given the global semantic map
        if self.params["map_to_action"] == "downsample2":
            self.map_to_action = EgoMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"])

        elif self.params["map_to_action"] == "cropped":
            self.map_to_action = CroppedMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"]
            )

        # Don't freeze the trajectory to action weights, because it will be pre-trained during path-prediction training
        # and finetuned on all timesteps end-to-end
        enable_weight_saving(self.map_to_action, "map_to_action", alwaysfreeze=False, neverfreeze=True)

        # Auxiliary Objectives
        # --------------------------------------------------------------------------------------------------------------

        # We add all auxiliaries that are necessary. The first argument is the auxiliary name, followed by parameters,
        # followed by variable number of names of inputs. ModuleWithAuxiliaries will automatically collect these inputs
        # that have been saved with keep_auxiliary_input() during execution
        if aux_class_features:
            self.add_auxiliary(ClassAuxiliary2D("aux_class", None,  self.params["feature_channels"], self.params["num_landmarks"], self.params["dropout"],
                                                "fpv_features", "lm_pos_fpv", "lm_indices"))
        if aux_grounding_features:
            self.add_auxiliary(ClassAuxiliary2D("aux_ground", None, self.params["relevance_channels"], 2, self.params["dropout"],
                                                "fpv_features_g", "lm_pos_fpv", "lm_mentioned"))
        if aux_class_map:
            self.add_auxiliary(ClassAuxiliary2D("aux_class_map", self.params["world_size_px"], self.params["feature_channels"], self.params["num_landmarks"], self.params["dropout"],
                                                "map_s_w_select", "lm_pos_map_select", "lm_indices_select"))
        if aux_grounding_map:
            self.add_auxiliary(ClassAuxiliary2D("aux_grounding_map", self.params["world_size_px"], self.params["relevance_channels"], 2, self.params["dropout"],
                                                "map_a_w_select", "lm_pos_map_select", "lm_mentioned_select"))
        if aux_goal_map:
            self.add_auxiliary(GoalAuxiliary2D("aux_goal_map", self.params["goal_channels"], self.params["world_size_px"],
                                               "map_b_w", "goal_pos_map"))
        # RSS model uses templated data for landmark and side prediction
        if self.use_aux_lang and self.params["templates"]:
            self.add_auxiliary(ClassAuxiliary("aux_lang_lm", self.params["emb_size"], self.params["num_landmarks"], 1,
                                                "sentence_embed", "lm_mentioned_tplt"))
            self.add_auxiliary(ClassAuxiliary("aux_lang_side", self.params["emb_size"], self.params["num_sides"], 1,
                                                "sentence_embed", "side_mentioned_tplt"))
        # CoRL model uses alignment-model groundings
        elif self.use_aux_lang:
            # one output for each landmark, 2 classes per output. This is for finetuning, so use the embedding that's gonna be fine tuned
            self.add_auxiliary(ClassAuxiliary("aux_lang_lm_nl", self.params["emb_size"], 2, self.params["num_landmarks"],
                                                "sentence_embed", "lang_lm_mentioned"))
        if self.use_aux_traj_on_map:
            self.add_auxiliary(PathAuxiliary2D("aux_path", "map_b_r_select", "traj_gt_r_select"))

        if self.use_aux_reg_map:
            self.add_auxiliary(FeatureRegularizationAuxiliary2D("aux_regularize_features", None, "l1",
                                                                "map_s_w_select", "lm_pos_map_select"))

        self.goal_good_criterion = GoalPredictionGoodCriterion(ok_distance=3.2)
        self.goal_acc_meter = MovingAverageMeter(10)

        self.print_auxiliary_info()

        self.action_loss = ActionLoss()

        self.env_id = None
        self.prev_instruction = None
        self.seq_step = 0
Ejemplo n.º 12
0
 def __init__(self, source_map_size, world_size_px, world_size_m):
     super(IdentityMapAccumulator,
           self).__init__(source_map_size, world_size_px, world_size_m)
     self.child_transformer = MapTransformerBase(source_map_size,
                                                 world_size_px,
                                                 world_size_m)
Ejemplo n.º 13
0
    def __init__(self, run_name="", model_instance_name=""):

        super(ModelGSMNBiDomain, self).__init__()
        self.model_name = "gsmn_bidomain"
        self.run_name = run_name
        self.name = model_instance_name
        if not self.name:
            self.name = ""
        self.writer = LoggingSummaryWriter(
            log_dir=f"runs/{run_name}/{self.name}")

        self.params = get_current_parameters()["Model"]
        self.aux_weights = get_current_parameters()["AuxWeights"]
        self.use_aux = self.params["UseAuxiliaries"]

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

        self.tensor_store = KeyTensorStore()
        self.aux_losses = AuxiliaryLosses()

        self.rviz = None
        if self.params.get("rviz"):
            self.rviz = RvizInterface(
                base_name="/gsmn/",
                map_topics=["semantic_map", "grounding_map", "goal_map"],
                markerarray_topics=["instruction"])

        # Path-pred FPV model definition
        # --------------------------------------------------------------------------------------------------------------

        self.img_to_features_w = FPVToGlobalMap(
            source_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"],
            res_channels=self.params["resnet_channels"],
            map_channels=self.params["feature_channels"],
            img_w=self.params["img_w"],
            img_h=self.params["img_h"],
            cam_h_fov=self.params["cam_h_fov"],
            img_dbg=IMG_DBG)

        self.map_accumulator_w = LeakyIntegratorGlobalMap(
            source_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])

        # Pre-process the accumulated map to do language grounding if necessary - in the world reference frame
        if self.use_aux[
                "grounding_map"] and not self.use_aux["grounding_features"]:
            self.map_processor_a_w = LangFilterMapProcessor(
                embed_size=self.params["emb_size"],
                in_channels=self.params["feature_channels"],
                out_channels=self.params["relevance_channels"],
                spatial=False,
                cat_out=True)
        else:
            self.map_processor_a_w = IdentityMapProcessor(
                source_map_size=self.params["global_map_size"],
                world_size_px=self.params["world_size_px"],
                world_size_m=self.params["world_size_m"])

        if self.use_aux["goal_map"]:
            self.map_processor_b_r = LangFilterMapProcessor(
                embed_size=self.params["emb_size"],
                in_channels=self.params["relevance_channels"],
                out_channels=self.params["goal_channels"],
                spatial=self.params["spatial_goal_filter"],
                cat_out=self.params["cat_rel_and_goal"])
        else:
            self.map_processor_b_r = IdentityMapProcessor(
                source_map_size=self.params["local_map_size"],
                world_size_px=self.params["world_size_px"],
                world_size_m=self.params["world_size_m"])

        # Common
        # --------------------------------------------------------------------------------------------------------------

        # Sentence Embedding
        self.sentence_embedding = SentenceEmbeddingSimple(
            self.params["word_emb_size"],
            self.params["emb_size"],
            self.params["emb_layers"],
            dropout=0.0)

        self.map_transform_w_to_r = MapTransformerBase(
            source_map_size=self.params["global_map_size"],
            dest_map_size=self.params["local_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])
        self.map_transform_r_to_w = MapTransformerBase(
            source_map_size=self.params["local_map_size"],
            dest_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])

        # Output an action given the global semantic map
        if self.params["map_to_action"] == "downsample2":
            self.map_to_action = EgoMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"])

        elif self.params["map_to_action"] == "cropped":
            self.map_to_action = CroppedMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"])

        # Auxiliary Objectives
        # --------------------------------------------------------------------------------------------------------------

        # We add all auxiliaries that are necessary. The first argument is the auxiliary name, followed by parameters,
        # followed by variable number of names of inputs. ModuleWithAuxiliaries will automatically collect these inputs
        # that have been saved with keep_auxiliary_input() during execution
        if self.use_aux["class_features"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary2D("aux_class", self.params["feature_channels"],
                                 self.params["num_landmarks"],
                                 self.params["dropout"], "fpv_features",
                                 "lm_pos_fpv_features", "lm_indices",
                                 "tensor_store"))
        if self.use_aux["grounding_features"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary2D("aux_ground",
                                 self.params["relevance_channels"], 2,
                                 self.params["dropout"], "fpv_features_g",
                                 "lm_pos_fpv_features", "lm_mentioned",
                                 "tensor_store"))
        if self.use_aux["class_map"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary2D("aux_class_map",
                                 self.params["feature_channels"],
                                 self.params["num_landmarks"],
                                 self.params["dropout"], "map_S_W",
                                 "lm_pos_map", "lm_indices", "tensor_store"))
        if self.use_aux["grounding_map"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary2D("aux_grounding_map",
                                 self.params["relevance_channels"], 2,
                                 self.params["dropout"], "map_R_W",
                                 "lm_pos_map", "lm_mentioned", "tensor_store"))
        if self.use_aux["goal_map"]:
            self.aux_losses.add_auxiliary(
                GoalAuxiliary2D("aux_goal_map", self.params["goal_channels"],
                                self.params["global_map_size"], "map_G_W",
                                "goal_pos_map"))
        # RSS model uses templated data for landmark and side prediction
        if self.use_aux["language"] and self.params["templates"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary("aux_lang_lm", self.params["emb_size"],
                               self.params["num_landmarks"], 1,
                               "sentence_embed", "lm_mentioned_tplt"))
            self.aux_losses.add_auxiliary(
                ClassAuxiliary("aux_lang_side", self.params["emb_size"],
                               self.params["num_sides"], 1, "sentence_embed",
                               "side_mentioned_tplt"))
        # CoRL model uses alignment-model groundings
        elif self.use_aux["language"]:
            # one output for each landmark, 2 classes per output. This is for finetuning, so use the embedding that's gonna be fine tuned
            self.aux_losses.add_auxiliary(
                ClassAuxiliary("aux_lang_lm_nl", self.params["emb_size"], 2,
                               self.params["num_landmarks"], "sentence_embed",
                               "lang_lm_mentioned"))
        if self.use_aux["l1_regularization"]:
            self.aux_losses.add_auxiliary(
                FeatureRegularizationAuxiliary2D("aux_regularize_features",
                                                 "l1", "map_S_W"))
            self.aux_losses.add_auxiliary(
                FeatureRegularizationAuxiliary2D("aux_regularize_features",
                                                 "l1", "map_R_W"))

        self.goal_acc_meter = MovingAverageMeter(10)

        self.aux_losses.print_auxiliary_info()

        self.action_loss = ActionLoss()

        self.env_id = None
        self.prev_instruction = None
        self.seq_step = 0