コード例 #1
0
 def cuda(self, device=None):
     MapTransformerBase.cuda(self, device)
     self.map_projection.cuda(device)
     self.grid_sampler.cuda(device)
     self.img_to_features.cuda(device)
     if self.use_lang_filter:
         self.lang_filter.cuda(device)
コード例 #2
0
ファイル: model_sm_action_gt.py プロジェクト: hyzcn/drif
    def __init__(self, run_name=""):

        super(ModelTrajectoryToAction, self).__init__()
        self.model_name = "lsvd_action"
        self.run_name = run_name
        self.writer = LoggingSummaryWriter(log_dir="runs/" + run_name)

        self.params = get_current_parameters()["ModelPVN"]
        self.aux_weights = get_current_parameters()["AuxWeights"]

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

        # Common
        # --------------------------------------------------------------------------------------------------------------
        self.map_transform_w_to_s = MapTransformerBase(
            source_map_size=self.params["global_map_size"],
            dest_map_size=self.params["local_map_size"],
            world_size=self.params["world_size_px"])

        self.map_transform_r_to_w = MapTransformerBase(
            source_map_size=self.params["local_map_size"],
            dest_map_size=self.params["global_map_size"],
            world_size=self.params["world_size_px"])

        # Output an action given the global semantic map
        if self.params["map_to_action"] == "downsample2":
            self.map_to_action = EgoMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"])

        elif self.params["map_to_action"] == "cropped":
            self.map_to_action = CroppedMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                manual=self.params["manual_rule"],
                path_only=self.params["action_in_path_only"],
                recurrence=self.params["action_recurrence"])

        self.spatialsoftmax = SpatialSoftmax2d()
        self.gt_fill_missing = MapBatchFillMissing(
            self.params["local_map_size"], self.params["world_size_px"])

        # Don't freeze the trajectory to action weights, because it will be pre-trained during path-prediction training
        # and finetuned on all timesteps end-to-end
        enable_weight_saving(self.map_to_action,
                             "map_to_action",
                             alwaysfreeze=False,
                             neverfreeze=True)

        self.action_loss = ActionLoss()

        self.env_id = None
        self.seg_idx = None
        self.prev_instruction = None
        self.seq_step = 0
        self.get_act_start_pose = None
        self.gt_labels = None
コード例 #3
0
    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        #TODO: Absorb corpus within model
        :return:
        """
        prof = SimpleProfiler(print=True)
        prof.tick(".")
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state
        state = Variable(none_padded_seq_to_tensor([state_np]))

        #print("Act: " + debug_untokenize_instruction(instruction))

        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction
        if first_step:
            self.get_act_start_pose = self.cam_poses_from_states(state[0:1])

        self.seq_step += 1

        # This is for training the policy to mimic the ground-truth state distribution with oracle actions
        # b_traj_gt_w_select = b_traj_ground_truth[b_plan_mask_t[:, np.newaxis, np.newaxis, np.newaxis].expand_as(b_traj_ground_truth)].view([-1] + gtsize)
        traj_gt_w = Variable(self.gt_labels)
        b_poses = self.cam_poses_from_states(state)
        # TODO: These source and dest should go as arguments to get_maps (in forward pass not params)
        transformer = MapTransformerBase(
            source_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            dest_map_size=self.params["local_map_size"],
            world_size_m=self.params["world_size_m"])
        self.maybe_cuda(transformer)
        transformer.set_maps(traj_gt_w, None)
        traj_gt_r, _ = transformer.get_maps(b_poses)
        self.clear_inputs("traj_gt_r_select")
        self.clear_inputs("traj_gt_w_select")
        self.keep_inputs("traj_gt_r_select", traj_gt_r)
        self.keep_inputs("traj_gt_w_select", traj_gt_w)

        action = self(traj_gt_r, firstseg=[self.seq_step == 1])

        output_action = action.squeeze().data.cpu().numpy()

        stop_prob = output_action[3]
        output_stop = 1 if stop_prob > self.params["stop_threshold"] else 0
        output_action[3] = output_stop

        return output_action
コード例 #4
0
ファイル: draw_start_pos.py プロジェクト: hyzcn/drif
    def __init__(self, source_map_size, world_in_map_size, lamda=0.2):
        super(DrawStartPosOnGlobalMap, self).__init__(source_map_size, world_in_map_size)
        self.map_size = source_map_size
        self.world_size = world_in_map_size
        self.child_transformer = MapTransformerBase(source_map_size, world_in_map_size)

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.start_pose = None
        self.last_emb = None

        self.dbg_t = None
        self.seq = 0
コード例 #5
0
ファイル: leaky_integrator.py プロジェクト: hyzcn/drif
    def __init__(self, source_map_size, world_in_map_size, lamda=0.2):
        super(LeakyIntegratorMap, self).__init__(source_map_size, world_in_map_size)
        self.map_size = source_map_size
        self.world_size = world_in_map_size
        self.child_transformer = MapTransformerBase(source_map_size, world_in_map_size)
        self.lamda = lamda

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.map_memory = MapTransformerBase(source_map_size, world_in_map_size)

        self.dbg_t = None
        self.seq = 0
コード例 #6
0
ファイル: map_batch_fill_missing.py プロジェクト: hyzcn/drif
    def __init__(self, source_map_size, world_in_map_size):
        super(MapBatchFillMissing, self).__init__(source_map_size, world_in_map_size)
        self.map_size = source_map_size
        self.world_size = world_in_map_size
        self.child_transformer = MapTransformerBase(source_map_size, world_in_map_size)

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.map_memory = MapTransformerBase(source_map_size, world_in_map_size)

        self.last_observation = None

        self.dbg_t = None
        self.seq = 0
コード例 #7
0
ファイル: ratio_path_predictor.py プロジェクト: hyzcn/drif
    def dbg_write_extra(self, map, pose):
        if DebugWriter().should_write():
            self.seq += 1
            # Initialize a transformer module
            if self.dbg_t is None:
                self.dbg_t = MapTransformerBase(self.map_size, self.world_size)
                if self.is_cuda:
                    self.dbg_t.cuda(self.cuda_device)

            # Transform the prediction to the global frame and write out to disk.
            self.dbg_t.set_map(map, pose)
            map_global, _ = self.dbg_t.get_map(None)
            DebugWriter().write_img(map_global[0], "gif_overlaid", args={"world_size": self.world_size, "name": "pathpred"})
コード例 #8
0
    def __init__(self, source_map_size, world_size_px, world_size_m):
        super(IdentityIntegratorMap, self).__init__(source_map_size, world_size_px, world_size_m)
        self.map_size = source_map_size
        self.world_size = world_size_px
        self.world_size_m = world_size_m
        self.child_transformer = MapTransformerBase(source_map_size, world_size_px, world_size_m)

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.map_memory = MapTransformerBase(source_map_size, world_size_px, world_size_m)

        self.last_observation = None

        self.dbg_t = None
        self.seq = 0
コード例 #9
0
    def dbg_write_extra(self, map, pose):
        if DebugWriter().should_write():
            map = map[0:1, 0:3]
            self.seq += 1
            # Initialize a transformer module
            if pose is not None:
                if self.dbg_t is None:
                    self.dbg_t = MapTransformerBase(self.map_size, self.world_size, self.world_size_m).to(map.device)

                # Transform the prediction to the global frame and write out to disk.
                self.dbg_t.set_map(map, pose)
                map_global, _ = self.dbg_t.get_map(None)
            else:
                map_global = map
            DebugWriter().write_img(map_global[0], "gif_overlaid", args={"world_size": self.world_size, "name": "identity_integrator"})
コード例 #10
0
ファイル: leaky_integrator_w.py プロジェクト: pianpwk/drif
    def __init__(self,
                 source_map_size,
                 world_size_px,
                 world_size_m,
                 lamda=0.2):
        super(LeakyIntegratorGlobalMap,
              self).__init__(source_map_size, world_size_px, world_size_m)
        self.map_size_px = source_map_size
        self.world_size_px = world_size_px
        self.world_size_m = world_size_m
        self.child_transformer = MapTransformerBase(source_map_size,
                                                    world_size_px,
                                                    world_size_m)
        self.lamda = lamda

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.map_memory = []
        self.coverage_memory = []

        self.dbg_t = None
        self.seq = 0
コード例 #11
0
ファイル: identity_accumulator.py プロジェクト: pianpwk/drif
 def __init__(self, source_map_size, world_size_px, world_size_m):
     super(IdentityMapAccumulator,
           self).__init__(source_map_size, world_size_px, world_size_m)
     self.child_transformer = MapTransformerBase(source_map_size,
                                                 world_size_px,
                                                 world_size_m)
コード例 #12
0
ファイル: model_sm_action_gt.py プロジェクト: hyzcn/drif
    def sup_loss_on_batch(self, batch, eval):
        self.prof.tick("out")

        action_loss_total = Variable(
            empty_float_tensor([1], self.is_cuda, self.cuda_device))

        if batch is None:
            print("Skipping None Batch")
            return action_loss_total

        actions = self.maybe_cuda(batch["actions"])
        states = self.maybe_cuda(batch["states"])

        firstseg_mask = batch["firstseg_mask"]

        # Auxiliary labels
        traj_ground_truth_select = self.maybe_cuda(batch["traj_ground_truth"])
        # stops = self.maybe_cuda(batch["stops"])
        metadata = batch["md"]
        batch_size = actions.size(0)
        count = 0

        # Loop thru batch
        for b in range(batch_size):
            seg_idx = -1

            self.reset()

            self.prof.tick("out")
            b_seq_len = len_until_nones(metadata[b])

            # TODO: Generalize this
            # Slice the data according to the sequence length
            b_metadata = metadata[b][:b_seq_len]
            b_actions = actions[b][:b_seq_len]
            b_traj_ground_truth_select = traj_ground_truth_select[b]
            b_states = states[b][:b_seq_len]

            self.keep_inputs("traj_gt_global_select",
                             b_traj_ground_truth_select)

            #b_firstseg = get_obs_mask_segstart(b_metadata)
            b_firstseg = firstseg_mask[b][:b_seq_len]

            # ----------------------------------------------------------------------------
            # Optional Auxiliary Inputs
            # ----------------------------------------------------------------------------
            gtsize = list(b_traj_ground_truth_select.size())[1:]
            b_poses = self.cam_poses_from_states(b_states)
            # TODO: These source and dest should go as arguments to get_maps (in forward pass not params)
            transformer = MapTransformerBase(
                source_map_size=self.params["global_map_size"],
                world_size=self.params["world_size_px"],
                dest_map_size=self.params["local_map_size"])
            self.maybe_cuda(transformer)
            transformer.set_maps(b_traj_ground_truth_select, None)
            traj_gt_local_select, _ = transformer.get_maps(b_poses)
            self.keep_inputs("traj_gt_r_select", traj_gt_local_select)
            self.keep_inputs("traj_gt_w_select", b_traj_ground_truth_select)

            # ----------------------------------------------------------------------------

            self.prof.tick("inputs")

            actions = self(traj_gt_local_select, firstseg=b_firstseg)
            action_losses, _ = self.action_loss(b_actions,
                                                actions,
                                                batchreduce=False)
            action_losses = self.action_loss.batch_reduce_loss(action_losses)
            action_loss = self.action_loss.reduce_loss(action_losses)
            action_loss_total = action_loss
            count += b_seq_len

            self.prof.tick("loss")

        action_loss_avg = action_loss_total / (count + 1e-9)
        prefix = self.model_name + ("/eval" if eval else "/train")
        self.writer.add_scalar(prefix + "/action_loss",
                               action_loss_avg.data.cpu()[0], self.get_iter())

        self.prof.tick("out")

        prefix = self.model_name + ("/eval" if eval else "/train")
        self.writer.add_dict(prefix, get_current_meters(), self.get_iter())

        self.inc_iter()

        self.prof.tick("summaries")
        self.prof.loop()
        self.prof.print_stats(1)

        return action_loss_avg
コード例 #13
0
ファイル: model_sm_action_gt.py プロジェクト: hyzcn/drif
class ModelTrajectoryToAction(ModuleWithAuxiliaries):
    def __init__(self, run_name=""):

        super(ModelTrajectoryToAction, self).__init__()
        self.model_name = "lsvd_action"
        self.run_name = run_name
        self.writer = LoggingSummaryWriter(log_dir="runs/" + run_name)

        self.params = get_current_parameters()["ModelPVN"]
        self.aux_weights = get_current_parameters()["AuxWeights"]

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

        # Common
        # --------------------------------------------------------------------------------------------------------------
        self.map_transform_w_to_s = MapTransformerBase(
            source_map_size=self.params["global_map_size"],
            dest_map_size=self.params["local_map_size"],
            world_size=self.params["world_size_px"])

        self.map_transform_r_to_w = MapTransformerBase(
            source_map_size=self.params["local_map_size"],
            dest_map_size=self.params["global_map_size"],
            world_size=self.params["world_size_px"])

        # Output an action given the global semantic map
        if self.params["map_to_action"] == "downsample2":
            self.map_to_action = EgoMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"])

        elif self.params["map_to_action"] == "cropped":
            self.map_to_action = CroppedMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                manual=self.params["manual_rule"],
                path_only=self.params["action_in_path_only"],
                recurrence=self.params["action_recurrence"])

        self.spatialsoftmax = SpatialSoftmax2d()
        self.gt_fill_missing = MapBatchFillMissing(
            self.params["local_map_size"], self.params["world_size_px"])

        # Don't freeze the trajectory to action weights, because it will be pre-trained during path-prediction training
        # and finetuned on all timesteps end-to-end
        enable_weight_saving(self.map_to_action,
                             "map_to_action",
                             alwaysfreeze=False,
                             neverfreeze=True)

        self.action_loss = ActionLoss()

        self.env_id = None
        self.seg_idx = None
        self.prev_instruction = None
        self.seq_step = 0
        self.get_act_start_pose = None
        self.gt_labels = None

    # TODO: Try to hide these in a superclass or something. They take up a lot of space:
    def cuda(self, device=None):
        ModuleWithAuxiliaries.cuda(self, device)
        self.map_to_action.cuda(device)
        self.action_loss.cuda(device)
        self.map_transform_w_to_s.cuda(device)
        self.map_transform_r_to_w.cuda(device)
        self.gt_fill_missing.cuda(device)
        return self

    def get_iter(self):
        return int(self.iter.data[0])

    def inc_iter(self):
        self.iter += 1

    def init_weights(self):
        self.map_to_action.init_weights()

    def reset(self):
        # TODO: This is error prone. Create a class StatefulModule, iterate submodules and reset all stateful modules
        super(ModelTrajectoryToAction, self).reset()
        self.map_transform_w_to_s.reset()
        self.map_transform_r_to_w.reset()
        self.gt_fill_missing.reset()

    def setEnvContext(self, context):
        print("Set env context to: " + str(context))
        self.env_id = context["env_id"]

    def start_segment_rollout(self):
        import rollout.run_metadata as md
        m_size = self.params["local_map_size"]
        w_size = self.params["world_size_px"]
        self.gt_labels = get_top_down_ground_truth_static_global(
            md.ENV_ID, md.START_IDX, md.END_IDX, m_size, m_size, w_size,
            w_size)
        self.seg_idx = md.SEG_IDX
        self.gt_labels = self.maybe_cuda(self.gt_labels)
        if self.params["clear_history"]:
            self.start_sequence()

    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        #TODO: Absorb corpus within model
        :return:
        """
        prof = SimpleProfiler(print=True)
        prof.tick(".")
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state
        state = Variable(none_padded_seq_to_tensor([state_np]))

        #print("Act: " + debug_untokenize_instruction(instruction))

        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction
        if first_step:
            self.get_act_start_pose = self.cam_poses_from_states(state[0:1])

        self.seq_step += 1

        # This is for training the policy to mimic the ground-truth state distribution with oracle actions
        # b_traj_gt_w_select = b_traj_ground_truth[b_plan_mask_t[:, np.newaxis, np.newaxis, np.newaxis].expand_as(b_traj_ground_truth)].view([-1] + gtsize)
        traj_gt_w = Variable(self.gt_labels)
        b_poses = self.cam_poses_from_states(state)
        # TODO: These source and dest should go as arguments to get_maps (in forward pass not params)
        transformer = MapTransformerBase(
            source_map_size=self.params["global_map_size"],
            world_size=self.params["world_size_px"],
            dest_map_size=self.params["local_map_size"])
        self.maybe_cuda(transformer)
        transformer.set_maps(traj_gt_w, None)
        traj_gt_r, _ = transformer.get_maps(b_poses)
        self.clear_inputs("traj_gt_r_select")
        self.clear_inputs("traj_gt_w_select")
        self.keep_inputs("traj_gt_r_select", traj_gt_r)
        self.keep_inputs("traj_gt_w_select", traj_gt_w)

        action = self(traj_gt_r, firstseg=[self.seq_step == 1])

        output_action = action.squeeze().data.cpu().numpy()

        stop_prob = output_action[3]
        output_stop = 1 if stop_prob > self.params["stop_threshold"] else 0
        output_action[3] = output_stop

        return output_action

    def deterministic_action(self, action_mean, action_std, stop_prob):
        batch_size = action_mean.size(0)
        action = Variable(
            empty_float_tensor((batch_size, 4), self.is_cuda,
                               self.cuda_device))
        action[:, 0:3] = action_mean[:, 0:3]
        action[:, 3] = stop_prob
        return action

    def sample_action(self, action_mean, action_std, stop_prob):
        action = torch.normal(action_mean, action_std)
        stop = torch.bernoulli(stop_prob)
        return action, stop

    # This is called before beginning an execution sequence
    def start_sequence(self):
        self.seq_step = 0
        self.reset()
        print("RESETTED!")
        return

    def cam_poses_from_states(self, states):
        cam_pos = states[:, 9:12]
        cam_rot = states[:, 12:16]
        pose = Pose(cam_pos, cam_rot)
        return pose

    def save(self, epoch):
        filename = self.params[
            "map_to_action_file"] + "_" + self.run_name + "_" + str(epoch)
        save_pytorch_model(self.map_to_action, filename)
        print("Saved action model to " + filename)

    def forward(self, traj_gt_r, firstseg=None):
        """
        :param images: BxCxHxW batch of images (observations)
        :param states: BxK batch of drone states
        :param instructions: BxM LongTensor where M is the maximum length of any instruction
        :param instr_lengths: list of len B of integers, indicating length of each instruction
        :param has_obs: list of booleans of length B indicating whether the given element in the sequence has an observation
        :param yield_semantic_maps: If true, will not compute actions (full model), but return the semantic maps that
            were built along the way in response to the images. This is ugly, but allows code reuse
        :return:
        """
        action_pred = self.map_to_action(traj_gt_r,
                                         None,
                                         fistseg_mask=firstseg)
        out_action = self.deterministic_action(action_pred[:, 0:3], None,
                                               action_pred[:, 3])
        self.keep_inputs("action", out_action)
        self.prof.tick("map_to_action")

        return out_action

    def maybe_cuda(self, tensor):
        if self.is_cuda:
            if False:
                if type(tensor) is Variable:
                    tensor.data.pin_memory()
                elif type(tensor) is Pose:
                    pass
                elif type(tensor) is torch.FloatTensor:
                    tensor.pin_memory()
            return tensor.cuda()
        else:
            return tensor

    def cuda_var(self, tensor):
        return cuda_var(tensor, self.is_cuda, self.cuda_device)

    # Forward pass for training (with batch optimizations
    def sup_loss_on_batch(self, batch, eval):
        self.prof.tick("out")

        action_loss_total = Variable(
            empty_float_tensor([1], self.is_cuda, self.cuda_device))

        if batch is None:
            print("Skipping None Batch")
            return action_loss_total

        actions = self.maybe_cuda(batch["actions"])
        states = self.maybe_cuda(batch["states"])

        firstseg_mask = batch["firstseg_mask"]

        # Auxiliary labels
        traj_ground_truth_select = self.maybe_cuda(batch["traj_ground_truth"])
        # stops = self.maybe_cuda(batch["stops"])
        metadata = batch["md"]
        batch_size = actions.size(0)
        count = 0

        # Loop thru batch
        for b in range(batch_size):
            seg_idx = -1

            self.reset()

            self.prof.tick("out")
            b_seq_len = len_until_nones(metadata[b])

            # TODO: Generalize this
            # Slice the data according to the sequence length
            b_metadata = metadata[b][:b_seq_len]
            b_actions = actions[b][:b_seq_len]
            b_traj_ground_truth_select = traj_ground_truth_select[b]
            b_states = states[b][:b_seq_len]

            self.keep_inputs("traj_gt_global_select",
                             b_traj_ground_truth_select)

            #b_firstseg = get_obs_mask_segstart(b_metadata)
            b_firstseg = firstseg_mask[b][:b_seq_len]

            # ----------------------------------------------------------------------------
            # Optional Auxiliary Inputs
            # ----------------------------------------------------------------------------
            gtsize = list(b_traj_ground_truth_select.size())[1:]
            b_poses = self.cam_poses_from_states(b_states)
            # TODO: These source and dest should go as arguments to get_maps (in forward pass not params)
            transformer = MapTransformerBase(
                source_map_size=self.params["global_map_size"],
                world_size=self.params["world_size_px"],
                dest_map_size=self.params["local_map_size"])
            self.maybe_cuda(transformer)
            transformer.set_maps(b_traj_ground_truth_select, None)
            traj_gt_local_select, _ = transformer.get_maps(b_poses)
            self.keep_inputs("traj_gt_r_select", traj_gt_local_select)
            self.keep_inputs("traj_gt_w_select", b_traj_ground_truth_select)

            # ----------------------------------------------------------------------------

            self.prof.tick("inputs")

            actions = self(traj_gt_local_select, firstseg=b_firstseg)
            action_losses, _ = self.action_loss(b_actions,
                                                actions,
                                                batchreduce=False)
            action_losses = self.action_loss.batch_reduce_loss(action_losses)
            action_loss = self.action_loss.reduce_loss(action_losses)
            action_loss_total = action_loss
            count += b_seq_len

            self.prof.tick("loss")

        action_loss_avg = action_loss_total / (count + 1e-9)
        prefix = self.model_name + ("/eval" if eval else "/train")
        self.writer.add_scalar(prefix + "/action_loss",
                               action_loss_avg.data.cpu()[0], self.get_iter())

        self.prof.tick("out")

        prefix = self.model_name + ("/eval" if eval else "/train")
        self.writer.add_dict(prefix, get_current_meters(), self.get_iter())

        self.inc_iter()

        self.prof.tick("summaries")
        self.prof.loop()
        self.prof.print_stats(1)

        return action_loss_avg

    def get_dataset(self, data=None, envs=None, dataset_name=None, eval=False):
        # TODO: Maybe use eval here
        data_sources = []
        data_sources.append(aup.PROVIDER_TRAJECTORY_GROUND_TRUTH_STATIC)
        return SegmentDataset(data=data,
                              env_list=envs,
                              dataset_name=dataset_name,
                              aux_provider_names=data_sources,
                              segment_level=True)
コード例 #14
0
ファイル: model_sm_rss_global.py プロジェクト: dxsun/drif
class ModelTrajectoryTopDown(ModuleWithAuxiliaries):

    def __init__(self, run_name="", model_class=MODEL_RSS,
                 aux_class_features=False, aux_grounding_features=False,
                 aux_class_map=False, aux_grounding_map=False, aux_goal_map=False,
                 aux_lang=False, aux_traj=False, rot_noise=False, pos_noise=False):

        super(ModelTrajectoryTopDown, self).__init__()
        self.model_name = "sm_trajectory" + str(model_class)
        self.model_class = model_class
        print("Init model of type: ", str(model_class))
        self.run_name = run_name
        self.writer = LoggingSummaryWriter(log_dir="runs/" + run_name)

        self.params = get_current_parameters()["Model"]
        self.aux_weights = get_current_parameters()["AuxWeights"]

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

        # Auxiliary Objectives
        self.use_aux_class_features = aux_class_features
        self.use_aux_grounding_features = aux_grounding_features
        self.use_aux_class_on_map = aux_class_map
        self.use_aux_grounding_on_map = aux_grounding_map
        self.use_aux_goal_on_map = aux_goal_map
        self.use_aux_lang = aux_lang
        self.use_aux_traj_on_map = aux_traj
        self.use_aux_reg_map = self.aux_weights["regularize_map"]

        self.use_rot_noise = rot_noise
        self.use_pos_noise = pos_noise


        # Path-pred FPV model definition
        # --------------------------------------------------------------------------------------------------------------

        self.img_to_features_w = FPVToGlobalMap(
            source_map_size=self.params["global_map_size"], world_size_px=self.params["world_size_px"], world_size=self.params["world_size_m"],
            res_channels=self.params["resnet_channels"], map_channels=self.params["feature_channels"],
            img_w=self.params["img_w"], img_h=self.params["img_h"], img_dbg=IMG_DBG)

        self.map_accumulator_w = LeakyIntegratorGlobalMap(source_map_size=self.params["global_map_size"], world_in_map_size=self.params["world_size_px"])

        # Pre-process the accumulated map to do language grounding if necessary - in the world reference frame
        if self.use_aux_grounding_on_map and not self.use_aux_grounding_features:
            self.map_processor_a_w = LangFilterMapProcessor(
                source_map_size=self.params["global_map_size"],
                world_size=self.params["world_size_px"],
                embed_size=self.params["emb_size"],
                in_channels=self.params["feature_channels"],
                out_channels=self.params["relevance_channels"],
                spatial=False, cat_out=True)
        else:
            self.map_processor_a_w = IdentityMapProcessor(source_map_size=self.params["global_map_size"], world_size=self.params["world_size_px"])

        if self.use_aux_goal_on_map:
            self.map_processor_b_r = LangFilterMapProcessor(source_map_size=self.params["local_map_size"],
                                                            world_size=self.params["world_size_px"],
                                                            embed_size=self.params["emb_size"],
                                                            in_channels=self.params["relevance_channels"],
                                                            out_channels=self.params["goal_channels"],
                                                            spatial=True, cat_out=True)
        else:
            self.map_processor_b_r = IdentityMapProcessor(source_map_size=self.params["local_map_size"],
                                                          world_size=self.params["world_size_px"])

        pred_channels = self.params["goal_channels"] + self.params["relevance_channels"]

        # Common
        # --------------------------------------------------------------------------------------------------------------

        # Sentence Embedding
        self.sentence_embedding = SentenceEmbeddingSimple(
            self.params["word_emb_size"], self.params["emb_size"], self.params["emb_layers"])

        self.map_transform_w_to_r = MapTransformerBase(source_map_size=self.params["global_map_size"],
                                                       dest_map_size=self.params["local_map_size"],
                                                       world_size=self.params["world_size_px"])
        self.map_transform_r_to_w = MapTransformerBase(source_map_size=self.params["local_map_size"],
                                                       dest_map_size=self.params["global_map_size"],
                                                       world_size=self.params["world_size_px"])

        # Batch select is used to drop and forget semantic maps at those timestaps that we're not planning in
        self.batch_select = MapBatchSelect()
        # Since we only have path predictions for some timesteps (the ones not dropped above), we use this to fill
        # in the missing pieces by reorienting the past trajectory prediction into the frame of the current timestep
        self.map_batch_fill_missing = MapBatchFillMissing(self.params["local_map_size"], self.params["world_size_px"])

        # Passing true to freeze will freeze these weights regardless of whether they've been explicitly reloaded or not
        enable_weight_saving(self.sentence_embedding, "sentence_embedding", alwaysfreeze=False)

        # Output an action given the global semantic map
        if self.params["map_to_action"] == "downsample2":
            self.map_to_action = EgoMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"])

        elif self.params["map_to_action"] == "cropped":
            self.map_to_action = CroppedMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"]
            )

        # Don't freeze the trajectory to action weights, because it will be pre-trained during path-prediction training
        # and finetuned on all timesteps end-to-end
        enable_weight_saving(self.map_to_action, "map_to_action", alwaysfreeze=False, neverfreeze=True)

        # Auxiliary Objectives
        # --------------------------------------------------------------------------------------------------------------

        # We add all auxiliaries that are necessary. The first argument is the auxiliary name, followed by parameters,
        # followed by variable number of names of inputs. ModuleWithAuxiliaries will automatically collect these inputs
        # that have been saved with keep_auxiliary_input() during execution
        if aux_class_features:
            self.add_auxiliary(ClassAuxiliary2D("aux_class", None,  self.params["feature_channels"], self.params["num_landmarks"], self.params["dropout"],
                                                "fpv_features", "lm_pos_fpv", "lm_indices"))
        if aux_grounding_features:
            self.add_auxiliary(ClassAuxiliary2D("aux_ground", None, self.params["relevance_channels"], 2, self.params["dropout"],
                                                "fpv_features_g", "lm_pos_fpv", "lm_mentioned"))
        if aux_class_map:
            self.add_auxiliary(ClassAuxiliary2D("aux_class_map", self.params["world_size_px"], self.params["feature_channels"], self.params["num_landmarks"], self.params["dropout"],
                                                "map_s_w_select", "lm_pos_map_select", "lm_indices_select"))
        if aux_grounding_map:
            self.add_auxiliary(ClassAuxiliary2D("aux_grounding_map", self.params["world_size_px"], self.params["relevance_channels"], 2, self.params["dropout"],
                                                "map_a_w_select", "lm_pos_map_select", "lm_mentioned_select"))
        if aux_goal_map:
            self.add_auxiliary(GoalAuxiliary2D("aux_goal_map", self.params["goal_channels"], self.params["world_size_px"],
                                               "map_b_w", "goal_pos_map"))
        # RSS model uses templated data for landmark and side prediction
        if self.use_aux_lang and self.params["templates"]:
            self.add_auxiliary(ClassAuxiliary("aux_lang_lm", self.params["emb_size"], self.params["num_landmarks"], 1,
                                                "sentence_embed", "lm_mentioned_tplt"))
            self.add_auxiliary(ClassAuxiliary("aux_lang_side", self.params["emb_size"], self.params["num_sides"], 1,
                                                "sentence_embed", "side_mentioned_tplt"))
        # CoRL model uses alignment-model groundings
        elif self.use_aux_lang:
            # one output for each landmark, 2 classes per output. This is for finetuning, so use the embedding that's gonna be fine tuned
            self.add_auxiliary(ClassAuxiliary("aux_lang_lm_nl", self.params["emb_size"], 2, self.params["num_landmarks"],
                                                "sentence_embed", "lang_lm_mentioned"))
        if self.use_aux_traj_on_map:
            self.add_auxiliary(PathAuxiliary2D("aux_path", "map_b_r_select", "traj_gt_r_select"))

        if self.use_aux_reg_map:
            self.add_auxiliary(FeatureRegularizationAuxiliary2D("aux_regularize_features", None, "l1",
                                                                "map_s_w_select", "lm_pos_map_select"))

        self.goal_good_criterion = GoalPredictionGoodCriterion(ok_distance=3.2)
        self.goal_acc_meter = MovingAverageMeter(10)

        self.print_auxiliary_info()

        self.action_loss = ActionLoss()

        self.env_id = None
        self.prev_instruction = None
        self.seq_step = 0

    # TODO: Try to hide these in a superclass or something. They take up a lot of space:
    def cuda(self, device=None):
        ModuleWithAuxiliaries.cuda(self, device)
        self.sentence_embedding.cuda(device)
        self.map_accumulator_w.cuda(device)
        self.map_processor_a_w.cuda(device)
        self.map_processor_b_r.cuda(device)
        self.img_to_features_w.cuda(device)
        self.map_to_action.cuda(device)
        self.action_loss.cuda(device)
        self.map_batch_fill_missing.cuda(device)
        self.map_transform_w_to_r.cuda(device)
        self.map_transform_r_to_w.cuda(device)
        self.batch_select.cuda(device)
        self.map_batch_fill_missing.cuda(device)
        return self

    def get_iter(self):
        return int(self.iter.data[0])

    def inc_iter(self):
        self.iter += 1

    def init_weights(self):
        self.img_to_features_w.init_weights()
        self.map_accumulator_w.init_weights()
        self.sentence_embedding.init_weights()
        self.map_to_action.init_weights()
        self.map_processor_a_w.init_weights()
        self.map_processor_b_r.init_weights()

    def reset(self):
        # TODO: This is error prone. Create a class StatefulModule, iterate submodules and reset all stateful modules
        super(ModelTrajectoryTopDown, self).reset()
        self.sentence_embedding.reset()
        self.img_to_features_w.reset()
        self.map_accumulator_w.reset()
        self.map_processor_a_w.reset()
        self.map_processor_b_r.reset()
        self.map_transform_w_to_r.reset()
        self.map_transform_r_to_w.reset()
        self.map_batch_fill_missing.reset()
        self.prev_instruction = None

    def setEnvContext(self, context):
        print("Set env context to: " + str(context))
        self.env_id = context["env_id"]

    def save_viz(self, images_in):
        imsave(get_viz_dir() + "fpv_" + str(self.seq_step) + ".png", images_in)
        features_cam = self.get_inputs_batch("fpv_features")[-1, 0, 0:3]
        save_tensor_as_img(features_cam, "F_c", self.env_id)
        feature_map_torch = self.get_inputs_batch("f_w")[-1, 0, 0:3]
        save_tensor_as_img(feature_map_torch, "F_w", self.env_id)
        coverage_map_torch = self.get_inputs_batch("m_w")[-1, 0, 0:3]
        save_tensor_as_img(coverage_map_torch, "M_w", self.env_id)
        semantic_map_torch = self.get_inputs_batch("map_s_w_select")[-1, 0, 0:3]
        save_tensor_as_img(semantic_map_torch, "S_w", self.env_id)
        relmap_torch = self.get_inputs_batch("map_a_w_select")[-1, 0, 0:3]
        save_tensor_as_img(relmap_torch, "R_w", self.env_id)
        relmap_r_torch = self.get_inputs_batch("map_a_r_select")[-1, 0, 0:3]
        save_tensor_as_img(relmap_r_torch, "R_r", self.env_id)
        goalmap_torch = self.get_inputs_batch("map_b_w_select")[-1, 0, 0:3]
        save_tensor_as_img(goalmap_torch, "G_w", self.env_id)
        goalmap_r_torch = self.get_inputs_batch("map_b_r_select")[-1, 0, 0:3]
        save_tensor_as_img(goalmap_r_torch, "G_r", self.env_id)

        action = self.get_inputs_batch("action")[-1].data.cpu().squeeze().numpy()
        action_fname = self.get_viz_dir() + "action_" + str(self.seq_step) + ".png"
        Presenter().save_action(action, action_fname, "")

    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        #TODO: Absorb corpus within model
        :return:
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            if img_in_t is not None:
                img_in_t = img_in_t.cuda(self.cuda_device)
            state = state.cuda(self.cuda_device)

        step_enc = None
        plan_now = None

        self.seq_step += 1

        action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc)

        # Save materials for paper and presentation
        if False:
            self.save_viz(images_np_pure)

        output_action = action.squeeze().data.cpu().numpy()
        stop_prob = output_action[3]
        output_stop = 1 if stop_prob > 0.5 else 0
        output_action[3] = output_stop

        return output_action

    def deterministic_action(self, action_mean, action_std, stop_prob):
        batch_size = action_mean.size(0)
        action = Variable(empty_float_tensor((batch_size, 4), self.is_cuda, self.cuda_device))
        action[:, 0:3] = action_mean[:, 0:3]
        action[:, 3] = stop_prob
        return action

    def sample_action(self, action_mean, action_std, stop_prob):
        action = torch.normal(action_mean, action_std)
        stop = torch.bernoulli(stop_prob)
        return action, stop

    # This is called before beginning an execution sequence
    def start_sequence(self):
        self.seq_step = 0
        self.reset()
        print("RESETTED!")
        return

    # TODO: Move this somewhere and standardize
    def cam_poses_from_states(self, states):
        cam_pos = states[:, 9:12]
        cam_rot = states[:, 12:16]

        pos_variance = 0
        rot_variance = 0
        if self.use_pos_noise:
            pos_variance = self.params["noisy_pos_variance"]
        if self.use_rot_noise:
            rot_variance = self.params["noisy_rot_variance"]

        pose = Pose(cam_pos, cam_rot)
        if self.use_pos_noise or self.use_rot_noise:
            pose = get_noisy_poses_torch(pose, pos_variance, rot_variance, cuda=self.is_cuda, cuda_device=self.cuda_device)
        return pose

    def forward(self, images, states, instructions, instr_lengths, has_obs=None, plan=None, save_maps_only=False, pos_enc=None, noisy_poses=None):
        """
        :param images: BxCxHxW batch of images (observations)
        :param states: BxK batch of drone states
        :param instructions: BxM LongTensor where M is the maximum length of any instruction
        :param instr_lengths: list of len B of integers, indicating length of each instruction
        :param has_obs: list of booleans of length B indicating whether the given element in the sequence has an observation
        :param yield_semantic_maps: If true, will not compute actions (full model), but return the semantic maps that
            were built along the way in response to the images. This is ugly, but allows code reuse
        :return:
        """
        cam_poses = self.cam_poses_from_states(states)
        g_poses = None#[None for pose in cam_poses]
        self.prof.tick("out")

        #print("Trn: " + debug_untokenize_instruction(instructions[0].data[:instr_lengths[0]]))

        # Calculate the instruction embedding
        if instructions is not None:
            # TODO: Take batch of instructions and their lengths, return batch of embeddings. Store the last one as internal state
            sent_embeddings = self.sentence_embedding(instructions, instr_lengths)
            self.keep_inputs("sentence_embed", sent_embeddings)
        else:
            sent_embeddings = self.sentence_embedding.get()

        self.prof.tick("embed")

        # Extract and project features onto the egocentric frame for each image
        features_w, coverages_w = self.img_to_features_w(images, cam_poses, sent_embeddings, self, show="")
        self.prof.tick("img_to_map_frame")
        self.keep_inputs("f_w", features_w)
        self.keep_inputs("m_w", coverages_w)

        # Accumulate the egocentric features in a global map
        maps_w = self.map_accumulator_w(features_w, coverages_w, add_mask=has_obs, show="acc" if IMG_DBG else "")
        map_poses_w = g_poses

        # TODO: Maybe keep maps_w if necessary
        #self.keep_inputs("map_sm_local", maps_m)
        self.prof.tick("map_accumulate")

        # Throw away those timesteps that don't correspond to planning timesteps
        maps_w_select, map_poses_w_select, cam_poses_select, noisy_poses_select, _, sent_embeddings_select, pos_enc = \
            self.batch_select(maps_w, map_poses_w, cam_poses, noisy_poses, None, sent_embeddings, pos_enc, plan)

        # Only process the maps on planning timesteps
        if len(maps_w_select) > 0:
            self.keep_inputs("map_s_w_select", maps_w_select)
            self.prof.tick("batch_select")

            # Process the map via the two map_procesors
            # Do grounding of objects in the map chosen to do so
            maps_w_select, map_poses_w_select = self.map_processor_a_w(maps_w_select, sent_embeddings_select, map_poses_w_select, show="")
            self.keep_inputs("map_a_w_select", maps_w_select)

            self.prof.tick("map_proc_gnd")

            self.map_transform_w_to_r.set_maps(maps_w_select, map_poses_w_select)
            maps_m_select, map_poses_m_select = self.map_transform_w_to_r.get_maps(cam_poses_select)

            self.keep_inputs("map_a_r_select", maps_w_select)
            self.prof.tick("transform_w_to_r")

            self.keep_inputs("map_a_r_perturbed_select", maps_m_select)

            self.prof.tick("map_perturb")

            # Include positional encoding for path prediction
            if pos_enc is not None:
                sent_embeddings_pp = torch.cat([sent_embeddings_select, pos_enc.unsqueeze(1)], dim=1)
            else:
                sent_embeddings_pp = sent_embeddings_select

            # Process the map via the two map_procesors (e.g. predict the trajectory that we'll be taking)
            maps_m_select, map_poses_m_select = self.map_processor_b_r(maps_m_select, sent_embeddings_pp, map_poses_m_select)

            self.keep_inputs("map_b_r_select", maps_m_select)

            if True:
                self.map_transform_r_to_w.set_maps(maps_m_select, map_poses_m_select)
                maps_b_w_select, _ = self.map_transform_r_to_w.get_maps(None)
                self.keep_inputs("map_b_w_select", maps_b_w_select)

            self.prof.tick("map_proc_b")

        else:
            maps_m_select = None

        maps_m, map_poses_m = self.map_batch_fill_missing(maps_m_select, cam_poses, plan, show="")
        self.keep_inputs("map_b_r", maps_m)
        self.prof.tick("map_fill_missing")

        # Keep global maps for auxiliary objectives if necessary
        if self.input_required("map_b_w"):
            maps_b, _ = self.map_processor_b_r.get_maps(g_poses)
            self.keep_inputs("map_b_w", maps_b)

        self.prof.tick("keep_global_maps")

        if run_metadata.IS_ROLLOUT:
            pass
            #Presenter().show_image(maps_m.data[0, 0:3], "plan_map_now", torch=True, scale=4, waitkey=1)
            #Presenter().show_image(maps_w.data[0, 0:3], "sm_map_now", torch=True, scale=4, waitkey=1)
        self.prof.tick("viz")

        # Output the final action given the processed map
        action_pred = self.map_to_action(maps_m, sent_embeddings)
        out_action = self.deterministic_action(action_pred[:, 0:3], None, action_pred[:, 3])

        self.keep_inputs("action", out_action)
        self.prof.tick("map_to_action")

        return out_action

    # TODO: The below two methods seem to do the same thing
    def maybe_cuda(self, tensor):
        if self.is_cuda:
            return tensor.cuda()
        else:
            return tensor

    def cuda_var(self, tensor):
        return cuda_var(tensor, self.is_cuda, self.cuda_device)

    # Forward pass for training (with batch optimizations
    def sup_loss_on_batch(self, batch, eval):
        self.prof.tick("out")

        action_loss_total = Variable(empty_float_tensor([1], self.is_cuda, self.cuda_device))

        if batch is None:
            print("Skipping None Batch")
            return action_loss_total

        images = self.maybe_cuda(batch["images"])

        instructions = self.maybe_cuda(batch["instr"])
        instr_lengths = batch["instr_len"]
        states = self.maybe_cuda(batch["states"])
        actions = self.maybe_cuda(batch["actions"])

        # Auxiliary labels
        lm_pos_fpv = batch["lm_pos_fpv"]
        lm_pos_map = batch["lm_pos_map"]
        lm_indices = batch["lm_indices"]
        goal_pos_map = batch["goal_loc"]

        TEMPLATES = True
        if TEMPLATES:
            lm_mentioned_tplt = batch["lm_mentioned_tplt"]
            side_mentioned_tplt = batch["side_mentioned_tplt"]
        else:
            lm_mentioned = batch["lm_mentioned"]
            lang_lm_mentioned = batch["lang_lm_mentioned"]

        # stops = self.maybe_cuda(batch["stops"])
        masks = self.maybe_cuda(batch["masks"])
        # This is the first-timestep metadata
        metadata = batch["md"]

        seq_len = images.size(1)
        batch_size = images.size(0)
        count = 0
        correct_goal_count = 0
        goal_count = 0

        # Loop thru batch
        for b in range(batch_size):
            seg_idx = -1

            self.reset()

            self.prof.tick("out")
            b_seq_len = len_until_nones(metadata[b])

            # TODO: Generalize this
            # Slice the data according to the sequence length
            b_metadata = metadata[b][:b_seq_len]
            b_images = images[b][:b_seq_len]
            b_instructions = instructions[b][:b_seq_len]
            b_instr_len = instr_lengths[b][:b_seq_len]
            b_states = states[b][:b_seq_len]
            b_actions = actions[b][:b_seq_len]
            b_lm_pos_fpv = lm_pos_fpv[b][:b_seq_len]
            b_lm_pos_map = lm_pos_map[b][:b_seq_len]
            b_lm_indices = lm_indices[b][:b_seq_len]
            b_goal_pos = goal_pos_map[b][:b_seq_len]
            if not TEMPLATES:
                b_lang_lm_mentioned = lang_lm_mentioned[b][:b_seq_len]
                b_lm_mentioned = lm_mentioned[b][:b_seq_len]

            b_lm_pos_map = [self.cuda_var(s.long()) if s is not None else None for s in b_lm_pos_map]
            b_lm_pos_fpv = [self.cuda_var((s / RESNET_FACTOR).long()) if s is not None else None for s in b_lm_pos_fpv]
            b_lm_indices = [self.cuda_var(s) if s is not None else None for s in b_lm_indices]
            b_goal_pos = self.cuda_var(b_goal_pos)
            if not TEMPLATES:
                b_lang_lm_mentioned = self.cuda_var(b_lang_lm_mentioned)
                b_lm_mentioned = [self.cuda_var(s) if s is not None else None for s in b_lm_mentioned]

            # TODO: Figure out how to keep these properly. Perhaps as a whole batch is best
            # TODO: Introduce a key-value store (encapsulate instead of inherit)
            self.keep_inputs("lm_pos_fpv", b_lm_pos_fpv)
            self.keep_inputs("lm_pos_map", b_lm_pos_map)
            self.keep_inputs("lm_indices", b_lm_indices)
            self.keep_inputs("goal_pos_map", b_goal_pos)
            if not TEMPLATES:
                self.keep_inputs("lang_lm_mentioned", b_lang_lm_mentioned)
                self.keep_inputs("lm_mentioned", b_lm_mentioned)

            # TODO: Abstract all of these if-elses in a modular way once we know which ones are necessary
            if TEMPLATES:
                b_lm_mentioned_tplt = lm_mentioned_tplt[b][:b_seq_len]
                b_side_mentioned_tplt = side_mentioned_tplt[b][:b_seq_len]
                b_side_mentioned_tplt = self.cuda_var(b_side_mentioned_tplt)
                b_lm_mentioned_tplt = self.cuda_var(b_lm_mentioned_tplt)
                self.keep_inputs("lm_mentioned_tplt", b_lm_mentioned_tplt)
                self.keep_inputs("side_mentioned_tplt", b_side_mentioned_tplt)

                b_lm_mentioned = b_lm_mentioned_tplt


            b_obs_mask = [True for _ in range(b_seq_len)]
            b_plan_mask = [True for _ in range(b_seq_len)]
            b_plan_mask_t_cpu = torch.Tensor(b_plan_mask) == True
            b_plan_mask_t = self.maybe_cuda(b_plan_mask_t_cpu)
            b_pos_enc = None

            # ----------------------------------------------------------------------------
            # Optional Auxiliary Inputs
            # ----------------------------------------------------------------------------
            if self.input_required("lm_pos_map_select"):
                b_lm_pos_map_select = [lm_pos for i,lm_pos in enumerate(b_lm_pos_map) if b_plan_mask[i]]
                self.keep_inputs("lm_pos_map_select", b_lm_pos_map_select)
            if self.input_required("lm_indices_select"):
                b_lm_indices_select = [lm_idx for i,lm_idx in enumerate(b_lm_indices) if b_plan_mask[i]]
                self.keep_inputs("lm_indices_select", b_lm_indices_select)
            if self.input_required("lm_mentioned_select"):
                b_lm_mentioned_select = [lm_m for i,lm_m in enumerate(b_lm_mentioned) if b_plan_mask[i]]
                self.keep_inputs("lm_mentioned_select", b_lm_mentioned_select)

            # ----------------------------------------------------------------------------

            self.prof.tick("inputs")

            actions = self(b_images, b_states, b_instructions, b_instr_len,
                           has_obs=b_obs_mask, plan=b_plan_mask, pos_enc=b_pos_enc)

            action_losses, _ = self.action_loss(b_actions, actions, batchreduce=False)

            self.prof.tick("call")

            action_losses = self.action_loss.batch_reduce_loss(action_losses)
            action_loss = self.action_loss.reduce_loss(action_losses)

            action_loss_total = action_loss
            count += b_seq_len

            self.prof.tick("loss")

        action_loss_avg = action_loss_total / (count + 1e-9)

        self.prof.tick("out")

        # Doing this in the end (outside of se
        aux_losses = self.calculate_aux_loss(reduce_average=True)
        aux_loss = self.combine_aux_losses(aux_losses, self.aux_weights)

        prefix = self.model_name + ("/eval" if eval else "/train")

        self.writer.add_dict(prefix, get_current_meters(), self.get_iter())
        self.writer.add_dict(prefix, aux_losses, self.get_iter())
        self.writer.add_scalar(prefix + "/action_loss", action_loss_avg.data.cpu()[0], self.get_iter())
        # TODO: Log value here
        self.writer.add_scalar(prefix + "/goal_accuracy", self.goal_acc_meter.get(), self.get_iter())

        self.prof.tick("auxiliaries")

        total_loss = action_loss_avg + aux_loss

        self.inc_iter()

        self.prof.tick("summaries")
        self.prof.loop()
        self.prof.print_stats(1)

        return total_loss

    def get_dataset(self, data=None, envs=None, dataset_name=None, eval=False):
        # TODO: Maybe use eval here
        #if self.fpv:
        data_sources = []
        # If we're running auxiliary objectives, we need to include the data sources for the auxiliary labels
        #if self.use_aux_class_features or self.use_aux_class_on_map or self.use_aux_grounding_features or self.use_aux_grounding_on_map:
        #if self.use_aux_goal_on_map:
        data_sources.append(aup.PROVIDER_LM_POS_DATA)
        data_sources.append(aup.PROVIDER_GOAL_POS)
        #data_sources.append(aup.PROVIDER_LANDMARKS_MENTIONED)
        data_sources.append(aup.PROVIDER_LANG_TEMPLATE)

        #if self.use_rot_noise or self.use_pos_noise:
        #    data_sources.append(aup.PROVIDER_POSE_NOISE)

        return SegmentDataset(data=data, env_list=envs, dataset_name=dataset_name, aux_provider_names=data_sources, segment_level=True)
コード例 #15
0
ファイル: model_sm_rss_global.py プロジェクト: dxsun/drif
    def __init__(self, run_name="", model_class=MODEL_RSS,
                 aux_class_features=False, aux_grounding_features=False,
                 aux_class_map=False, aux_grounding_map=False, aux_goal_map=False,
                 aux_lang=False, aux_traj=False, rot_noise=False, pos_noise=False):

        super(ModelTrajectoryTopDown, self).__init__()
        self.model_name = "sm_trajectory" + str(model_class)
        self.model_class = model_class
        print("Init model of type: ", str(model_class))
        self.run_name = run_name
        self.writer = LoggingSummaryWriter(log_dir="runs/" + run_name)

        self.params = get_current_parameters()["Model"]
        self.aux_weights = get_current_parameters()["AuxWeights"]

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

        # Auxiliary Objectives
        self.use_aux_class_features = aux_class_features
        self.use_aux_grounding_features = aux_grounding_features
        self.use_aux_class_on_map = aux_class_map
        self.use_aux_grounding_on_map = aux_grounding_map
        self.use_aux_goal_on_map = aux_goal_map
        self.use_aux_lang = aux_lang
        self.use_aux_traj_on_map = aux_traj
        self.use_aux_reg_map = self.aux_weights["regularize_map"]

        self.use_rot_noise = rot_noise
        self.use_pos_noise = pos_noise


        # Path-pred FPV model definition
        # --------------------------------------------------------------------------------------------------------------

        self.img_to_features_w = FPVToGlobalMap(
            source_map_size=self.params["global_map_size"], world_size_px=self.params["world_size_px"], world_size=self.params["world_size_m"],
            res_channels=self.params["resnet_channels"], map_channels=self.params["feature_channels"],
            img_w=self.params["img_w"], img_h=self.params["img_h"], img_dbg=IMG_DBG)

        self.map_accumulator_w = LeakyIntegratorGlobalMap(source_map_size=self.params["global_map_size"], world_in_map_size=self.params["world_size_px"])

        # Pre-process the accumulated map to do language grounding if necessary - in the world reference frame
        if self.use_aux_grounding_on_map and not self.use_aux_grounding_features:
            self.map_processor_a_w = LangFilterMapProcessor(
                source_map_size=self.params["global_map_size"],
                world_size=self.params["world_size_px"],
                embed_size=self.params["emb_size"],
                in_channels=self.params["feature_channels"],
                out_channels=self.params["relevance_channels"],
                spatial=False, cat_out=True)
        else:
            self.map_processor_a_w = IdentityMapProcessor(source_map_size=self.params["global_map_size"], world_size=self.params["world_size_px"])

        if self.use_aux_goal_on_map:
            self.map_processor_b_r = LangFilterMapProcessor(source_map_size=self.params["local_map_size"],
                                                            world_size=self.params["world_size_px"],
                                                            embed_size=self.params["emb_size"],
                                                            in_channels=self.params["relevance_channels"],
                                                            out_channels=self.params["goal_channels"],
                                                            spatial=True, cat_out=True)
        else:
            self.map_processor_b_r = IdentityMapProcessor(source_map_size=self.params["local_map_size"],
                                                          world_size=self.params["world_size_px"])

        pred_channels = self.params["goal_channels"] + self.params["relevance_channels"]

        # Common
        # --------------------------------------------------------------------------------------------------------------

        # Sentence Embedding
        self.sentence_embedding = SentenceEmbeddingSimple(
            self.params["word_emb_size"], self.params["emb_size"], self.params["emb_layers"])

        self.map_transform_w_to_r = MapTransformerBase(source_map_size=self.params["global_map_size"],
                                                       dest_map_size=self.params["local_map_size"],
                                                       world_size=self.params["world_size_px"])
        self.map_transform_r_to_w = MapTransformerBase(source_map_size=self.params["local_map_size"],
                                                       dest_map_size=self.params["global_map_size"],
                                                       world_size=self.params["world_size_px"])

        # Batch select is used to drop and forget semantic maps at those timestaps that we're not planning in
        self.batch_select = MapBatchSelect()
        # Since we only have path predictions for some timesteps (the ones not dropped above), we use this to fill
        # in the missing pieces by reorienting the past trajectory prediction into the frame of the current timestep
        self.map_batch_fill_missing = MapBatchFillMissing(self.params["local_map_size"], self.params["world_size_px"])

        # Passing true to freeze will freeze these weights regardless of whether they've been explicitly reloaded or not
        enable_weight_saving(self.sentence_embedding, "sentence_embedding", alwaysfreeze=False)

        # Output an action given the global semantic map
        if self.params["map_to_action"] == "downsample2":
            self.map_to_action = EgoMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"])

        elif self.params["map_to_action"] == "cropped":
            self.map_to_action = CroppedMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"]
            )

        # Don't freeze the trajectory to action weights, because it will be pre-trained during path-prediction training
        # and finetuned on all timesteps end-to-end
        enable_weight_saving(self.map_to_action, "map_to_action", alwaysfreeze=False, neverfreeze=True)

        # Auxiliary Objectives
        # --------------------------------------------------------------------------------------------------------------

        # We add all auxiliaries that are necessary. The first argument is the auxiliary name, followed by parameters,
        # followed by variable number of names of inputs. ModuleWithAuxiliaries will automatically collect these inputs
        # that have been saved with keep_auxiliary_input() during execution
        if aux_class_features:
            self.add_auxiliary(ClassAuxiliary2D("aux_class", None,  self.params["feature_channels"], self.params["num_landmarks"], self.params["dropout"],
                                                "fpv_features", "lm_pos_fpv", "lm_indices"))
        if aux_grounding_features:
            self.add_auxiliary(ClassAuxiliary2D("aux_ground", None, self.params["relevance_channels"], 2, self.params["dropout"],
                                                "fpv_features_g", "lm_pos_fpv", "lm_mentioned"))
        if aux_class_map:
            self.add_auxiliary(ClassAuxiliary2D("aux_class_map", self.params["world_size_px"], self.params["feature_channels"], self.params["num_landmarks"], self.params["dropout"],
                                                "map_s_w_select", "lm_pos_map_select", "lm_indices_select"))
        if aux_grounding_map:
            self.add_auxiliary(ClassAuxiliary2D("aux_grounding_map", self.params["world_size_px"], self.params["relevance_channels"], 2, self.params["dropout"],
                                                "map_a_w_select", "lm_pos_map_select", "lm_mentioned_select"))
        if aux_goal_map:
            self.add_auxiliary(GoalAuxiliary2D("aux_goal_map", self.params["goal_channels"], self.params["world_size_px"],
                                               "map_b_w", "goal_pos_map"))
        # RSS model uses templated data for landmark and side prediction
        if self.use_aux_lang and self.params["templates"]:
            self.add_auxiliary(ClassAuxiliary("aux_lang_lm", self.params["emb_size"], self.params["num_landmarks"], 1,
                                                "sentence_embed", "lm_mentioned_tplt"))
            self.add_auxiliary(ClassAuxiliary("aux_lang_side", self.params["emb_size"], self.params["num_sides"], 1,
                                                "sentence_embed", "side_mentioned_tplt"))
        # CoRL model uses alignment-model groundings
        elif self.use_aux_lang:
            # one output for each landmark, 2 classes per output. This is for finetuning, so use the embedding that's gonna be fine tuned
            self.add_auxiliary(ClassAuxiliary("aux_lang_lm_nl", self.params["emb_size"], 2, self.params["num_landmarks"],
                                                "sentence_embed", "lang_lm_mentioned"))
        if self.use_aux_traj_on_map:
            self.add_auxiliary(PathAuxiliary2D("aux_path", "map_b_r_select", "traj_gt_r_select"))

        if self.use_aux_reg_map:
            self.add_auxiliary(FeatureRegularizationAuxiliary2D("aux_regularize_features", None, "l1",
                                                                "map_s_w_select", "lm_pos_map_select"))

        self.goal_good_criterion = GoalPredictionGoodCriterion(ok_distance=3.2)
        self.goal_acc_meter = MovingAverageMeter(10)

        self.print_auxiliary_info()

        self.action_loss = ActionLoss()

        self.env_id = None
        self.prev_instruction = None
        self.seq_step = 0
コード例 #16
0
ファイル: model_gsmn_bidomain.py プロジェクト: pianpwk/drif
class ModelGSMNBiDomain(nn.Module):
    def __init__(self, run_name="", model_instance_name=""):

        super(ModelGSMNBiDomain, self).__init__()
        self.model_name = "gsmn_bidomain"
        self.run_name = run_name
        self.name = model_instance_name
        if not self.name:
            self.name = ""
        self.writer = LoggingSummaryWriter(
            log_dir=f"runs/{run_name}/{self.name}")

        self.params = get_current_parameters()["Model"]
        self.aux_weights = get_current_parameters()["AuxWeights"]
        self.use_aux = self.params["UseAuxiliaries"]

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

        self.tensor_store = KeyTensorStore()
        self.aux_losses = AuxiliaryLosses()

        self.rviz = None
        if self.params.get("rviz"):
            self.rviz = RvizInterface(
                base_name="/gsmn/",
                map_topics=["semantic_map", "grounding_map", "goal_map"],
                markerarray_topics=["instruction"])

        # Path-pred FPV model definition
        # --------------------------------------------------------------------------------------------------------------

        self.img_to_features_w = FPVToGlobalMap(
            source_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"],
            res_channels=self.params["resnet_channels"],
            map_channels=self.params["feature_channels"],
            img_w=self.params["img_w"],
            img_h=self.params["img_h"],
            cam_h_fov=self.params["cam_h_fov"],
            img_dbg=IMG_DBG)

        self.map_accumulator_w = LeakyIntegratorGlobalMap(
            source_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])

        # Pre-process the accumulated map to do language grounding if necessary - in the world reference frame
        if self.use_aux[
                "grounding_map"] and not self.use_aux["grounding_features"]:
            self.map_processor_a_w = LangFilterMapProcessor(
                embed_size=self.params["emb_size"],
                in_channels=self.params["feature_channels"],
                out_channels=self.params["relevance_channels"],
                spatial=False,
                cat_out=True)
        else:
            self.map_processor_a_w = IdentityMapProcessor(
                source_map_size=self.params["global_map_size"],
                world_size_px=self.params["world_size_px"],
                world_size_m=self.params["world_size_m"])

        if self.use_aux["goal_map"]:
            self.map_processor_b_r = LangFilterMapProcessor(
                embed_size=self.params["emb_size"],
                in_channels=self.params["relevance_channels"],
                out_channels=self.params["goal_channels"],
                spatial=self.params["spatial_goal_filter"],
                cat_out=self.params["cat_rel_and_goal"])
        else:
            self.map_processor_b_r = IdentityMapProcessor(
                source_map_size=self.params["local_map_size"],
                world_size_px=self.params["world_size_px"],
                world_size_m=self.params["world_size_m"])

        # Common
        # --------------------------------------------------------------------------------------------------------------

        # Sentence Embedding
        self.sentence_embedding = SentenceEmbeddingSimple(
            self.params["word_emb_size"],
            self.params["emb_size"],
            self.params["emb_layers"],
            dropout=0.0)

        self.map_transform_w_to_r = MapTransformerBase(
            source_map_size=self.params["global_map_size"],
            dest_map_size=self.params["local_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])
        self.map_transform_r_to_w = MapTransformerBase(
            source_map_size=self.params["local_map_size"],
            dest_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])

        # Output an action given the global semantic map
        if self.params["map_to_action"] == "downsample2":
            self.map_to_action = EgoMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"])

        elif self.params["map_to_action"] == "cropped":
            self.map_to_action = CroppedMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"])

        # Auxiliary Objectives
        # --------------------------------------------------------------------------------------------------------------

        # We add all auxiliaries that are necessary. The first argument is the auxiliary name, followed by parameters,
        # followed by variable number of names of inputs. ModuleWithAuxiliaries will automatically collect these inputs
        # that have been saved with keep_auxiliary_input() during execution
        if self.use_aux["class_features"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary2D("aux_class", self.params["feature_channels"],
                                 self.params["num_landmarks"],
                                 self.params["dropout"], "fpv_features",
                                 "lm_pos_fpv_features", "lm_indices",
                                 "tensor_store"))
        if self.use_aux["grounding_features"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary2D("aux_ground",
                                 self.params["relevance_channels"], 2,
                                 self.params["dropout"], "fpv_features_g",
                                 "lm_pos_fpv_features", "lm_mentioned",
                                 "tensor_store"))
        if self.use_aux["class_map"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary2D("aux_class_map",
                                 self.params["feature_channels"],
                                 self.params["num_landmarks"],
                                 self.params["dropout"], "map_S_W",
                                 "lm_pos_map", "lm_indices", "tensor_store"))
        if self.use_aux["grounding_map"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary2D("aux_grounding_map",
                                 self.params["relevance_channels"], 2,
                                 self.params["dropout"], "map_R_W",
                                 "lm_pos_map", "lm_mentioned", "tensor_store"))
        if self.use_aux["goal_map"]:
            self.aux_losses.add_auxiliary(
                GoalAuxiliary2D("aux_goal_map", self.params["goal_channels"],
                                self.params["global_map_size"], "map_G_W",
                                "goal_pos_map"))
        # RSS model uses templated data for landmark and side prediction
        if self.use_aux["language"] and self.params["templates"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary("aux_lang_lm", self.params["emb_size"],
                               self.params["num_landmarks"], 1,
                               "sentence_embed", "lm_mentioned_tplt"))
            self.aux_losses.add_auxiliary(
                ClassAuxiliary("aux_lang_side", self.params["emb_size"],
                               self.params["num_sides"], 1, "sentence_embed",
                               "side_mentioned_tplt"))
        # CoRL model uses alignment-model groundings
        elif self.use_aux["language"]:
            # one output for each landmark, 2 classes per output. This is for finetuning, so use the embedding that's gonna be fine tuned
            self.aux_losses.add_auxiliary(
                ClassAuxiliary("aux_lang_lm_nl", self.params["emb_size"], 2,
                               self.params["num_landmarks"], "sentence_embed",
                               "lang_lm_mentioned"))
        if self.use_aux["l1_regularization"]:
            self.aux_losses.add_auxiliary(
                FeatureRegularizationAuxiliary2D("aux_regularize_features",
                                                 "l1", "map_S_W"))
            self.aux_losses.add_auxiliary(
                FeatureRegularizationAuxiliary2D("aux_regularize_features",
                                                 "l1", "map_R_W"))

        self.goal_acc_meter = MovingAverageMeter(10)

        self.aux_losses.print_auxiliary_info()

        self.action_loss = ActionLoss()

        self.env_id = None
        self.prev_instruction = None
        self.seq_step = 0

    def cuda(self, device=None):
        CudaModule.cuda(self, device)
        self.aux_losses.cuda(device)
        self.sentence_embedding.cuda(device)
        self.map_accumulator_w.cuda(device)
        self.map_processor_a_w.cuda(device)
        self.map_processor_b_r.cuda(device)
        self.img_to_features_w.cuda(device)
        self.map_to_action.cuda(device)
        self.action_loss.cuda(device)
        self.map_transform_w_to_r.cuda(device)
        self.map_transform_r_to_w.cuda(device)
        return self

    def steal_cross_domain_modules(self, other_self):
        # TODO: Consider whether to share auxiliary losses, and if so, all of them?
        self.aux_losses = other_self.aux_losses
        self.action_loss = other_self.action_loss

        # TODO: Make sure that none of these things are stateful, or that there are resets after every forward pass
        self.sentence_embedding = other_self.sentence_embedding
        self.map_accumulator_w = other_self.map_accumulator_w
        self.map_processor_a_w = other_self.map_processor_a_w
        self.map_processor_b_r = other_self.map_processor_b_r
        self.map_to_action = other_self.map_to_action

        # We'll have a separate one of these for each domain
        #self.img_to_features_w = other_self.img_to_features_w

        # TODO: Check that statefulness is not an issue in sharing modules
        # These have no parameters so no point sharing
        #self.map_transform_w_to_r = other_self.map_transform_w_to_r
        #self.map_transform_r_to_w = other_self.map_transform_r_to_w

    def both_domain_parameters(self, other_self):
        # This function iterates and yields parameters from this module and the other module, but does not yield
        # shared parameters twice.
        # First yield all of the other module's parameters
        for p in other_self.parameters():
            yield p
        # Then yield all the parameters from the this module that are not shared with the other one
        for p in self.img_to_features_w.parameters():
            yield p
        return

    def get_iter(self):
        return int(self.iter.data[0])

    def inc_iter(self):
        self.iter += 1

    def load_img_feature_weights(self):
        if self.params.get("load_feature_net"):
            filename = self.params.get("feature_net_filename")
            weights = load_pytorch_model(None, filename)
            prefix = self.params.get("feature_net_tensor_name")
            if prefix:
                weights = find_state_subdict(weights, prefix)
            # TODO: This breaks OOP conventions
            self.img_to_features_w.img_to_features.load_state_dict(weights)
            print(
                f"Loaded pretrained weights from file {filename} with prefix {prefix}"
            )

    def init_weights(self):
        self.img_to_features_w.init_weights()
        self.load_img_feature_weights()
        self.map_accumulator_w.init_weights()
        self.sentence_embedding.init_weights()
        self.map_to_action.init_weights()
        self.map_processor_a_w.init_weights()
        self.map_processor_b_r.init_weights()

    def reset(self):
        self.tensor_store.reset()
        self.sentence_embedding.reset()
        self.img_to_features_w.reset()
        self.map_accumulator_w.reset()
        self.map_transform_w_to_r.reset()
        self.map_transform_r_to_w.reset()
        self.load_img_feature_weights()
        self.prev_instruction = None

    def set_env_context(self, context):
        print("Set env context to: " + str(context))
        self.env_id = context["env_id"]

    def save_viz(self, images_in, instruction):
        # Save incoming images
        imsave(
            os.path.join(get_viz_dir_for_rollout(),
                         "fpv_" + str(self.seq_step) + ".png"), images_in)
        #self.tensor_store.keep_input("fpv_img", images_in)
        # Save all of these tensors from the tensor store as images
        save_tensors_as_images(self.tensor_store, [
            "images_w", "fpv_img", "fpv_features", "map_F_W", "map_M_W",
            "map_S_W", "map_R_W", "map_R_R", "map_G_R", "map_G_W"
        ], str(self.seq_step))

        # Save action as image
        action = self.tensor_store.get_inputs_batch(
            "action")[-1].data.cpu().squeeze().numpy()
        action_fname = get_viz_dir_for_rollout() + "action_" + str(
            self.seq_step) + ".png"
        Presenter().save_action(action, action_fname, "")

        instruction_fname = get_viz_dir_for_rollout() + "instruction.txt"
        with open(instruction_fname, "w") as fp:
            fp.write(instruction)

    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        #TODO: Absorb corpus within model
        :return:
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction
        instruction_str = debug_untokenize_instruction(instruction)

        # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps)
        if first_step:
            if self.rviz is not None:
                self.rviz.publish_instruction_text(
                    "instruction", debug_untokenize_instruction(instruction))

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            if img_in_t is not None:
                img_in_t = img_in_t.cuda(self.cuda_device)
            state = state.cuda(self.cuda_device)

        step_enc = None
        plan_now = None

        self.seq_step += 1

        action = self(img_in_t,
                      state,
                      instruction,
                      instr_len,
                      plan=plan_now,
                      pos_enc=step_enc)

        passive_mode_debug_projections = True
        if passive_mode_debug_projections:
            self.show_landmark_locations(loop=False, states=state)
            self.reset()

        # Run auxiliary objectives for debugging purposes (e.g. to compute classification predictions)
        if self.params.get("run_auxiliaries_at_test_time"):
            _, _ = self.aux_losses.calculate_aux_loss(self.tensor_store,
                                                      reduce_average=True)
            overlaid = self.get_overlaid_classification_results(
                whole_batch=False)

        # Save materials for analysis and presentation
        if self.params["write_figures"]:
            self.save_viz(images_np_pure, instruction_str)

        output_action = action.squeeze().data.cpu().numpy()
        stop_prob = output_action[3]
        output_stop = 1 if stop_prob > self.params["stop_p"] else 0
        output_action[3] = output_stop

        return output_action

    def get_overlaid_classification_results(self, map_not_features=False):
        if map_not_features:
            predictions_name = "aux_class_map_predictions"
        else:
            predictions_name = "aux_class_predictions"
        predictions = self.tensor_store.get_latest_input(predictions_name)
        if predictions is None:
            return None
        predictions = predictions[0].detach()
        # Get the 3 channels corresponding to no landmark, banana and gorilla
        predictions = predictions[[0, 3, 24], :, :]
        images = self.tensor_store.get_latest_input("images")[0].detach()
        overlaid = Presenter().overlaid_image(images,
                                              predictions,
                                              gray_bg=True)
        return overlaid

    def deterministic_action(self, action_mean, action_std, stop_prob):
        batch_size = action_mean.size(0)
        action = Variable(
            empty_float_tensor((batch_size, 4), self.is_cuda,
                               self.cuda_device))
        action[:, 0:3] = action_mean[:, 0:3]
        action[:, 3] = stop_prob
        return action

    # This is called before beginning an execution sequence
    def start_sequence(self):
        self.seq_step = 0
        self.reset()
        print("RESETTED!")
        return

    # TODO: Move this somewhere and standardize
    def cam_poses_from_states(self, states):
        cam_pos = states[:, 9:12]
        cam_rot = states[:, 12:16]

        pos_variance = 0
        rot_variance = 0
        if self.params.get("use_pos_noise"):
            pos_variance = self.params["noisy_pos_variance"]
        if self.params.get("use_rot_noise"):
            rot_variance = self.params["noisy_rot_variance"]

        pose = Pose(cam_pos, cam_rot)
        if self.params.get("use_pos_noise") or self.params.get(
                "use_rot_noise"):
            pose = get_noisy_poses_torch(pose,
                                         pos_variance,
                                         rot_variance,
                                         cuda=self.is_cuda,
                                         cuda_device=self.cuda_device)
        return pose

    def forward(self,
                images,
                states,
                instructions,
                instr_lengths,
                has_obs=None,
                plan=None,
                save_maps_only=False,
                pos_enc=None,
                noisy_poses=None,
                halfway=False):
        """
        :param images: BxCxHxW batch of images (observations)
        :param states: BxK batch of drone states
        :param instructions: BxM LongTensor where M is the maximum length of any instruction
        :param instr_lengths: list of len B of integers, indicating length of each instruction
        :param has_obs: list of booleans of length B indicating whether the given element in the sequence has an observation
        :param yield_semantic_maps: If true, will not compute actions (full model), but return the semantic maps that
            were built along the way in response to the images. This is ugly, but allows code reuse
        :return:
        """
        cam_poses = self.cam_poses_from_states(states)
        g_poses = None  #[None for pose in cam_poses]
        self.prof.tick("out")

        #str_instr = debug_untokenize_instruction(instructions[0].data[:instr_lengths[0]])
        #print("Trn: " + str_instr)

        # Calculate the instruction embedding
        if instructions is not None:
            # TODO: Take batch of instructions and their lengths, return batch of embeddings. Store the last one as internal state
            sent_embeddings = self.sentence_embedding(instructions,
                                                      instr_lengths)
            self.tensor_store.keep_inputs("sentence_embed", sent_embeddings)
        else:
            sent_embeddings = self.sentence_embedding.get()

        self.prof.tick("embed")

        # Extract and project features onto the egocentric frame for each image
        features_w, coverages_w = self.img_to_features_w(images,
                                                         cam_poses,
                                                         sent_embeddings,
                                                         self.tensor_store,
                                                         show="")

        # If we're running the model halway, return now. This is to compute enough features for the wasserstein critic, but no more
        if halfway:
            return None

        # Don't back-prop into resnet if we're freezing these features (TODO: instead set requires grad to false)
        if self.params.get("freeze_feature_net"):
            features_w = features_w.detach()

        self.prof.tick("img_to_map_frame")
        self.tensor_store.keep_inputs("images", images)
        self.tensor_store.keep_inputs("map_F_w", features_w)
        self.tensor_store.keep_inputs("map_M_w", coverages_w)

        if run_metadata.IS_ROLLOUT:
            Presenter().show_image(features_w.data[0, 0:3],
                                   "F",
                                   torch=True,
                                   scale=8,
                                   waitkey=1)

        # Accumulate the egocentric features in a global map
        maps_s_w = self.map_accumulator_w(features_w,
                                          coverages_w,
                                          add_mask=has_obs,
                                          show="acc" if IMG_DBG else "")
        map_poses_w = g_poses
        self.tensor_store.keep_inputs("map_S_W", maps_s_w)
        self.prof.tick("map_accumulate")

        Presenter().show_image(maps_s_w.data[0],
                               f"{self.name}_S_map_W",
                               torch=True,
                               scale=4,
                               waitkey=1)

        # Do grounding of objects in the map chosen to do so
        maps_r_w, map_poses_r_w = self.map_processor_a_w(maps_s_w,
                                                         sent_embeddings,
                                                         map_poses_w,
                                                         show="")
        self.tensor_store.keep_inputs("map_R_W", maps_r_w)
        Presenter().show_image(maps_r_w.data[0],
                               f"{self.name}_R_map_W",
                               torch=True,
                               scale=4,
                               waitkey=1)
        self.prof.tick("map_proc_gnd")

        # Transform to drone's reference frame
        self.map_transform_w_to_r.set_maps(maps_r_w, map_poses_r_w)
        maps_r_r, map_poses_r_r = self.map_transform_w_to_r.get_maps(cam_poses)
        self.tensor_store.keep_inputs("map_R_R", maps_r_r)
        self.prof.tick("transform_w_to_r")

        # Predict goal location
        maps_g_r, map_poses_g_r = self.map_processor_b_r(
            maps_r_r, sent_embeddings, map_poses_r_r)
        self.tensor_store.keep_inputs("map_G_R", maps_g_r)

        # Transform back to map frame
        self.map_transform_r_to_w.set_maps(maps_g_r, map_poses_g_r)
        maps_g_w, _ = self.map_transform_r_to_w.get_maps(None)
        self.tensor_store.keep_inputs("map_G_W", maps_g_w)
        self.prof.tick("map_proc_b")

        # Show and publish to RVIZ
        Presenter().show_image(maps_g_w.data[0],
                               f"{self.name}_G_map_W",
                               torch=True,
                               scale=8,
                               waitkey=1)
        if self.rviz:
            self.rviz.publish_map(
                "goal_map", maps_g_w[0].data.cpu().numpy().transpose(1, 2, 0),
                self.params["world_size_m"])

        # Output the final action given the processed map
        action_pred = self.map_to_action(maps_g_r, sent_embeddings)
        out_action = self.deterministic_action(action_pred[:, 0:3], None,
                                               action_pred[:, 3])
        self.tensor_store.keep_inputs("action", out_action)
        self.prof.tick("map_to_action")

        return out_action

    # TODO: The below two methods seem to do the same thing
    def maybe_cuda(self, tensor):
        if self.is_cuda:
            return tensor.cuda()
        else:
            return tensor

    def cuda_var(self, tensor):
        return cuda_var(tensor, self.is_cuda, self.cuda_device)

    def unbatch(self, batch):
        # TODO: Carefully consider this line. This is necessary to reset state between batches (e.g. delete all tensors in the tensor store)
        self.reset()
        # Get rid of the batch dimension for everything
        images = self.maybe_cuda(batch["images"])[0]
        seq_len = images.shape[0]
        instructions = self.maybe_cuda(batch["instr"])[0][:seq_len]
        instr_lengths = batch["instr_len"][0]
        states = self.maybe_cuda(batch["states"])[0]
        actions = self.maybe_cuda(batch["actions"])[0]

        # Auxiliary labels
        lm_pos_fpv = batch["lm_pos_fpv"][0]
        lm_pos_map = batch["lm_pos_map"][0]
        lm_indices = batch["lm_indices"][0]
        goal_pos_map = batch["goal_loc"][0]

        # TODO: Get rid of this. We will have lm_mentioned booleans and lm_mentioned_idx integers and that's it.
        TEMPLATES = True
        if TEMPLATES:
            lm_mentioned_tplt = batch["lm_mentioned_tplt"][0]
            side_mentioned_tplt = batch["side_mentioned_tplt"][0]
            side_mentioned_tplt = self.cuda_var(side_mentioned_tplt)
            lm_mentioned_tplt = self.cuda_var(lm_mentioned_tplt)
            lang_lm_mentioned = None
        else:
            lm_mentioned_tplt = None
            side_mentioned_tplt = None
            lang_lm_mentioned = batch["lang_lm_mentioned"][0]
        lm_mentioned = batch["lm_mentioned"][0]
        # This is the first-timestep metadata
        metadata = batch["md"][0]

        lm_pos_map = [
            torch.from_numpy(
                transformations.pos_m_to_px(
                    p.numpy(), self.params["global_map_size"],
                    self.params["world_size_m"], self.params["world_size_px"]))
            if p is not None else None for p in lm_pos_map
        ]

        goal_pos_map = torch.from_numpy(
            transformations.pos_m_to_px(goal_pos_map.numpy(),
                                        self.params["global_map_size"],
                                        self.params["world_size_m"],
                                        self.params["world_size_px"]))

        lm_pos_map = [
            self.cuda_var(s.long()) if s is not None else None
            for s in lm_pos_map
        ]
        lm_pos_fpv_features = [
            self.cuda_var(
                (s /
                 self.img_to_features_w.img_to_features.get_downscale_factor()
                 ).long()) if s is not None else None for s in lm_pos_fpv
        ]
        lm_pos_fpv_img = [
            self.cuda_var(s.long()) if s is not None else None
            for s in lm_pos_fpv
        ]
        lm_indices = [
            self.cuda_var(s) if s is not None else None for s in lm_indices
        ]
        goal_pos_map = self.cuda_var(goal_pos_map)
        if not TEMPLATES:
            lang_lm_mentioned = self.cuda_var(lang_lm_mentioned)
        lm_mentioned = [
            self.cuda_var(s) if s is not None else None for s in lm_mentioned
        ]

        obs_mask = [True for _ in range(seq_len)]
        plan_mask = [True for _ in range(seq_len)]
        pos_enc = None

        # TODO: Figure out how to keep these properly. Perhaps as a whole batch is best
        self.tensor_store.keep_inputs("lm_pos_fpv_img", lm_pos_fpv_img)
        self.tensor_store.keep_inputs("lm_pos_fpv_features",
                                      lm_pos_fpv_features)
        self.tensor_store.keep_inputs("lm_pos_map", lm_pos_map)
        self.tensor_store.keep_inputs("lm_indices", lm_indices)
        self.tensor_store.keep_inputs("goal_pos_map", goal_pos_map)
        if not TEMPLATES:
            self.tensor_store.keep_inputs("lang_lm_mentioned",
                                          lang_lm_mentioned)
        else:
            self.tensor_store.keep_inputs("lm_mentioned_tplt",
                                          lm_mentioned_tplt)
            self.tensor_store.keep_inputs("side_mentioned_tplt",
                                          side_mentioned_tplt)
        self.tensor_store.keep_inputs("lm_mentioned", lm_mentioned)

        # ----------------------------------------------------------------------------
        # Optional Auxiliary Inputs
        # ----------------------------------------------------------------------------
        #if self.aux_losses.input_required("lm_pos_map"):
        self.tensor_store.keep_inputs("lm_pos_map", lm_pos_map)
        #if self.aux_losses.input_required("lm_indices"):
        self.tensor_store.keep_inputs("lm_indices", lm_indices)
        #if self.aux_losses.input_required("lm_mentioned"):
        self.tensor_store.keep_inputs("lm_mentioned", lm_mentioned)

        return images, instructions, instr_lengths, states, actions, \
               lm_pos_fpv_img, lm_pos_fpv_features, lm_pos_map, lm_indices, goal_pos_map, \
               lm_mentioned, lm_mentioned_tplt, side_mentioned_tplt, lang_lm_mentioned, \
               metadata, obs_mask, plan_mask, pos_enc

    def show_landmark_locations(self, loop=True, states=None):
        # Show landmark locations in first-person images
        img_all = self.tensor_store.get("images")
        img_w_all = self.tensor_store.get("images_w")
        import rollout.run_metadata as md
        if md.IS_ROLLOUT:
            # TODO: Discard this and move this to PomdpInterface or something
            # (it's got nothing to do with the model)
            # load landmark positions from configs
            from data_io.env import load_env_config
            from learning.datasets.aux_data_providers import get_landmark_locations_airsim
            from learning.models.semantic_map.pinhole_camera_inv import PinholeCameraProjection
            projector = PinholeCameraProjection(
                map_size_px=self.params["global_map_size"],
                world_size_px=self.params["world_size_px"],
                world_size_m=self.params["world_size_m"],
                img_x=self.params["img_w"],
                img_y=self.params["img_h"],
                cam_fov=self.params["cam_h_fov"],
                #TODO: Handle correctly
                domain="sim",
                use_depth=False)
            conf_json = load_env_config(md.ENV_ID)
            landmark_names, landmark_indices, landmark_pos = get_landmark_locations_airsim(
                conf_json)
            cam_poses = self.cam_poses_from_states(states)
            cam_pos = cam_poses.position[0]
            cam_rot = cam_poses.orientation[0]
            lm_pos_map_all = []
            lm_pos_img_all = []
            for i, landmark_in_world in enumerate(landmark_pos):
                lm_pos_img, landmark_in_cam, status = projector.world_point_to_image(
                    cam_pos, cam_rot, landmark_in_world)
                lm_pos_map = torch.from_numpy(
                    transformations.pos_m_to_px(
                        landmark_in_world[np.newaxis, :],
                        self.params["global_map_size"],
                        self.params["world_size_m"],
                        self.params["world_size_px"]))
                lm_pos_map_all += [lm_pos_map[0]]
                if lm_pos_img is not None:
                    lm_pos_img_all += [lm_pos_img]

            lm_pos_img_all = [lm_pos_img_all]
            lm_pos_map_all = [lm_pos_map_all]

        else:
            lm_pos_img_all = self.tensor_store.get("lm_pos_fpv_img")
            lm_pos_map_all = self.tensor_store.get("lm_pos_map")

        print("Plotting landmark points")

        for i in range(len(img_all)):
            p = Presenter()
            overlay_fpv = p.overlay_pts_on_image(img_all[i][0],
                                                 lm_pos_img_all[i])
            overlay_map = p.overlay_pts_on_image(img_w_all[i][0],
                                                 lm_pos_map_all[i])
            p.show_image(overlay_fpv, "landmarks_on_fpv_img", scale=8)
            p.show_image(overlay_map, "landmarks_on_map", scale=20)

            if not loop:
                break

    def calc_tensor_statistics(self, prefix, tensor):
        stats = {}
        stats[f"{prefix}_mean"] = torch.mean(tensor).item()
        stats[f"{prefix}_l2"] = torch.norm(tensor).item()
        stats[f"{prefix}_stddev"] = torch.std(tensor).item()
        return stats

    def get_activation_statistics(self, keys):
        stats = {}
        from utils.dict_tools import dict_merge
        for key in keys:
            t = self.tensor_store.get_inputs_batch(key)
            t_stats = self.calc_tensor_statistics(key, t)
            stats = dict_merge(stats, t_stats)
        return stats

    # Forward pass for training (with batch optimizations
    def sup_loss_on_batch(self, batch, eval, halfway=False):
        self.prof.tick("out")

        action_loss_total = Variable(
            empty_float_tensor([1], self.is_cuda, self.cuda_device))

        if batch is None:
            print("Skipping None Batch")
            return action_loss_total

        images, instructions, instr_lengths, states, action_labels, \
        lm_pos_fpv_img, lm_pos_fpv_features, lm_pos_map, lm_indices, goal_pos_map, \
        lm_mentioned, lm_mentioned_tplt, side_mentioned_tplt, lang_lm_mentioned, \
        metadata, obs_mask, plan_mask, pos_enc = self.unbatch(batch)

        # ----------------------------------------------------------------------------
        self.prof.tick("inputs")

        pred_actions = self(images,
                            states,
                            instructions,
                            instr_lengths,
                            has_obs=obs_mask,
                            plan=plan_mask,
                            pos_enc=pos_enc,
                            halfway=halfway)

        # Debugging landmark locations
        if False:
            self.show_landmark_locations()

        # Don't compute any losses - those will not be used. All we care about are the intermediate activations
        if halfway:
            return None, self.tensor_store

        action_losses, _ = self.action_loss(action_labels,
                                            pred_actions,
                                            batchreduce=False)

        self.prof.tick("call")

        action_losses = self.action_loss.batch_reduce_loss(action_losses)
        action_loss = self.action_loss.reduce_loss(action_losses)

        action_loss_total = action_loss

        self.prof.tick("loss")

        aux_losses, aux_metrics = self.aux_losses.calculate_aux_loss(
            self.tensor_store, reduce_average=True)
        aux_loss = self.aux_losses.combine_losses(aux_losses, self.aux_weights)

        #overlaid = self.get_overlaid_classification_results()
        #Presenter().show_image(overlaid, "classification", scale=2)

        prefix = f"{self.model_name}/{'eval' if eval else 'train'}"
        act_prefix = f"{self.model_name}_activations/{'eval' if eval else 'train'}"

        # Mean, stddev, norm of maps
        act_stats = self.get_activation_statistics(
            ["map_S_W", "map_R_W", "map_G_W"])
        self.writer.add_dict(act_prefix, act_stats, self.get_iter())

        self.writer.add_dict(prefix, get_current_meters(), self.get_iter())
        self.writer.add_dict(prefix, aux_losses, self.get_iter())
        self.writer.add_dict(prefix, aux_metrics, self.get_iter())
        self.writer.add_scalar(prefix + "/action_loss",
                               action_loss_total.data.cpu().item(),
                               self.get_iter())
        # TODO: Log value here
        self.writer.add_scalar(prefix + "/goal_accuracy",
                               self.goal_acc_meter.get(), self.get_iter())

        self.prof.tick("auxiliaries")

        total_loss = action_loss_total + aux_loss

        self.inc_iter()

        self.prof.tick("summaries")
        self.prof.loop()
        self.prof.print_stats(1)

        return total_loss, self.tensor_store

    def get_dataset(self,
                    data=None,
                    envs=None,
                    dataset_names=None,
                    dataset_prefix=None,
                    eval=False):
        # TODO: Maybe use eval here
        #if self.fpv:
        data_sources = []
        # If we're running auxiliary objectives, we need to include the data sources for the auxiliary labels
        #if self.use_aux_class_features or self.use_aux_class_on_map or self.use_aux_grounding_features or self.use_aux_grounding_on_map:
        #if self.use_aux_goal_on_map:
        data_sources.append(aup.PROVIDER_LM_POS_DATA)
        data_sources.append(aup.PROVIDER_GOAL_POS)
        #data_sources.append(aup.PROVIDER_LANDMARKS_MENTIONED)
        data_sources.append(aup.PROVIDER_LANG_TEMPLATE)

        #if self.use_rot_noise or self.use_pos_noise:
        #    data_sources.append(aup.PROVIDER_POSE_NOISE)

        return SegmentDataset(data=data,
                              env_list=envs,
                              dataset_names=dataset_names,
                              dataset_prefix=dataset_prefix,
                              aux_provider_names=data_sources,
                              segment_level=True)
コード例 #17
0
class LeakyIntegratorGlobalMap(MapTransformerBase):
    def __init__(self, source_map_size, world_in_map_size, lamda=0.2):
        super(LeakyIntegratorGlobalMap, self).__init__(source_map_size,
                                                       world_in_map_size)
        self.map_size = source_map_size
        self.world_size = world_in_map_size
        self.child_transformer = MapTransformerBase(source_map_size,
                                                    world_in_map_size)
        self.lamda = lamda

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.map_memory = []

        self.dbg_t = None
        self.seq = 0

    def init_weights(self):
        pass

    def reset(self):
        super(LeakyIntegratorGlobalMap, self).reset()
        self.map_memory = []
        self.child_transformer.reset()
        self.seq = 0

    def cuda(self, device=None):
        MapTransformerBase.cuda(self, device)
        self.child_transformer.cuda(device)
        return self

    def dbg_write_extra(self, map, pose):
        if DebugWriter().should_write():
            map = map[0:1, 0:3]
            self.seq += 1
            # Initialize a transformer module
            if pose is not None:
                if self.dbg_t is None:
                    self.dbg_t = MapTransformerBase(self.map_size,
                                                    self.world_size)
                    if self.is_cuda:
                        self.dbg_t.cuda(self.cuda_device)

                # Transform the prediction to the global frame and write out to disk.
                self.dbg_t.set_map(map, pose)
                map_global, _ = self.dbg_t.get_map(None)
            else:
                map_global = map
            DebugWriter().write_img(map_global[0],
                                    "gif_overlaid",
                                    args={
                                        "world_size": self.world_size,
                                        "name": "sm"
                                    })

    def forward(self,
                images_w,
                coverages_w,
                add_mask=None,
                reset_mask=None,
                show=False):
        #show="li"
        self.prof.tick(".")
        batch_size = len(images_w)

        assert add_mask is None or add_mask[
            0] is not None, "The first observation in a sequence needs to be used!"

        masked_observations_w_add = self.lamda * images_w * coverages_w

        all_maps_out_w = []

        self.prof.tick("maps_to_global")

        # TODO: Draw past trajectory on an extra channel of the semantic map
        # Step 2: Integrate serially in the global frame
        for i in range(batch_size):
            if len(self.map_memory) == 0 or (reset_mask is not None
                                             and reset_mask[i]):
                new_map_w = images_w[i:i + 1]

            # Allow masking of observations
            elif add_mask is None or add_mask[i]:
                # Get the current global-frame map
                map_g = self.map_memory[-1]
                cov_w = coverages_w[i:i + 1]
                obs_cov_g = masked_observations_w_add[i:i + 1]

                # Add the observation into the map using a leaky integrator rule (TODO: Output lamda from model)
                new_map_w = (1 - self.lamda
                             ) * map_g + obs_cov_g + self.lamda * map_g * (
                                 1 - cov_w)
            else:
                new_map_w = self.map_memory[-1]

            self.map_memory.append(new_map_w)
            all_maps_out_w.append(new_map_w)

            if show != "":
                Presenter().show_image(new_map_w.data[0, 0:3],
                                       show,
                                       torch=True,
                                       scale=8,
                                       waitkey=50)

        self.prof.tick("integrate")

        # Step 3: Convert all maps to local frame
        all_maps_w = torch.cat(all_maps_out_w, dim=0)

        # Write gifs for debugging
        #self.dbg_write_extra(all_maps_w, None)

        self.prof.tick("maps_to_local")
        self.prof.loop()
        self.prof.print_stats(10)

        return all_maps_w
コード例 #18
0
ファイル: path_predictor.py プロジェクト: pianpwk/drif
 def cuda(self, device=None):
     MapTransformerBase.cuda(self, device)
     #self.map_filter.cuda(device)
     self.dbg_t = None
     return self
コード例 #19
0
 def cuda(self, device=None):
     MapTransformerBase.cuda(self, device)
     self.child_transformer.cuda(device)
     return self
コード例 #20
0
 def cuda(self, device=None):
     MapTransformerBase.cuda(self, device)
     return self
コード例 #21
0
ファイル: model_gsmn_bidomain.py プロジェクト: pianpwk/drif
    def __init__(self, run_name="", model_instance_name=""):

        super(ModelGSMNBiDomain, self).__init__()
        self.model_name = "gsmn_bidomain"
        self.run_name = run_name
        self.name = model_instance_name
        if not self.name:
            self.name = ""
        self.writer = LoggingSummaryWriter(
            log_dir=f"runs/{run_name}/{self.name}")

        self.params = get_current_parameters()["Model"]
        self.aux_weights = get_current_parameters()["AuxWeights"]
        self.use_aux = self.params["UseAuxiliaries"]

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

        self.tensor_store = KeyTensorStore()
        self.aux_losses = AuxiliaryLosses()

        self.rviz = None
        if self.params.get("rviz"):
            self.rviz = RvizInterface(
                base_name="/gsmn/",
                map_topics=["semantic_map", "grounding_map", "goal_map"],
                markerarray_topics=["instruction"])

        # Path-pred FPV model definition
        # --------------------------------------------------------------------------------------------------------------

        self.img_to_features_w = FPVToGlobalMap(
            source_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"],
            res_channels=self.params["resnet_channels"],
            map_channels=self.params["feature_channels"],
            img_w=self.params["img_w"],
            img_h=self.params["img_h"],
            cam_h_fov=self.params["cam_h_fov"],
            img_dbg=IMG_DBG)

        self.map_accumulator_w = LeakyIntegratorGlobalMap(
            source_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])

        # Pre-process the accumulated map to do language grounding if necessary - in the world reference frame
        if self.use_aux[
                "grounding_map"] and not self.use_aux["grounding_features"]:
            self.map_processor_a_w = LangFilterMapProcessor(
                embed_size=self.params["emb_size"],
                in_channels=self.params["feature_channels"],
                out_channels=self.params["relevance_channels"],
                spatial=False,
                cat_out=True)
        else:
            self.map_processor_a_w = IdentityMapProcessor(
                source_map_size=self.params["global_map_size"],
                world_size_px=self.params["world_size_px"],
                world_size_m=self.params["world_size_m"])

        if self.use_aux["goal_map"]:
            self.map_processor_b_r = LangFilterMapProcessor(
                embed_size=self.params["emb_size"],
                in_channels=self.params["relevance_channels"],
                out_channels=self.params["goal_channels"],
                spatial=self.params["spatial_goal_filter"],
                cat_out=self.params["cat_rel_and_goal"])
        else:
            self.map_processor_b_r = IdentityMapProcessor(
                source_map_size=self.params["local_map_size"],
                world_size_px=self.params["world_size_px"],
                world_size_m=self.params["world_size_m"])

        # Common
        # --------------------------------------------------------------------------------------------------------------

        # Sentence Embedding
        self.sentence_embedding = SentenceEmbeddingSimple(
            self.params["word_emb_size"],
            self.params["emb_size"],
            self.params["emb_layers"],
            dropout=0.0)

        self.map_transform_w_to_r = MapTransformerBase(
            source_map_size=self.params["global_map_size"],
            dest_map_size=self.params["local_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])
        self.map_transform_r_to_w = MapTransformerBase(
            source_map_size=self.params["local_map_size"],
            dest_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])

        # Output an action given the global semantic map
        if self.params["map_to_action"] == "downsample2":
            self.map_to_action = EgoMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"])

        elif self.params["map_to_action"] == "cropped":
            self.map_to_action = CroppedMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"])

        # Auxiliary Objectives
        # --------------------------------------------------------------------------------------------------------------

        # We add all auxiliaries that are necessary. The first argument is the auxiliary name, followed by parameters,
        # followed by variable number of names of inputs. ModuleWithAuxiliaries will automatically collect these inputs
        # that have been saved with keep_auxiliary_input() during execution
        if self.use_aux["class_features"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary2D("aux_class", self.params["feature_channels"],
                                 self.params["num_landmarks"],
                                 self.params["dropout"], "fpv_features",
                                 "lm_pos_fpv_features", "lm_indices",
                                 "tensor_store"))
        if self.use_aux["grounding_features"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary2D("aux_ground",
                                 self.params["relevance_channels"], 2,
                                 self.params["dropout"], "fpv_features_g",
                                 "lm_pos_fpv_features", "lm_mentioned",
                                 "tensor_store"))
        if self.use_aux["class_map"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary2D("aux_class_map",
                                 self.params["feature_channels"],
                                 self.params["num_landmarks"],
                                 self.params["dropout"], "map_S_W",
                                 "lm_pos_map", "lm_indices", "tensor_store"))
        if self.use_aux["grounding_map"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary2D("aux_grounding_map",
                                 self.params["relevance_channels"], 2,
                                 self.params["dropout"], "map_R_W",
                                 "lm_pos_map", "lm_mentioned", "tensor_store"))
        if self.use_aux["goal_map"]:
            self.aux_losses.add_auxiliary(
                GoalAuxiliary2D("aux_goal_map", self.params["goal_channels"],
                                self.params["global_map_size"], "map_G_W",
                                "goal_pos_map"))
        # RSS model uses templated data for landmark and side prediction
        if self.use_aux["language"] and self.params["templates"]:
            self.aux_losses.add_auxiliary(
                ClassAuxiliary("aux_lang_lm", self.params["emb_size"],
                               self.params["num_landmarks"], 1,
                               "sentence_embed", "lm_mentioned_tplt"))
            self.aux_losses.add_auxiliary(
                ClassAuxiliary("aux_lang_side", self.params["emb_size"],
                               self.params["num_sides"], 1, "sentence_embed",
                               "side_mentioned_tplt"))
        # CoRL model uses alignment-model groundings
        elif self.use_aux["language"]:
            # one output for each landmark, 2 classes per output. This is for finetuning, so use the embedding that's gonna be fine tuned
            self.aux_losses.add_auxiliary(
                ClassAuxiliary("aux_lang_lm_nl", self.params["emb_size"], 2,
                               self.params["num_landmarks"], "sentence_embed",
                               "lang_lm_mentioned"))
        if self.use_aux["l1_regularization"]:
            self.aux_losses.add_auxiliary(
                FeatureRegularizationAuxiliary2D("aux_regularize_features",
                                                 "l1", "map_S_W"))
            self.aux_losses.add_auxiliary(
                FeatureRegularizationAuxiliary2D("aux_regularize_features",
                                                 "l1", "map_R_W"))

        self.goal_acc_meter = MovingAverageMeter(10)

        self.aux_losses.print_auxiliary_info()

        self.action_loss = ActionLoss()

        self.env_id = None
        self.prev_instruction = None
        self.seq_step = 0
コード例 #22
0
class IdentityIntegratorMap(MapTransformerBase):

    def __init__(self, source_map_size, world_size_px, world_size_m):
        super(IdentityIntegratorMap, self).__init__(source_map_size, world_size_px, world_size_m)
        self.map_size = source_map_size
        self.world_size = world_size_px
        self.world_size_m = world_size_m
        self.child_transformer = MapTransformerBase(source_map_size, world_size_px, world_size_m)

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.map_memory = MapTransformerBase(source_map_size, world_size_px, world_size_m)

        self.last_observation = None

        self.dbg_t = None
        self.seq = 0

    def init_weights(self):
        pass

    def reset(self):
        super(IdentityIntegratorMap, self).reset()
        self.map_memory.reset()
        self.child_transformer.reset()
        self.seq = 0
        self.last_observation = None

    def cuda(self, device=None):
        MapTransformerBase.cuda(self, device)
        self.child_transformer.cuda(device)
        self.map_memory.cuda(device)
        return self

    def dbg_write_extra(self, map, pose):
        if DebugWriter().should_write():
            map = map[0:1, 0:3]
            self.seq += 1
            # Initialize a transformer module
            if pose is not None:
                if self.dbg_t is None:
                    self.dbg_t = MapTransformerBase(self.map_size, self.world_size, self.world_size_m).to(map.device)

                # Transform the prediction to the global frame and write out to disk.
                self.dbg_t.set_map(map, pose)
                map_global, _ = self.dbg_t.get_map(None)
            else:
                map_global = map
            DebugWriter().write_img(map_global[0], "gif_overlaid", args={"world_size": self.world_size, "name": "identity_integrator"})

    def forward(self, images, cam_poses, add_mask=None, show=False):
        #show="li"
        self.prof.tick(".")
        batch_size = len(cam_poses)

        assert add_mask is None or add_mask[0] is not None, "The first observation in a sequence needs to be used!"

        all_maps_out_r = []

        self.prof.tick("maps_to_global")

        # For each timestep, take the latest map that was available, transformed into this timestep
        # Do only a maximum of one transformation for any map to avoid cascading of errors!
        for i in range(batch_size):

            if add_mask is None or add_mask[i]:
                this_obs = (images[i:i+1], cam_poses[i:i+1])
                self.last_observation = this_obs
            else:
                last_obs = self.last_observation
                assert last_obs is not None, "The first observation in a sequence needs to be used!"

                self.child_transformer.set_map(last_obs[0], last_obs[1])
                this_obs = self.child_transformer.get_map(cam_poses[i:i+1])

            all_maps_out_r.append(this_obs[0])

            if show != "":
                Presenter().show_image(this_obs.data[0, 0:3], show, torch=True, scale=8, waitkey=50)

        self.prof.tick("integrate")

        # Step 3: Convert all maps to local frame
        all_maps_r = torch.cat(all_maps_out_r, dim=0)

        # Write gifs for debugging
        self.dbg_write_extra(all_maps_r, None)

        self.set_maps(all_maps_r, cam_poses)

        self.prof.tick("maps_to_local")
        self.prof.loop()
        self.prof.print_stats(10)

        return all_maps_r, cam_poses

    def forward_deprecated(self, images, cam_poses, add_mask=None, show=False):
        #show="li"
        self.prof.tick(".")
        batch_size = len(cam_poses)

        assert add_mask is None or add_mask[0] is not None, "The first observation in a sequence needs to be used!"

        # Step 1: All local maps to global:
        #  TODO: Allow inputing global maps when new projector is ready
        self.child_transformer.set_maps(images, cam_poses)
        observations_g, _ = self.child_transformer.get_maps(None)

        all_maps_out_g = []

        self.prof.tick("maps_to_global")

        # TODO: Draw past trajectory on an extra channel of the semantic map
        # Step 2: Integrate serially in the global frame
        for i in range(batch_size):

            # If we don't have a map yet, initialize the map to this observation
            if self.map_memory.latest_maps is None:
                self.map_memory.set_map(observations_g[i:i+1], None)

            # Allow masking of observations
            if add_mask is None or add_mask[i]:
                # Use the map from this frame
                map_g = observations_g[i:i+1]
                self.map_memory.set_map(map_g, None)
            else:
                # Use the latest available map oriented in global frame
                map_g, _ = self.map_memory.get_map(None)

            if show != "":
                Presenter().show_image(map_g.data[0, 0:3], show, torch=True, scale=8, waitkey=50)

            all_maps_out_g.append(map_g)

        self.prof.tick("integrate")

        # Step 3: Convert all maps to local frame
        all_maps_g = torch.cat(all_maps_out_g, dim=0)

        # Write gifs for debugging
        self.dbg_write_extra(all_maps_g, None)

        self.child_transformer.set_maps(all_maps_g, None)
        maps_r, _ = self.child_transformer.get_maps(cam_poses)
        self.set_maps(maps_r, cam_poses)

        self.prof.tick("maps_to_local")
        self.prof.loop()
        self.prof.print_stats(10)

        return maps_r, cam_poses
コード例 #23
0
 def cuda(self, device=None):
     MapTransformerBase.cuda(self, device)
     self.grid_sampler.cuda(device)
コード例 #24
0
ファイル: identity_accumulator.py プロジェクト: pianpwk/drif
class IdentityMapAccumulator(MapTransformerBase):
    """
    This map accumulator rule simply keeps the latest observation and discards the rest
    """
    def __init__(self, source_map_size, world_size_px, world_size_m):
        super(IdentityMapAccumulator,
              self).__init__(source_map_size, world_size_px, world_size_m)
        self.child_transformer = MapTransformerBase(source_map_size,
                                                    world_size_px,
                                                    world_size_m)

    def reset(self):
        super(IdentityMapAccumulator, self).reset()
        self.child_transformer.reset()

    def cuda(self, device=None):
        MapTransformerBase.cuda(self, device)
        self.child_transformer.cuda(device)
        return self

    def init_weights(self):
        pass

    def forward(self,
                current_maps,
                coverages,
                cam_poses,
                add_mask=None,
                show=""):
        batch_size = len(cam_poses)

        assert add_mask is None or add_mask[
            0] is not None, "The first observation in a sequence needs to be used!"

        # If we don't have masked observations, just return each timestep observations
        if add_mask is None:
            self.set_maps(current_maps, cam_poses)
            return current_maps, cam_poses

        maps_r = []

        # If we have masked observations, then for timesteps where observation is masked (False), get the previous observation
        # rotated to the current frame
        for i in range(batch_size):

            # If we don't have a map yet, rotate this observation and initialize a map
            if self.latest_map is None:
                self.set_map(current_maps[i:i + 1], cam_poses[i:i + 1])
                map_g, _ = self.get_map(None)
                self.set_map(map_g, None)

            # Allow masking of observations
            if add_mask is None or add_mask[i]:
                # Transform the observation into the global (map) frame
                self.child_transformer.set_map(current_maps[i:i + 1],
                                               cam_poses[i:i + 1])
                obs_g, _ = self.child_transformer.get_map(None)

                # Remember this new map
                self.set_map(obs_g, None)

            # Return this map in the camera frame of reference
            map_r, _ = self.get_map(cam_poses[i:i + 1])

            if show != "":
                Presenter().show_image(map_r.data[0, 0:3],
                                       show,
                                       torch=True,
                                       scale=8,
                                       waitkey=1)

            maps_r.append(map_r)

        maps_r = torch.cat(maps_r, dim=0)
        self.set_maps(maps_r, cam_poses)

        return maps_r, cam_poses
コード例 #25
0
class DrawStartPosOnGlobalMap(MapTransformerBase):

    def __init__(self, source_map_size, world_size_px, world_size_m, lamda=0.2):
        super(DrawStartPosOnGlobalMap, self).__init__(source_map_size, world_size_px, world_size_m)
        self.map_size = source_map_size
        self.world_size_px = world_size_px
        self.world_size_m = world_size_m
        self.child_transformer = MapTransformerBase(source_map_size, world_size_px, world_size_m)

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.start_pose = None
        self.last_emb = None

        self.dbg_t = None
        self.seq = 0

    def init_weights(self):
        pass

    def reset(self):
        super(DrawStartPosOnGlobalMap, self).reset()
        self.start_pose = None
        self.last_emb = None
        self.child_transformer.reset()
        self.seq = 0

    def cuda(self, device=None):
        MapTransformerBase.cuda(self, device)
        self.child_transformer.cuda(device)
        return self

    def get_start_poses(self, cam_poses_w, sentence_embeddings):
        # For each timestep, get the pose corresponding to the start of the instruction segment
        seq_len = len(sentence_embeddings)
        start_poses = []
        for i in range(seq_len):
            if self.last_emb is not None and (sentence_embeddings[i].data == self.last_emb).all():
                pass # Keep the same start pose since we're on the same segment
            else:
                self.last_emb = sentence_embeddings[i].data
                self.start_pose = cam_poses_w[i]
            start_poses.append(self.start_pose)
        return start_poses

    def forward(self, maps_w, sentence_embeddings, map_poses_w, cam_poses_w, show=False):
        #show="li
        self.prof.tick(".")
        batch_size = len(maps_w)

        # Initialize the layers of the same size as the maps, but with only one channel
        new_layer_size = list(maps_w.size())
        new_layer_size[1] = 1
        all_maps_out_w = empty_float_tensor(new_layer_size, self.is_cuda, self.cuda_device)

        start_poses = self.get_start_poses(cam_poses_w, sentence_embeddings)

        poses_img = [poses_m_to_px(as_pose, self.map_size, self.world_size_px, self.world_size_m) for as_pose in start_poses]
        #poses_img = poses_as_to_img(start_poses, self.world_size, batch_dim=True)

        for i in range(batch_size):
            x = min(max(int(poses_img[i].position.data[0]), 0), new_layer_size[2] - 1)
            y = min(max(int(poses_img[i].position.data[1]), 0), new_layer_size[2] - 1)
            all_maps_out_w[i, 0, x, y] = 10.0

        if show != "":
            Presenter().show_image(all_maps_out_w[0], show, torch=True, waitkey=1)

        self.prof.tick("draw")

        # Step 3: Convert all maps to local frame
        maps_out = torch.cat([Variable(all_maps_out_w), maps_w], dim=1)
        #all_maps_w = torch.cat(all_maps_out_w, dim=0)

        self.prof.loop()
        self.prof.print_stats(10)

        return maps_out, map_poses_w
コード例 #26
0
ファイル: leaky_integrator.py プロジェクト: pianpwk/drif
class LeakyIntegratorMap(MapTransformerBase):
    def __init__(self,
                 source_map_size,
                 world_size_px,
                 world_size_m,
                 lamda=0.2):
        super(LeakyIntegratorMap, self).__init__(source_map_size,
                                                 world_size_px, world_size_m)
        self.map_size = source_map_size
        self.world_size_px = world_size_px
        self.world_size_m = world_size_m
        self.child_transformer = MapTransformerBase(source_map_size,
                                                    world_size_px,
                                                    world_size_m)
        self.lamda = lamda

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.map_memory = MapTransformerBase(source_map_size, world_size_px,
                                             world_size_m)

        self.dbg_t = None
        self.seq = 0

    def init_weights(self):
        pass

    def reset(self):
        super(LeakyIntegratorMap, self).reset()
        self.map_memory.reset()
        self.child_transformer.reset()
        self.seq = 0

    def cuda(self, device=None):
        MapTransformerBase.cuda(self, device)
        self.child_transformer.cuda(device)
        self.map_memory.cuda(device)
        return self

    def dbg_write_extra(self, map, pose):
        if DebugWriter().should_write():
            map = map[0:1, 0:3]
            self.seq += 1
            # Initialize a transformer module
            if pose is not None:
                if self.dbg_t is None:
                    self.dbg_t = MapTransformerBase(
                        self.map_size, self.world_size_px,
                        self.world_size_m).to(map.device)

                # Transform the prediction to the global frame and write out to disk.
                self.dbg_t.set_map(map, pose)
                map_global, _ = self.dbg_t.get_map(None)
            else:
                map_global = map
            DebugWriter().write_img(map_global[0],
                                    "gif_overlaid",
                                    args={
                                        "world_size": self.world_size_px,
                                        "name": "sm"
                                    })

    def forward(self, images, coverages, cam_poses, add_mask=None, show=False):
        #show="li"
        self.prof.tick(".")
        batch_size = len(images)

        assert add_mask is None or add_mask[
            0] is not None, "The first observation in a sequence needs to be used!"

        # Step 1: All local maps to global: # TODO: Allow inputing global maps when new projector is ready
        self.child_transformer.set_maps(images, cam_poses)
        observations_g, _ = self.child_transformer.get_maps(None)

        self.child_transformer.set_maps(coverages, cam_poses)
        coverages_g, _ = self.child_transformer.get_maps(None)

        masked_observations_g_add = self.lamda * observations_g * coverages_g

        all_maps_out_g = []

        self.prof.tick("maps_to_global")

        # TODO: Draw past trajectory on an extra channel of the semantic map

        # Step 2: Integrate serially in the global frame
        for i in range(batch_size):

            # If we don't have a map yet, initialize the map to this observation
            if self.map_memory.latest_maps is None:
                self.map_memory.set_map(observations_g[i:i + 1], None)
                #self.set_map(observations_g[i:i+1], None)

            # Allow masking of observations
            if add_mask is None or add_mask[i]:
                # Get the current global-frame map
                map_g, _ = self.map_memory.get_map(None)

                #obs_g = observations_g[i:i+1]
                cov_g = coverages_g[i:i + 1]
                obs_cov_g = masked_observations_g_add[i:i + 1]

                # Add the observation into the map using a leaky integrator rule (TODO: Output lamda from model)
                new_map_g = (1 - self.lamda
                             ) * map_g + obs_cov_g + self.lamda * map_g * (
                                 1 - cov_g)

                # Remember this new map
                self.map_memory.set_map(new_map_g, None)
                #self.set_map(new_map_g, None)

            map_g, _ = self.map_memory.get_map(None)

            # Return this map in the camera frame of reference
            #map_r, _ = self.get_map(cam_poses[i:i+1])

            if show != "":
                Presenter().show_image(map_g.data[0, 0:3],
                                       show,
                                       torch=True,
                                       scale=8,
                                       waitkey=50)

            all_maps_out_g.append(map_g)

        self.prof.tick("integrate")

        # Step 3: Convert all maps to local frame
        all_maps_g = torch.cat(all_maps_out_g, dim=0)

        # Write gifs for debugging
        self.dbg_write_extra(all_maps_g, None)

        self.child_transformer.set_maps(all_maps_g, None)
        maps_r, _ = self.child_transformer.get_maps(cam_poses)
        self.set_maps(maps_r, cam_poses)

        self.prof.tick("maps_to_local")
        self.prof.loop()
        self.prof.print_stats(10)

        return maps_r, cam_poses
コード例 #27
0
ファイル: path_predictor.py プロジェクト: pianpwk/drif
class PathPredictor(MapTransformerBase):

    # TODO: Standardize run_params
    def __init__(self, lingunet_params, source_map_size, world_size_px,
                 world_size_m):
        super(PathPredictor, self).__init__(source_map_size, world_size_px,
                                            world_size_m)

        if lingunet_params.get("small_network"):
            self.unet = Lingunet5S(lingunet_params)
        else:
            self.unet = Lingunet5(lingunet_params)

        #self.map_filter = MapLangSemanticFilter(emb_size, feature_channels, 3)
        self.map_size_px = source_map_size
        self.world_size_px = world_size_px
        self.world_size_m = world_size_m

        self.dbg_t = None
        self.seq = 0

    def init_weights(self):
        self.unet.init_weights()

    def reset(self):
        super(PathPredictor, self).reset()
        self.seq = 0

    def cuda(self, device=None):
        MapTransformerBase.cuda(self, device)
        #self.map_filter.cuda(device)
        self.dbg_t = None
        return self

    def dbg_write_extra(self, map, pose):
        if DebugWriter().should_write():
            self.seq += 1
            # Initialize a transformer module
            if self.dbg_t is None:
                self.dbg_t = MapTransformerBase(
                    self.map_size_px, self.world_size_px,
                    self.world_size_m).to(map.device)

            # Transform the prediction to the global frame and write out to disk.
            self.dbg_t.set_map(map, pose)
            map_global, _ = self.dbg_t.get_map(None)
            DebugWriter().write_img(map_global[0],
                                    "gif_overlaid",
                                    args={
                                        "world_size": self.world_size_px,
                                        "name": "pathpred"
                                    })

    def forward(self,
                image,
                sentence_embeddimg,
                map_poses,
                proc_mask=None,
                show=""):
        # TODO: Move map perturb data augmentation in here.

        if image.size(1) > self.feature_channels:
            image = image[:, 0:self.feature_channels, :, :]

        pred_mask = self.unet(image, sentence_embeddimg)

        # Wtf is this:
        #self.map_filter.precompute_conv_weights(sentence_embeddimg)
        #features_filtered = self.map_filter(image)
        #out_maps = torch.cat([pred_mask, features_filtered], dim=1)
        """
        if proc_mask is not None:
            bs = pred_mask.size(0)
            for i in range(bs):

                # If we are using this processed map, apply it
                if proc_mask[bs]:
                    self.set_map(pred_mask[i:i+1], map_poses[i:i+1])

                # Otherwise return the latest processed map, rotated in this frame of reference
                pred_mask[i] = self.get_map(map_poses[i:i+1])

        if show != "":
            Presenter().show_image(pred_mask.data[0], show, torch=True, scale=8, waitkey=1)

        self.set_maps(pred_mask, map_poses)

        #self.dbg_write_extra(pred_mask, map_poses)
        """
        return pred_mask, map_poses
コード例 #28
0
 def cuda(self, device=None):
     MapTransformerBase.cuda(self, device)
     self.is_cuda = True
     self.cuda_device = device
     self.map_affine.cuda(device)
     return self
コード例 #29
0
ファイル: lang_filter_map_to_map.py プロジェクト: hyzcn/drif
 def cuda(self, device=None):
     MapTransformerBase.cuda(self, device)
     self.lang_filter.cuda(device)
     return self
コード例 #30
0
ファイル: map_batch_fill_missing.py プロジェクト: hyzcn/drif
class MapBatchFillMissing(MapTransformerBase):

    def __init__(self, source_map_size, world_in_map_size):
        super(MapBatchFillMissing, self).__init__(source_map_size, world_in_map_size)
        self.map_size = source_map_size
        self.world_size = world_in_map_size
        self.child_transformer = MapTransformerBase(source_map_size, world_in_map_size)

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.map_memory = MapTransformerBase(source_map_size, world_in_map_size)

        self.last_observation = None

        self.dbg_t = None
        self.seq = 0

    def init_weights(self):
        pass

    def reset(self):
        super(MapBatchFillMissing, self).reset()
        self.map_memory.reset()
        self.child_transformer.reset()
        self.seq = 0
        self.last_observation = None

    def cuda(self, device=None):
        MapTransformerBase.cuda(self, device)
        self.child_transformer.cuda(device)
        self.map_memory.cuda(device)
        return self

    def dbg_write_extra(self, map, pose):
        if DebugWriter().should_write():
            map = map[0:1, 0:3]
            self.seq += 1
            # Initialize a transformer module
            if pose is not None:
                if self.dbg_t is None:
                    self.dbg_t = MapTransformerBase(self.map_size, self.world_size)
                    if self.is_cuda:
                        self.dbg_t.cuda(self.cuda_device)

                # Transform the prediction to the global frame and write out to disk.
                self.dbg_t.set_map(map, pose)
                map_global, _ = self.dbg_t.get_map(None)
            else:
                map_global = map
            DebugWriter().write_img(map_global[0], "gif_overlaid", args={"world_size": self.world_size, "name": "identity_integrator"})

    def forward(self, select_images, all_cam_poses, plan_mask=None, show=False):
        #show="li"
        self.prof.tick(".")

        # During rollout, plan_mask will alternate between [True] and [False]
        if plan_mask is None:
            all_images = select_images
            return all_images, all_cam_poses

        full_batch_size = len(all_cam_poses)

        all_maps_out_r = []

        self.prof.tick("maps_to_global")

        # For each timestep, take the latest map that was available, transformed into this timestep
        # Do only a maximum of one transformation for any map to avoid cascading of errors!
        ptr = 0
        for i in range(full_batch_size):
            this_pose = all_cam_poses[i:i+1]
            if plan_mask[i]:
                this_obs = (select_images[ptr:ptr+1], this_pose)
                ptr += 1
                self.last_observation = this_obs
            else:
                assert self.last_observation is not None, "The first observation in a sequence needs to be used!"
                last_map, last_pose = self.last_observation

                # TODO: See if we can speed this up. Perhaps batch for all timesteps inbetween observations
                self.child_transformer.set_map(last_map, last_pose)
                this_obs = self.child_transformer.get_map(this_pose)

            all_maps_out_r.append(this_obs[0])

            if show != "":
                Presenter().show_image(this_obs.data[0, 0:3], show, torch=True, scale=8, waitkey=50)

        self.prof.tick("integrate")

        # Step 3: Convert all maps to local frame
        all_maps_r = torch.cat(all_maps_out_r, dim=0)

        # Write gifs for debugging
        #self.dbg_write_extra(all_maps_r, None)

        self.set_maps(all_maps_r, all_cam_poses)

        self.prof.tick("maps_to_local")
        self.prof.loop()
        self.prof.print_stats(10)

        return all_maps_r, all_cam_poses