Example 1
    def forward(self, inputs, rnn_hxs, masks):
        # Normalize the RGB observation and read the goal/time inputs
        x_rgb = inputs["rgb_at_t"]
        x_rgb = process_image(x_rgb, self.img_mean, self.img_std)
        x_goal = inputs["goal_at_t"]
        x_time = inputs["t"].squeeze(1)

        # Convert the (x, y) goal into polar coordinates: distance and heading
        x_rho = torch.norm(x_goal, dim=1)
        x_phi = torch.atan2(x_goal[:, 1], x_goal[:, 0])

        # Embed each input modality separately
        x_rho_emb = self.distance_encoder(x_rho)
        x_phi_emb = self.angle_encoder(x_phi)
        x_time_emb = self.time_encoder(x_time)
        x_rgb_emb = self.rgb_encoder(x_rgb)

        embeddings = [x_rgb_emb, x_rho_emb, x_phi_emb, x_time_emb]

        # Fuse the concatenated embeddings and update the recurrent state
        x = self.fuse_embedding(torch.cat(embeddings, dim=1))

        x, rnn_hxs = self._forward_gru(x, rnn_hxs.squeeze(0), masks)
        rnn_hxs = rnn_hxs.unsqueeze(0)

        # Return the value estimate, the fused features, and the updated hidden state
        return self.critic_linear(x), x, rnn_hxs
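The forward pass above relies on several submodules (distance_encoder, angle_encoder, time_encoder, rgb_encoder, fuse_embedding, critic_linear) whose definitions are not part of this example. The sketch below is only an illustrative guess at how such modules could be declared; every layer size and architecture choice here is an assumption, not the original implementation.

import torch
import torch.nn as nn

class ScalarEncoder(nn.Module):
    # Embeds a single scalar (distance, heading angle, or timestep) into a small vector.
    def __init__(self, emb_dim=8):
        super().__init__()
        self.mlp = nn.Sequential(nn.Linear(1, emb_dim), nn.ReLU())

    def forward(self, x):
        # x has shape (batch,); add a feature dimension before the linear layer.
        return self.mlp(x.unsqueeze(1).float())

# Hypothetical module definitions (sizes chosen arbitrarily for this sketch).
rgb_encoder = nn.Sequential(
    nn.Conv2d(3, 32, kernel_size=8, stride=4), nn.ReLU(),
    nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
    nn.AdaptiveAvgPool2d(1), nn.Flatten(),
    nn.Linear(64, 512), nn.ReLU(),
)
distance_encoder = ScalarEncoder()
angle_encoder = ScalarEncoder()
time_encoder = ScalarEncoder()
fuse_embedding = nn.Sequential(nn.Linear(512 + 3 * 8, 512), nn.ReLU())
critic_linear = nn.Linear(512, 1)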
Example 2
    def predict_deltas(self, x, masks=None):
        # Transpose multichannel inputs
        st_1 = process_image(x["rgb_at_t_1"], self.img_mean_t, self.img_std_t)
        dt_1 = transpose_image(x["depth_at_t_1"])
        ego_map_gt_at_t_1 = transpose_image(x["ego_map_gt_at_t_1"])
        st = process_image(x["rgb_at_t"], self.img_mean_t, self.img_std_t)
        dt = transpose_image(x["depth_at_t"])
        ego_map_gt_at_t = transpose_image(x["ego_map_gt_at_t"])
        # This happens only for a baseline
        if (
            "ego_map_gt_anticipated_at_t_1" in x
            and x["ego_map_gt_anticipated_at_t_1"] is not None
        ):
            ego_map_gt_anticipated_at_t_1 = transpose_image(
                x["ego_map_gt_anticipated_at_t_1"]
            )
            ego_map_gt_anticipated_at_t = transpose_image(
                x["ego_map_gt_anticipated_at_t"]
            )
        else:
            ego_map_gt_anticipated_at_t_1 = None
            ego_map_gt_anticipated_at_t = None
        # Compute past and current egocentric maps
        # The t-1 and t inputs are stacked along the batch dimension and
        # processed by the projection unit in a single forward pass.
        bs = st_1.size(0)
        pu_inputs_t_1 = {
            "rgb": st_1,
            "depth": dt_1,
            "ego_map_gt": ego_map_gt_at_t_1,
            "ego_map_gt_anticipated": ego_map_gt_anticipated_at_t_1,
        }
        pu_inputs_t = {
            "rgb": st,
            "depth": dt,
            "ego_map_gt": ego_map_gt_at_t,
            "ego_map_gt_anticipated": ego_map_gt_anticipated_at_t,
        }
        pu_inputs = self._safe_cat(pu_inputs_t_1, pu_inputs_t)
        pu_outputs = self.projection_unit(pu_inputs)
        pu_outputs_t = {k: v[bs:] for k, v in pu_outputs.items()}
        # Split the stacked occupancy estimates back into the t-1 and t halves
        pt_1 = pu_outputs["occ_estimate"][:bs]
        pt = pu_outputs["occ_estimate"][bs:]
        # Compute relative pose
        dx = subtract_pose(x["pose_at_t_1"], x["pose_at_t"])
        # Estimate pose
        dx_hat = dx
        xt_hat = x["pose_at_t"]
        all_pose_outputs = None
        if not self.config.ignore_pose_estimator:
            all_pose_outputs = {}
            pose_inputs = {}
            if "rgb" in self.config.pose_predictor_inputs:
                pose_inputs["rgb_t_1"] = st_1
                pose_inputs["rgb_t"] = st
            if "depth" in self.config.pose_predictor_inputs:
                pose_inputs["depth_t_1"] = dt_1
                pose_inputs["depth_t"] = dt
            if "ego_map" in self.config.pose_predictor_inputs:
                pose_inputs["ego_map_t_1"] = pt_1
                pose_inputs["ego_map_t"] = pt
            if self.config.detach_map:
                # Optionally stop pose-estimator gradients from flowing into the map
                for k in pose_inputs.keys():
                    pose_inputs[k] = pose_inputs[k].detach()
            n_pose_inputs = self._transform_observations(pose_inputs, dx)
            pose_outputs = self.pose_estimator(n_pose_inputs)
            dx_hat = add_pose(dx, pose_outputs["pose"])
            all_pose_outputs["pose_outputs"] = pose_outputs
            # Estimate global pose
            xt_hat = add_pose(x["pose_hat_at_t_1"], dx_hat)
        # Zero out pose prediction based on the mask
        if masks is not None:
            xt_hat = xt_hat * masks
            dx_hat = dx_hat * masks
        outputs = {
            "pt": pt,
            "dx_hat": dx_hat,
            "xt_hat": xt_hat,
            "all_pu_outputs": pu_outputs_t,
            "all_pose_outputs": all_pose_outputs,
        }
        if "ego_map_hat" in pu_outputs_t:
            outputs["ego_map_hat_at_t"] = pu_outputs_t["ego_map_hat"]
        return outputs
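predict_deltas batches the t-1 and t observations through the projection unit in one pass via the _safe_cat helper and then splits the outputs back with [:bs] / [bs:]. The helper itself is not shown in this example; the snippet below is only a plausible sketch of what it might do, assuming it concatenates matching keys along the batch dimension and passes None entries (such as the optional anticipated ego-maps) through unchanged.

import torch

def _safe_cat(d1, d2):
    # Concatenate two dicts of tensors key by key along the batch dimension,
    # keeping None values as None so optional inputs stay optional.
    out = {}
    for k in d1:
        if d1[k] is None or d2[k] is None:
            out[k] = None
        else:
            out[k] = torch.cat([d1[k], d2[k]], dim=0)
    return out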