def crop_zoom_from_mem(mem, lrt, Z2, Y2, X2, additive_pad=0.1):
    """Resample a zoomed-in crop of a voxel feature grid.

    Args:
        mem: B x C x Z x Y x X feature tensor in mem coordinates.
        lrt: B x 19 box parameterization defining the zoom region.
        Z2, Y2, X2: resolution of the output zoom grid.
        additive_pad: extra padding forwarded to Zoom2Ref.

    Returns:
        B x C x Z2 x Y2 x X2 tensor sampled from mem.
    """
    B, C, Z, Y, X = list(mem.shape)
    B2, E = list(lrt.shape)
    assert (E == 19)
    assert (B == B2)
    # Every voxel of the zoom grid pulls one sample from mem: map its
    # coordinates zoom -> ref -> mem, then sample the feature volume there.
    xyz_zoom = utils_basic.gridcloud3D(B, Z2, Y2, X2, norm=False)
    xyz_ref = Zoom2Ref(xyz_zoom, lrt, Z2, Y2, X2, additive_pad=additive_pad)
    xyz_mem = Ref2Mem(xyz_ref, Z, Y, X)
    samples = utils_samp.sample3D(mem, xyz_mem, Z2, Y2, X2)
    return torch.reshape(samples, [B, C, Z2, Y2, X2])
def apply_pixX_T_memR_to_voxR(pix_T_camX, camX_T_camR, voxR, D, H, W):
    """Resample a voxel grid (in R/mem coords) into camX's viewing frustum.

    Args:
        pix_T_camX: B x 4 x 4 camera intrinsics for camX.
        camX_T_camR: B x 4 x 4 extrinsics mapping camR coords to camX coords.
        voxR: B x C x Z x Y x X voxel features in mem coordinates of frame R.
        D, H, W: output frustum resolution (depth planes, height, width).

    Returns:
        B x C x D x H x W frustum-shaped resampling of voxR.
    """
    B, C, Z, Y, X = list(voxR.shape)
    z_near = hyp.ZMIN
    z_far = hyp.ZMAX
    # One metric depth per output slice, linearly spaced in [ZMIN, ZMAX].
    grid_z = torch.linspace(z_near, z_far, steps=D, dtype=torch.float32,
                            device=torch.device('cuda'))
    grid_z = torch.reshape(grid_z, [1, 1, D, 1, 1])
    grid_z = grid_z.repeat([B, 1, 1, H, W])
    grid_z = torch.reshape(grid_z, [B * D, 1, H, W])
    # Tile the intrinsics so each depth plane has its own 4x4.
    # (fix: torch.unsqueeze takes dim=, not the numpy-style axis= the
    # original passed here; the rest of this function already uses dim=.)
    pix_T_camX__ = torch.unsqueeze(pix_T_camX, dim=1).repeat([1, D, 1, 1])
    pix_T_camX = torch.reshape(pix_T_camX__, [B * D, 4, 4])
    # Unproject every (u, v, z) of the frustum into 3D camX coordinates.
    xyz_camX = utils_geom.depth2pointcloud(grid_z, pix_T_camX)
    # Carry those points into R's mem coordinates: camX -> camR -> mem.
    camR_T_camX = utils_geom.safe_inverse(camX_T_camR)
    camR_T_camX_ = torch.unsqueeze(camR_T_camX, dim=1).repeat([1, D, 1, 1])
    camR_T_camX = torch.reshape(camR_T_camX_, [B * D, 4, 4])
    mem_T_cam = get_mem_T_ref(B * D, Z, Y, X)
    memR_T_camX = matmul2(mem_T_cam, camR_T_camX)
    xyz_memR = utils_geom.apply_4x4(memR_T_camX, xyz_camX)
    xyz_memR = torch.reshape(xyz_memR, [B, D * H * W, 3])
    samp = utils_samp.sample3D(voxR, xyz_memR, D, H, W)
    # NOTE(review): sample3D is assumed to return B x C x D x H x W here;
    # the original trailing comment claimed B x H x W x D x C -- confirm
    # against utils_samp.sample3D.
    return samp
def assemble(bkg_feat0, obj_feat0, origin_T_camRs, camRs_T_zoom):
    """Composite a static background and a moving object into per-step volumes.

    Warps the timestep-0 background feature into every camR frame (this
    effectively creates egomotion), samples the single object feature along
    its trajectory, and composites the two so that the object overwrites the
    background wherever the object has support.

    Args:
        bkg_feat0: B x C x Z x Y x X background features at timestep 0.
        obj_feat0: B x C x Z2 x Y2 x X2 object (zoom-grid) features.
        origin_T_camRs: B x S x 4 x 4 camera poses across the sequence.
        camRs_T_zoom: B x S x 4 x 4 zoom-to-camR transforms (not rigid).

    Returns:
        featRs: B x S x C x Z x Y x X composited, channel-L2-normalized features.
        validRs: B x S x 1 x Z x Y x X mask of voxels with any nonzero channel.
        bkg_featRs, obj_featRs: the individual warped components.
    """
    B, C, Z, Y, X = list(bkg_feat0.shape)
    B2, C2, Z2, Y2, X2 = list(obj_feat0.shape)
    assert (B == B2)
    assert (C == C2)
    B, S, _, _ = list(origin_T_camRs.shape)
    # Utils for packing/unpacking the sequence dim into the batch dim.
    __p = lambda x: pack_seqdim(x, B)
    __u = lambda x: unpack_seqdim(x, B)
    # Warp the timestep-0 background into each frame's camR coordinates.
    cam0s_T_camRs = utils_geom.get_camM_T_camXs(origin_T_camRs, ind=0)
    camRs_T_cam0s = __u(utils_geom.safe_inverse(__p(cam0s_T_camRs)))
    bkg_feat0s = bkg_feat0.unsqueeze(1).repeat(1, S, 1, 1, 1, 1)
    bkg_featRs = apply_4x4s_to_voxs(camRs_T_cam0s, bkg_feat0s)
    # For each voxel of the bird grid, find where to sample inside the
    # object's zoom grid: mem -> ref -> zoom.
    xyz_mems_ = utils_basic.gridcloud3D(B * S, Z, Y, X, norm=False)  # B*S x Z*Y*X x 3
    xyz_camRs_ = Mem2Ref(xyz_mems_, Z, Y, X)
    camRs_T_zoom_ = __p(camRs_T_zoom)
    zoom_T_camRs_ = camRs_T_zoom_.inverse()  # note this is not a rigid transform
    xyz_zooms_ = utils_geom.apply_4x4(zoom_T_camRs_, xyz_camRs_)
    # One object feature for the whole trajectory, tiled across timesteps.
    obj_feats = obj_feat0.unsqueeze(1).repeat(1, S, 1, 1, 1, 1)
    obj_feats_ = __p(obj_feats)
    obj_featRs_ = utils_samp.sample3D(obj_feats_, xyz_zooms_, Z, Y, X)
    obj_featRs = __u(obj_featRs_)
    # Overwrite the background wherever the object has support.
    # (fix: the mask must come from the OBJECT features. The original
    # computed it from bkg_featRs, which zeroed the background at every
    # nonzero background voxel and collapsed the composite to obj_featRs
    # alone, contradicting the "restore magnitudes for the bkg" note below.)
    obj_mask = (obj_featRs > 0).float()
    featRs = obj_featRs + (1.0 - obj_mask) * bkg_featRs
    # The channel-wise normalization restores magnitudes for the background.
    featRs = l2_normalize(featRs, dim=2)
    validRs = 1.0 - (featRs == 0).all(dim=2, keepdim=True).float().cuda()
    return featRs, validRs, bkg_featRs, obj_featRs