def unproject_rgb_to_mem(rgb_camB, pixB_T_camA, mem_coord, device=None):
    """Unproject a 2D feature map into a 3D memory (voxel) grid.

    Each voxel of the grid described by ``mem_coord`` is projected into the
    pixel plane of camera B and takes the value of the pixel it lands on
    (nearest-neighbor lookup).  As a result every C-dim pixel of
    ``rgb_camB`` is replicated along a ray through the voxel grid.

    Args:
        rgb_camB: B x C x H x W tensor living in camB pixel coordinates.
        pixB_T_camA: B x 4 x 4 transform taking camA coordinates to camB
            pixel coordinates.
        mem_coord: memory-coordinate description.  ``mem_coord.proto.shape``
            is read as (Y, X, Z) and the object is forwarded to ``Mem2Ref``.
        device: forwarded to ``utils_basic.gridcloud3D``.

    Returns:
        Tensor of shape B x C x Z x Y x X, in A memory coordinates.
    """
    B, C, H, W = list(rgb_camB.shape)
    Y, X, Z = mem_coord.proto.shape

    # un-normalized voxel-center coordinates of the mem grid
    xyz_memA = utils_basic.gridcloud3D(B, Z, Y, X, norm=False, device=device)

    # not specifically related to Ref: this only converts grid indices to
    # metric points, irrespective of which cam the grid is associated with
    xyz_camA = Mem2Ref(xyz_memA, mem_coord)

    # project into camB pixels, then do the perspective divide (x/Z, y/Z)
    xyz_pixB = utils_geom.apply_4x4(pixB_T_camA, xyz_camA)
    EPS = 1e-6  # keeps the divide finite when the depth is exactly zero
    depth = torch.unsqueeze(xyz_pixB[:, :, 2], 2)
    xy_pixB = xyz_pixB[:, :, :2] / (EPS + depth)
    # this is B x N x 2: the (floating point) pixel coordinate of each voxel
    x_pixB, y_pixB = xy_pixB[:, :, 0], xy_pixB[:, :, 1]
    # these are B x N

    # grid_sample expects sampling locations in [-1, 1]
    y_pixB, x_pixB = utils_basic.normalize_grid2D(y_pixB, x_pixB, H, W)

    # since we want a 3D output we need 5D tensors: give the image a
    # singleton depth axis and sample it at depth coordinate 0
    z_pixB = torch.zeros_like(x_pixB)
    grid = torch.stack([x_pixB, y_pixB, z_pixB], dim=2)
    grid = torch.reshape(grid, [B, Z, Y, X, 3])
    # NOTE(review): align_corners is left at the library default, exactly as
    # before; confirm it matches the convention of normalize_grid2D.
    values = F.grid_sample(rgb_camB.unsqueeze(2), grid, mode='nearest')

    return torch.reshape(values, (B, C, Z, Y, X))
def apply_4x4_to_vox(B_T_A,
                     feat_A,
                     mem_coord_As=None,
                     mem_coord_Bs=None,
                     already_mem=False,
                     binary_feat=False,
                     rigid=True):
    """Back-warp a voxel feature grid from coordinate system A into B.

    Args:
        B_T_A: B x 4 x 4 transform.  With ``already_mem=False`` it relates
            two cam systems; with ``already_mem=True`` it relates two mem
            systems.
        feat_A: B x C x Z x Y x X features expressed in A coordinates.
        mem_coord_As: provides ``vox_T_cam`` for the A grid; read only when
            ``already_mem`` is False.
        mem_coord_Bs: provides ``cam_T_vox`` for the B grid; read only when
            ``already_mem`` is False.
        already_mem: whether ``B_T_A`` is already a mem-to-mem transform.
        binary_feat: forwarded to ``utils_samp.resample3D``.
        rigid: use ``utils_geom.safe_inverse`` when True; otherwise fall
            back to the generic (slower but more powerful) ``.inverse()``.

    Returns:
        The value returned by ``utils_samp.resample3D``: feat_A resampled
        at the A-locations that correspond to every voxel of the B grid.
    """
    batch, _, Z, Y, X = feat_A.shape

    # Back-warping asks "WHERE in A should each voxel of B read its value
    # from?".  That "where" is the inverse of B_T_A; the argument is kept as
    # B_T_A only to mirror the signature of utils_geom.apply_4x4.
    src_T_dst = (utils_geom.safe_inverse(B_T_A) if rigid
                 else B_T_A.inverse())

    if not already_mem:
        # wrap the cam-to-cam transform so that it maps mem(B) -> mem(A)
        cam_T_mem = mem_coord_Bs.cam_T_vox.repeat(batch, 1, 1)
        mem_T_cam = mem_coord_As.vox_T_cam.repeat(batch, 1, 1)
        src_T_dst = utils_basic.matmul3(mem_T_cam, src_T_dst, cam_T_mem)

    # one query point per voxel of the target grid; this is B x N x 3
    xyz_dst = utils_basic.gridcloud3D(batch, Z, Y, X)

    # where each of those voxels lands in A
    xyz_src = utils_geom.apply_4x4(src_T_dst, xyz_dst)

    # every target voxel takes whatever feat_A holds at its A-location
    return utils_samp.resample3D(feat_A, xyz_src, binary_feat=binary_feat)
def assemble_padded_obj_masklist(lrtlist, scorelist, Z, Y, X, coeff=1.0):
    """Compute a score-weighted 3D box mask for each object.

    We use this when computing the center-surround objectness score.  A
    voxel is inside an object's mask when its position, expressed in that
    object's own coordinate frame, lies strictly within the (scaled) box
    extents along all three axes.

    Args:
        lrtlist: B x N x 19 tensor; split by ``utils_geom.split_lrtlist``
            into box lengths (B x N x 3) and ref_T_obj (B x N x 4 x 4).
        scorelist: B x N tensor; each object's mask is multiplied by its
            score.
        Z, Y, X: resolution of the mem grid.
        coeff: multiplier applied to the box lengths before the inside test.

    Returns:
        masklist, a float tensor shaped B x N x 1 x Z x Y x X.
    """
    B, N, D = list(lrtlist.shape)
    assert (D == 19)

    lenlist, ref_T_objlist = utils_geom.split_lrtlist(lrtlist)
    # lenlist is B x N x 3
    # ref_T_objlist is B x N x 4 x 4

    # fold the object axis into the batch axis so every box is handled by
    # the same batched ops
    lenlist_ = lenlist.reshape(B * N, 3)
    ref_T_objlist_ = ref_T_objlist.reshape(B * N, 4, 4)
    obj_T_reflist_ = utils_geom.safe_inverse(ref_T_objlist_)

    # we want a value for each location in the mem grid
    xyz_mem_ = utils_basic.gridcloud3D(B * N, Z, Y, X)
    # this is B*N x V x 3, where V = Z*Y*X
    xyz_ref_ = Mem2Ref(xyz_mem_, Z, Y, X)
    # this is B*N x V x 3

    # express every grid location in each object's own frame
    xyz_obj_ = utils_geom.apply_4x4(obj_T_reflist_, xyz_ref_)
    x, y, z = torch.unbind(xyz_obj_, dim=2)
    # these are B*N x V

    lx, ly, lz = torch.unbind(lenlist_, dim=1)
    # these are B*N; make them B*N x 1 so they broadcast against V
    half_x = lx.unsqueeze(1) * coeff / 2.0
    half_y = ly.unsqueeze(1) * coeff / 2.0
    half_z = lz.unsqueeze(1) * coeff / 2.0

    # strictly inside the box along every axis
    inbounds = ((x > -half_x) & (x < half_x) &
                (y > -half_y) & (y < half_y) &
                (z > -half_z) & (z < half_z))

    masklist = inbounds.float().reshape(B, N, 1, Z, Y, X)
    # weight each object's mask by its score
    masklist = masklist * scorelist.view(B, N, 1, 1, 1, 1)
    return masklist
def crop_zoom_from_mem(mem,
                       mem_coord,
                       lrt,
                       Z2,
                       Y2,
                       X2,
                       sensor_camR_T_camXs=None):
    """Resample an object-centric "zoom" crop out of a memory tensor.

    Args:
        mem: B x C x Z x Y x X tensor.
        mem_coord: memory-coordinate description; ``mem_coord.proto.shape``
            is read as (Y, X, Z) and must agree with the resolution of
            ``mem``.
        lrt: B x 19 tensor (per the original note it takes us from object
            to cam coords).
        Z2, Y2, X2: resolution of the zoomed output grid.
        sensor_camR_T_camXs: optional tensor with three dims whose leading
            dim is B (per the original note it takes us from cam to ref
            coords); forwarded to ``Zoom2Ref``.

    Returns:
        The zoomed crop, shaped B x C x Z2 x Y2 x X2.
    """
    B, C, Z, Y, X = mem.shape
    memY, memX, memZ = mem_coord.proto.shape

    # the memory tensor must match the grid that mem_coord describes
    assert (Z, Y, X) == (memZ, memY, memX)

    B2, E = lrt.shape
    if sensor_camR_T_camXs is not None:
        B3, _, _ = sensor_camR_T_camXs.shape
        assert B == B3
    assert E == 19
    assert B == B2

    # a plain index grid for the zoomed volume; on its own it has no
    # coordinate system attached to it
    zoom_grid = utils_basic.gridcloud3D(B, Z2, Y2, X2, norm=False)

    # zoom grid -> ref coordinates -> grid coordinates of mem
    ref_points = Zoom2Ref(zoom_grid, lrt, Z2, Y2, X2, sensor_camR_T_camXs)
    mem_points = Ref2Mem(ref_points, mem_coord)

    # this is just like grid sample: read mem at those locations
    sampled = utils_samp.sample3D(mem, mem_points, Z2, Y2, X2)
    return torch.reshape(sampled, [B, C, Z2, Y2, X2])
Exemple #5
0
def get_synth_flow(unpRs,
                   occRs,
                   obj_lrtlist_camX0s,
                   obj_scorelist_s,
                   occXs,
                   set_name,
                   K,
                   summ_writer,
                   sometimes_zero=False,
                   sometimes_real=False,
                   do_vis=False):
    """Synthesize a two-frame sequence with known 3D flow.

    Frame 0 is taken from the R inputs; frame 1 is produced by warping
    frame 0 with a random, translation-only camera motion.  The flow is the
    displacement of every mem-grid location under that motion.

    Args:
        unpRs: indexed as ``unpRs[:, 0]`` for frame 0 (presumably
            B x S x C x Z x Y x X -- TODO confirm).
        occRs: indexed as ``occRs[:, 0]`` for frame 0 (same layout
            assumption as ``unpRs``).
        obj_lrtlist_camX0s: forwarded to ``get_gt_flow``.
        obj_scorelist_s: forwarded to ``get_gt_flow``.
        occXs: B x S x _ x Z x Y x X tensor with S == 2.
        set_name: real data is only ever substituted when this is 'train'.
        K: forwarded to ``get_gt_flow``.
        summ_writer: summary writer used for visualizations.
        sometimes_zero: forwarded to ``utils_geom.get_random_rt`` for the
            small-motion candidate.
        sometimes_real: when True (and ``set_name == 'train'``), randomly
            return the real sequence and its gt flow instead of the
            synthetic one.
        do_vis: whether to write the 'synth/*' summaries.

    Returns:
        Tuple ``(occs, unps, flow, cam1_T_cam0, is_synth)``; ``is_synth`` is
        1 when the synthetic sequence is returned and 0 for the real one.
    """
    B, S, _, Z, Y, X = list(occXs.shape)
    assert (S == 2)
    flowX0 = get_gt_flow(
        obj_lrtlist_camX0s,
        obj_scorelist_s,
        utils_geom.eye_4x4s(B, S),
        occXs[:, 0],
        K=K,
        occ_only=False,  # get the dense flow
        mod='X0',
        summ_writer=summ_writer)

    # we do not sample any rotations here, to keep the distribution purely
    # uniform across all translation angles.
    # (rotation ruins this, since the pivot point is at the camera)
    motion_candidates = [
        utils_geom.get_random_rt(B, r_amount=0.0,
                                 t_amount=1.0),  # large motion
        utils_geom.get_random_rt(
            B,
            r_amount=0.0,
            t_amount=0.1,  # small motion
            sometimes_zero=sometimes_zero)
    ]
    cam1_T_cam0 = random.choice(motion_candidates)

    occ0 = occRs[:, 0]
    unp0 = unpRs[:, 0]
    occ1 = utils_vox.apply_4x4_to_vox(cam1_T_cam0, occ0)
    # occ1 should be a binary thing, so let's restore that property.
    # This must happen BEFORE occ1 is collected into `occs`; otherwise the
    # returned/visualized sequence would keep the un-rounded warp.
    occ1 = torch.round(occ1)
    unp1 = utils_vox.apply_4x4_to_vox(cam1_T_cam0, unp0)
    occs = [occ0, occ1]
    unps = [unp0, unp1]

    if do_vis:
        summ_writer.summ_occs('synth/occs', occs)
        summ_writer.summ_unps('synth/unps', unps, occs)

    # express the camera motion in mem coordinates and read the flow off as
    # the displacement of every grid location
    mem_T_cam = utils_vox.get_mem_T_ref(B, Z, Y, X)
    cam_T_mem = utils_vox.get_ref_T_mem(B, Z, Y, X)
    mem1_T_mem0 = utils_basic.matmul3(mem_T_cam, cam1_T_cam0, cam_T_mem)
    xyz_mem0 = utils_basic.gridcloud3D(B, Z, Y, X)
    xyz_mem1 = utils_geom.apply_4x4(mem1_T_mem0, xyz_mem0)
    xyz_mem0 = xyz_mem0.reshape(B, Z, Y, X, 3)
    xyz_mem1 = xyz_mem1.reshape(B, Z, Y, X, 3)
    flow = xyz_mem1 - xyz_mem0
    # this is B x Z x Y x X x 3
    flow = flow.permute(0, 4, 1, 2, 3)
    # this is B x 3 x Z x Y x X
    if do_vis:
        summ_writer.summ_3D_flow('synth/flow', flow, clip=2.0)

    # sanity check: warping frame 1 back with the flow should recover frame 0
    occ0_e = utils_samp.backwarp_using_3D_flow(occ1, flow, binary_feat=True)
    unp0_e = utils_samp.backwarp_using_3D_flow(unp1, flow)
    if do_vis:
        summ_writer.summ_occs('synth/occs_stab', [occ0, occ0_e])
        summ_writer.summ_unps('synth/unps_stab', [unp0, unp0_e],
                              [occ0, occ0_e])

    occs = torch.stack(occs, dim=1)
    unps = torch.stack(unps, dim=1)

    is_synth = 1
    if sometimes_real and set_name == 'train':
        # flip a coin: 0 -> real data with its gt flow, 1 -> synthetic data
        is_synth = random.randint(0, 1)
        occs = [occRs, occs][is_synth]
        unps = [unpRs, unps][is_synth]
        flow = [flowX0, flow][is_synth]
        cam1_T_cam0 = [utils_geom.eye_4x4(B), cam1_T_cam0][is_synth]

    return occs, unps, flow, cam1_T_cam0, is_synth
Exemple #6
0
def get_gt_flow(obj_lrtlist_camRs,
                obj_scorelist,
                camRs_T_camXs,
                occR,
                K=2,
                occ_only=True,
                mod='',
                vis=True,
                summ_writer=None):
    """Construct a ground-truth 3D flow field from box tracks and egomotion.

    For each of the first K objects, a flow field is generated from the
    object's box motion between frame 0 and frame 1 and kept only inside
    the object's frame-0 mask.  The egomotion flow (from ``camRs_T_camXs``)
    is then used for the background, i.e. wherever no object mask is
    active, and everything is merged with a masked mean.

    Args:
        obj_lrtlist_camRs: N x B x S x D tensor of box trajectories with
            S == 2 (as a flow util, this expects exactly two frames).
        obj_scorelist: indexed as ``obj_scorelist[k, :, 0:1]``
            (presumably N x B x S -- TODO confirm).
        camRs_T_camXs: indexed as ``[:, 0]`` and ``[:, 1]`` to obtain the
            per-frame 4 x 4 transforms (presumably B x S x 4 x 4).
        occR: B x _ x Z x Y x X occupancy of frame 0; supplies the grid
            resolution and, with ``occ_only``, masks the flow.
        K: number of objects to process (indexes the leading dim of
            ``obj_lrtlist_camRs``).
        occ_only: if True, multiply every flow field by ``occR``.
        mod: unused; kept for interface compatibility.
        vis: whether to write summaries.
        summ_writer: summary writer; summaries are skipped when it is None.

    Returns:
        flow, shaped B x 3 x Z x Y x X.
    """
    _, _, S, _ = list(obj_lrtlist_camRs.shape)
    assert (S == 2)  # as a flow util, this expects S=2
    B, _, Z, Y, X = list(occR.shape)

    # only write summaries when we actually have a writer
    do_summ = vis and (summ_writer is not None)

    # everything below is independent of the object index, so compute it once
    camR_T_cam0 = camRs_T_camXs[:, 0]
    camR_T_cam1 = camRs_T_camXs[:, 1]
    cam1_T_camR = utils_geom.safe_inverse(camR_T_cam1)

    mem_T_ref = utils_vox.get_mem_T_ref(B, Z, Y, X)
    ref_T_mem = utils_vox.get_ref_T_mem(B, Z, Y, X)

    xyz_mem0_flat = utils_basic.gridcloud3D(B, Z, Y, X)
    # this is B x N x 3
    xyz_mem0 = xyz_mem0_flat.reshape(B, Z, Y, X, 3)

    flows = []
    masks = []
    for k in range(K):
        obj_masklistR0 = utils_vox.assemble_padded_obj_masklist(
            obj_lrtlist_camRs[k, :, 0:1],
            obj_scorelist[k, :, 0:1],
            Z,
            Y,
            X,
            coeff=1.0)
        # this is B x 1(N) x 1(C) x Z x Y x X
        obj_mask0 = obj_masklistR0.squeeze(1)
        # this is B x 1 x Z x Y x X

        if do_summ:
            summ_writer.summ_oned('flow/obj%d_mask0' % k,
                                  torch.mean(obj_mask0, 3))

        _, ref_T_objs_list = utils_geom.split_lrtlist(obj_lrtlist_camRs[k])
        # this is B x S x 4 x 4
        ref_T_obj0 = ref_T_objs_list[:, 0]
        ref_T_obj1 = ref_T_objs_list[:, 1]
        obj0_T_ref = utils_geom.safe_inverse(ref_T_obj0)
        # these are B x 4 x 4

        # object motion in ref coords, then moved into the cam frames and
        # finally into mem coords
        ref1_T_ref0 = utils_basic.matmul2(ref_T_obj1, obj0_T_ref)
        cam1_T_cam0 = utils_basic.matmul3(cam1_T_camR, ref1_T_ref0,
                                          camR_T_cam0)
        mem1_T_mem0 = utils_basic.matmul3(mem_T_ref, cam1_T_cam0, ref_T_mem)

        xyz_mem1 = utils_geom.apply_4x4(mem1_T_mem0, xyz_mem0_flat)
        xyz_mem1 = xyz_mem1.reshape(B, Z, Y, X, 3)

        # only use these displaced points within the obj mask;
        # everywhere else the point stays put (zero flow)
        obj_mask0 = obj_mask0.view(B, Z, Y, X, 1)
        cond = (obj_mask0 > 0.0).float()
        xyz_mem1 = cond * xyz_mem1 + (1.0 - cond) * xyz_mem0

        flow = (xyz_mem1 - xyz_mem0).permute(0, 4, 1, 2, 3)
        obj_mask0 = obj_mask0.permute(0, 4, 1, 2, 3)

        # flow is centered on frame0, so we use occ0 to mask it
        if occ_only:
            flow = flow * occR

        if do_summ:
            summ_writer.summ_3D_flow('flow/gt_%d' % k, flow, clip=4.0)

        masks.append(obj_mask0)
        flows.append(flow)

    # background: pure egomotion flow
    cam1_T_cam0 = utils_basic.matmul2(cam1_T_camR, camR_T_cam0)
    mem1_T_mem0 = utils_basic.matmul3(mem_T_ref, cam1_T_cam0, ref_T_mem)

    xyz_mem1 = utils_geom.apply_4x4(mem1_T_mem0, xyz_mem0_flat)
    xyz_mem1 = xyz_mem1.reshape(B, Z, Y, X, 3)

    bkg_flow = (xyz_mem1 - xyz_mem0).permute(0, 4, 1, 2, 3)
    if occ_only:
        # mask with the frame-0 occupancy, same as the object flows above
        bkg_flow = bkg_flow * occR

    # the background owns every voxel that no object mask claims
    any_mask = torch.max(torch.stack(masks, dim=0), dim=0)[0]
    masks.append(1.0 - any_mask)
    flows.append(bkg_flow)

    flows = torch.stack(flows, dim=0)
    masks = torch.stack(masks, dim=0)
    # repeat the single mask channel across the 3 flow channels
    masks = masks.repeat(1, 1, 3, 1, 1, 1)
    flow = utils_basic.reduce_masked_mean(flows, masks, dim=0)

    if do_summ:
        summ_writer.summ_3D_flow('flow/gt_complete', flow, clip=4.0)

    # flow is shaped B x 3 x Z x Y x X
    return flow