Exemple #1
0
def project_voxel(voxel):
    """Resample `voxel` through `transformer` with projection turned on.

    The matrix returned by `get_transform_matrix(0.0, 0.0)` is replicated
    once per batch element, flattened to 16 values per example, and passed
    to `transformer` together with a cubic output size of `const.S` and the
    near/far plane constants.

    Args:
        voxel: batched voxel tensor; its batch size is read with
            `tfutil.batchdim`.

    Returns:
        Whatever `transformer` returns for `do_project=True`.
    """
    base_matrix = tf.constant(get_transform_matrix(0.0, 0.0),
                              dtype=tf.float32)
    # One copy of the 4x4 matrix for every example in the batch.
    per_example = tf.tile(tf.reshape(base_matrix, (1, 4, 4)),
                          tf.stack([tfutil.batchdim(voxel), 1, 1]))
    # `transformer` takes the matrices flattened to 16 values each.
    theta = tf.reshape(per_example, tf.stack([tfutil.batchdim(voxel), 16]))
    output_size = (const.S, const.S, const.S)
    return transformer(voxel,
                       theta,
                       output_size,
                       const.NEAR_PLANE,
                       const.FAR_PLANE,
                       do_project=True)
Exemple #2
0
def voxel2depth_aligned(voxel):
    """Convert an occupancy grid into a depth map by scanning along axis 3.

    For every (batch, i, j) position the smallest index along axis 3 whose
    value is >= 0.5 is selected; if no cell qualifies, the last index
    (`const.S - 1`) is selected. The index is then rescaled linearly so that
    index 0 maps to `const.NEAR_PLANE` and index `const.S` would map to
    `const.FAR_PLANE`.

    Args:
        voxel: 5-D tensor whose axis 4 has size 1 (it is squeezed away).
            The remaining spatial axes are expected to have size `const.S`.

    Returns:
        A float32 tensor with a trailing singleton axis holding the depth.
    """
    side = const.S
    occupancy = tf.squeeze(voxel, axis=4)

    # Multiples that broadcast a (1, 1, 1, S) row over batch and the first
    # two spatial axes.
    tile_multiples = tf.stack([tfutil.batchdim(occupancy), side, side, 1])

    # Cost of picking a cell is simply its index along the scan axis.
    index_row = tf.reshape(tf.range(0, side), (1, 1, 1, side))
    index_cost = tf.cast(tf.tile(index_row, tile_multiples), tf.float32)

    # Cells below the 0.5 threshold get a large penalty so argmin skips
    # them -- except the very last cell, which is never penalised and thus
    # acts as the fallback when the whole ray is empty.
    empty_penalty = 1000 * tf.cast(occupancy < 0.5, dtype=tf.float32)
    all_but_last = tf.constant([1.0] * (side - 1) + [0.0], tf.float32)
    penalty_mask = tf.tile(tf.reshape(all_but_last, (1, 1, 1, side)),
                           tile_multiples)

    total_cost = index_cost + empty_penalty * penalty_mask
    first_hit = tf.argmin(total_cost, axis=3)
    depth = tf.cast(tf.expand_dims(first_hit, axis=3), tf.float32)

    # No +0.5 offset: the surface is assumed to sit on the boundary between
    # two voxels rather than at a voxel centre.
    depth = depth / side  # index -> fraction of the grid
    depth = depth * (const.FAR_PLANE - const.NEAR_PLANE)
    depth = depth + const.NEAR_PLANE  # shift into [NEAR_PLANE, FAR_PLANE)

    return depth
Exemple #3
0
def rotate_voxel(voxel, rotmat):
    """Resample `voxel` through `transformer` with projection turned off.

    Args:
        voxel: batched voxel tensor; its batch size is read with
            `tfutil.batchdim`.
        rotmat: per-example transform holding 16 values per batch element
            (reshaped here to `[batch, 16]`).

    Returns:
        Whatever `transformer` returns for `do_project=False`.
    """
    flat_shape = tf.stack([tfutil.batchdim(voxel), 16])
    theta = tf.reshape(rotmat, flat_shape)
    output_size = (const.S, const.S, const.S)
    return transformer(voxel,
                       theta,
                       output_size,
                       const.NEAR_PLANE,
                       const.FAR_PLANE,
                       do_project=False)
Exemple #4
0
def unproject_voxel(voxel):
    """Run `transformer` in its 'invert' mode and flip axes 2 and 3.

    Original author's notes (marked by them as "need to rewrite this"),
    kept here as design rationale rather than verified behaviour:

    Projection (voxels in, voxels out):
      * start from a meshgrid with x, y in (-1, 1) and z in (3, 5);
      * since X = x*z/fx, multiply x (and y) by z in the meshgrid;
      * apply the projection matrix, which contributes the 1/fx factor and
        subtracts the displacement, giving X = xz/fx, Y = yz/fy, Z = z - 4,
        all roughly inside (-1, 1)^3;
      * convert to grid indices and gather from the input grid.

    Unprojection follows the same recipe in reverse:
      * start from the same meshgrid;
      * since x = X*fx/Z, divide X by z instead (requested by passing
        'invert' to `transformer`);
      * apply the matrix built with `invert_focal=True`, which contributes
        the fx factor;
      * everything after that is identical to projection.

    Args:
        voxel: batched voxel tensor; its batch size is read with
            `tfutil.batchdim`.

    Returns:
        The `transformer` output reversed along axes 2 and 3.
    """
    inverse_focal = get_transform_matrix(0.0, 0.0, invert_focal=True)
    theta = tf.reshape(tf.constant(inverse_focal, dtype=tf.float32),
                       (1, 4, 4))
    # Replicate the matrix per example, then flatten to 16 values each.
    theta = tf.tile(theta, tf.stack([tfutil.batchdim(voxel), 1, 1]))
    theta = tf.reshape(theta, tf.stack([tfutil.batchdim(voxel), 16]))

    unprojected = transformer(voxel,
                              theta,
                              (const.S, const.S, const.S),
                              const.NEAR_PLANE,
                              const.FAR_PLANE,
                              do_project='invert')

    return tf.reverse(unprojected, axis=[2, 3])
Exemple #5
0
def unproject(inputs):
    """Lift an image batch into a voxel grid and append auxiliary channels.

    The images are resized to `const.S` x `const.S`, unprojected with
    `voxel.unproject_image`, and then concatenated along axis 4 with:

      * a z-coordinate map whose values are the voxel-centre coordinates
        `(i + 0.5) / (S / 2) - 1` along axis 1;
      * a learned per-voxel bias (only if `const.USE_LOCAL_BIAS`);
      * a thin "outline" shell (only if `const.USE_OUTLINE`).

    Args:
        inputs: batched image tensor accepted by `tf.image.resize_images`.
            After unprojection, channel 0 is used as a mask and channel 1
            as a depth value (presumably in the same (-1, 1) coordinates as
            the z-map -- confirm against `voxel.unproject_image`).

    Returns:
        The concatenated 5-D tensor, or just the outline tensor when
        `const.DEBUG_UNPROJECT` is set.
    """
    inputs = tf.image.resize_images(inputs, (const.S, const.S))
    # Now unproject, to get our starting point.
    inputs = voxel.unproject_image(inputs)
    batch = tfutil.batchdim(inputs)

    # In addition, add on a z-map and a local bias (copied from
    # components.py).
    meshgridz = tf.range(const.S, dtype=tf.float32)
    meshgridz = tf.reshape(meshgridz, (1, const.S, 1, 1))
    meshgridz = tf.tile(meshgridz, tf.stack([batch, 1, const.S, const.S]))
    meshgridz = tf.expand_dims(meshgridz, axis=4)
    meshgridz = (meshgridz + 0.5) / (const.S / 2.0) - 1.0  # now (-1, 1)

    # Rough outline: voxels whose z-coordinate lies within
    # `outline_thickness` behind the unprojected depth, restricted to the
    # unprojected mask.
    unprojected_mask = tf.expand_dims(inputs[:, :, :, :, 0], 4)
    unprojected_depth = tf.expand_dims(inputs[:, :, :, :, 1], 4)
    outline_thickness = 0.1
    outline = tf.cast(
        tf.logical_and(unprojected_depth <= meshgridz,
                       unprojected_depth + outline_thickness > meshgridz),
        tf.float32)
    outline *= unprojected_mask

    if const.DEBUG_UNPROJECT:
        return outline

    inputs_ = [inputs, meshgridz]
    if const.USE_LOCAL_BIAS:
        bias = tf.get_variable("voxelnet_bias",
                               dtype=tf.float32,
                               shape=[1, const.S, const.S, const.S, 1],
                               initializer=tf.zeros_initializer())
        # Tile with the batch dimension of the unprojected inputs; the
        # previous code used an undefined name `bs` here.
        bias = tf.tile(bias, tf.stack([batch, 1, 1, 1, 1]))
        inputs_.append(bias)
    if const.USE_OUTLINE:
        inputs_.append(outline)
    return tf.concat(inputs_, axis=4)
Exemple #6
0
def rotate_and_project_voxel(voxel, rotmat):
    """Apply `transformer` to a rank-4 or rank-5 voxel tensor.

    A rank-4 input temporarily gains a trailing axis so that the pipeline
    always sees five dimensions; that axis is squeezed away again before
    returning. The tensor is wrapped by `transformer_preprocess` /
    `transformer_postprocess` around the `transformer` call, which is made
    with the transformer's default projection mode.

    Args:
        voxel: rank-4 or rank-5 voxel tensor (asserted).
        rotmat: per-example transform holding 16 values per batch element
            (reshaped here to `[batch, 16]`).

    Returns:
        The transformed tensor, with the same rank as the input.
    """
    rank = tfutil.rank(voxel)
    assert rank in [4, 5]
    added_axis = rank == 4
    if added_axis:
        voxel = tf.expand_dims(voxel, axis=4)

    prepared = transformer_preprocess(voxel)
    theta = tf.reshape(rotmat, tf.stack([tfutil.batchdim(prepared), 16]))
    resampled = transformer(prepared,
                            theta,
                            (const.S, const.S, const.S),
                            const.NEAR_PLANE,
                            const.FAR_PLANE)
    result = transformer_postprocess(resampled)

    return tf.squeeze(result, axis=4) if added_axis else result
Exemple #7
0
    def _interpolate(im, x, y, z, out_size):
        """Trilinear interpolation of `im` at the given sample coordinates.

        For every sample the eight surrounding grid corners are gathered
        from the flattened volume and blended with weights that are products
        of the per-axis distances to the opposite corner. Corner indices are
        clipped into range for the gather, and the weights are multiplied by
        validity flags so that out-of-range contributions are zeroed.

        Args:
            im: A 5D tensor of size [num_batch, depth, height, width,
                num_channels]. It is the input volume for the transformation
                layer (tf.float32). depth/height/width/channels are read from
                the static shape.
            x: Normalised x sample coordinates (tf.float32); with the
                'centers' mapping used below, -1 maps to index 0 and +1 to
                index width - 1. The visible caller (`_transform`) passes a
                flattened 1-D tensor of num_batch * out_depth * out_height *
                out_width values.
            y: Same as `x`, for the height axis.
            z: Same as `x`, for the depth axis.
            out_size: A sequence (out_depth, out_height, out_width) giving
                the output size of the transformation layer; used here only
                to compute the number of samples per batch element.

        Returns:
            A tf.float32 tensor of interpolated values with one row per
            sample and `num_channels` columns (assuming 1-D coordinate
            inputs as passed by `_transform`).

        """
        with tf.variable_scope('_interpolate'):
            #num_batch = im.get_shape().as_list()[0]
            num_batch = tfutil.batchdim(im)

            depth = im.get_shape().as_list()[1]
            height = im.get_shape().as_list()[2]
            width = im.get_shape().as_list()[3]
            channels = im.get_shape().as_list()[4]

            x = tf.to_float(x)
            y = tf.to_float(y)
            z = tf.to_float(z)
            depth_f = tf.to_float(depth)
            height_f = tf.to_float(height)
            width_f = tf.to_float(width)
            # Number of disparity interpolated.
            out_depth = out_size[0]
            out_height = out_size[1]
            out_width = out_size[2]
            zero = tf.zeros([], dtype='int32')
            # 0 <= z < depth, 0 <= y < height & 0 <= x < width.
            max_z = tf.to_int32(tf.shape(im)[1] - 1)
            max_y = tf.to_int32(tf.shape(im)[2] - 1)
            max_x = tf.to_int32(tf.shape(im)[3] - 1)

            # Converts scale indices from [-1, 1] to [0, width/height/depth].
            #to be precise, we should actually be mapping to [-0.5, S-0.5]

            # z_far / z_near are not parameters of this function; they are
            # read from an enclosing scope that is not visible here.
            # cube_size and half_size are only referenced by the
            # commented-out z mapping further down.
            cube_size = z_far - z_near
            half_size = cube_size / 2.0

            #x = tfpy.summarize_tensor(x, 'x') #ranges (-2/3, 2/3) then (-2.5, 2.5) then (-1, 1)
            #y = tfpy.summarize_tensor(y, 'y') #ranges ()
            #z = tfpy.summarize_tensor(z, 'z') #ranges (-0.5, 0.5) then it changes

            #raise Exception, 'bad'

            #centers seems to be the correct mapping mode for most use cases
            mapping_mode = 'centers'

            # Only the 'centers' branch runs, because mapping_mode is fixed
            # on the line above; the other two are kept as alternatives.
            if mapping_mode == 'default':
                x = (x + 1) * (width_f) / 2.0
                y = (y + 1) * (height_f) / 2.0
                z = (z + 1) * (depth_f) / 2.0
            elif mapping_mode == 'corners':
                x = (x + 1) * (width_f) / 2.0 - 0.5
                y = (y + 1) * (height_f) / 2.0 - 0.5
                z = (z + 1) * (depth_f) / 2.0 - 0.5
            elif mapping_mode == 'centers':
                x = (x + 1) * (width_f - 1.0) / 2.0
                y = (y + 1) * (height_f - 1.0) / 2.0
                z = (z + 1) * (depth_f - 1.0) / 2.0

            #z = (z + half_size) * (depth_f) / cube_size

            #x = tfpy.summarize_tensor(x, 'x') #(-21, 150)
            #z = tfpy.summarize_tensor(z, 'z') #(0, 128)

            # Integer corners bracketing each (possibly out-of-range) sample.
            x0 = tf.to_int32(tf.floor(x))
            x1 = x0 + 1
            y0 = tf.to_int32(tf.floor(y))
            y1 = y0 + 1
            z0 = tf.to_int32(tf.floor(z))
            z1 = z0 + 1

            # Clipped copies are used only to build safe gather indices; the
            # unclipped values are used for the weights and validity flags.
            x0_clip = tf.clip_by_value(x0, zero, max_x)
            x1_clip = tf.clip_by_value(x1, zero, max_x)
            y0_clip = tf.clip_by_value(y0, zero, max_y)
            y1_clip = tf.clip_by_value(y1, zero, max_y)
            z0_clip = tf.clip_by_value(z0, zero, max_z)
            z1_clip = tf.clip_by_value(z1, zero, max_z)
            # Strides into the flattened [batch*depth*height*width, channels]
            # volume: one row, one depth slice, one batch element.
            dim3 = width
            dim2 = width * height
            dim1 = width * height * depth

            #repeat can only be run on cpu
            # Per-sample offset of the owning batch element in the flattened
            # volume. The else branch is unreachable (`if True`).
            if True:
                base = _repeat(
                    tf.range(num_batch) * dim1,
                    out_depth * out_height * out_width)
            else:
                base = tf.constant(
                    np.concatenate([
                        np.array([i] * out_depth * out_height * out_width)
                        for i in range(const.BS)
                    ]).astype(np.int32))

            #only works for bs = 1
            #base = tf.zeros((out_depth * out_height * out_width), dtype=tf.int32)

            base_z0_y0 = base + z0_clip * dim2 + y0_clip * dim3
            base_z0_y1 = base + z0_clip * dim2 + y1_clip * dim3
            base_z1_y0 = base + z1_clip * dim2 + y0_clip * dim3
            base_z1_y1 = base + z1_clip * dim2 + y1_clip * dim3

            idx_z0_y0_x0 = base_z0_y0 + x0_clip
            idx_z0_y0_x1 = base_z0_y0 + x1_clip
            idx_z0_y1_x0 = base_z0_y1 + x0_clip
            idx_z0_y1_x1 = base_z0_y1 + x1_clip
            idx_z1_y0_x0 = base_z1_y0 + x0_clip
            idx_z1_y0_x1 = base_z1_y0 + x1_clip
            idx_z1_y1_x0 = base_z1_y1 + x0_clip
            idx_z1_y1_x1 = base_z1_y1 + x1_clip

            # Use indices to lookup pixels in the flat image and restore
            # channels dim
            im_flat = tf.reshape(im, tf.stack([-1, channels]))
            im_flat = tf.to_float(im_flat)
            i_z0_y0_x0 = tf.gather(im_flat, idx_z0_y0_x0)
            i_z0_y0_x1 = tf.gather(im_flat, idx_z0_y0_x1)
            i_z0_y1_x0 = tf.gather(im_flat, idx_z0_y1_x0)
            i_z0_y1_x1 = tf.gather(im_flat, idx_z0_y1_x1)
            i_z1_y0_x0 = tf.gather(im_flat, idx_z1_y0_x0)
            i_z1_y0_x1 = tf.gather(im_flat, idx_z1_y0_x1)
            i_z1_y1_x0 = tf.gather(im_flat, idx_z1_y1_x0)
            i_z1_y1_x1 = tf.gather(im_flat, idx_z1_y1_x1)

            # Finally calculate interpolated values.
            x0_f = tf.to_float(x0)
            x1_f = tf.to_float(x1)
            y0_f = tf.to_float(y0)
            y1_f = tf.to_float(y1)
            z0_f = tf.to_float(z0)
            z1_f = tf.to_float(z1)
            # Check the out-of-boundary case.
            x0_valid = tf.to_float(
                tf.less_equal(x0, max_x) & tf.greater_equal(x0, 0))
            x1_valid = tf.to_float(
                tf.less_equal(x1, max_x) & tf.greater_equal(x1, 0))
            y0_valid = tf.to_float(
                tf.less_equal(y0, max_y) & tf.greater_equal(y0, 0))
            y1_valid = tf.to_float(
                tf.less_equal(y1, max_y) & tf.greater_equal(y1, 0))
            z0_valid = tf.to_float(
                tf.less_equal(z0, max_z) & tf.greater_equal(z0, 0))
            z1_valid = tf.to_float(
                tf.less_equal(z1, max_z) & tf.greater_equal(z1, 0))

            # NOTE(review): each corner's weight is masked with the validity
            # flags of the *opposite* corner (e.g. the x0/y0/z0 corner uses
            # x1_valid * y1_valid * z1_valid), i.e. the flags follow the
            # distance terms rather than the gathered corner. Confirm this is
            # intentional before changing it.
            w_z0_y0_x0 = tf.expand_dims(
                ((x1_f - x) * (y1_f - y) *
                 (z1_f - z) * x1_valid * y1_valid * z1_valid), 1)
            w_z0_y0_x1 = tf.expand_dims(
                ((x - x0_f) * (y1_f - y) *
                 (z1_f - z) * x0_valid * y1_valid * z1_valid), 1)
            w_z0_y1_x0 = tf.expand_dims(
                ((x1_f - x) * (y - y0_f) *
                 (z1_f - z) * x1_valid * y0_valid * z1_valid), 1)
            w_z0_y1_x1 = tf.expand_dims(
                ((x - x0_f) * (y - y0_f) *
                 (z1_f - z) * x0_valid * y0_valid * z1_valid), 1)
            w_z1_y0_x0 = tf.expand_dims(
                ((x1_f - x) * (y1_f - y) *
                 (z - z0_f) * x1_valid * y1_valid * z0_valid), 1)
            w_z1_y0_x1 = tf.expand_dims(
                ((x - x0_f) * (y1_f - y) *
                 (z - z0_f) * x0_valid * y1_valid * z0_valid), 1)
            w_z1_y1_x0 = tf.expand_dims(
                ((x1_f - x) * (y - y0_f) *
                 (z - z0_f) * x1_valid * y0_valid * z0_valid), 1)
            w_z1_y1_x1 = tf.expand_dims(
                ((x - x0_f) * (y - y0_f) *
                 (z - z0_f) * x0_valid * y0_valid * z0_valid), 1)

            # Only consumed by the commented-out debug summary below.
            weights_summed = (w_z0_y0_x0 + w_z0_y0_x1 + w_z0_y1_x0 +
                              w_z0_y1_x1 + w_z1_y0_x0 + w_z1_y0_x1 +
                              w_z1_y1_x0 + w_z1_y1_x1)

            # Weighted sum of the eight corner values; the (N, 1) weights
            # broadcast across the channel axis of the gathered values.
            output = tf.add_n([
                w_z0_y0_x0 * i_z0_y0_x0, w_z0_y0_x1 * i_z0_y0_x1,
                w_z0_y1_x0 * i_z0_y1_x0, w_z0_y1_x1 * i_z0_y1_x1,
                w_z1_y0_x0 * i_z1_y0_x0, w_z1_y0_x1 * i_z1_y0_x1,
                w_z1_y1_x0 * i_z1_y1_x0, w_z1_y1_x1 * i_z1_y1_x1
            ])

            #with tf.control_dependencies([tfpy.summarize_tensor(weights_summed, 'weights')]):
            #    output = output + 0.0

            return output
Exemple #8
0
    def _transform(theta, input_dim, out_size, z_near, z_far):
        """Resample `input_dim` on a sampling grid transformed by `theta`.

        A homogeneous sampling grid is built for the requested output size,
        replicated per batch element, multiplied by the per-example 4x4
        matrices, and the resulting z/y/x rows are fed to `_interpolate`.

        The grid builder is chosen by `do_project`, which is not a parameter
        of this function but is read from an enclosing scope that is not
        visible here:
          * `True`      -> `_meshgrid`
          * `'invert'`  -> `_invproj_meshgrid`
          * otherwise   -> `_noproj_meshgrid`

        Args:
            theta: per-example transforms; reshaped to (-1, 4, 4) and cast
                to float32.
            input_dim: 5-D input volume; the batch size is read with
                `tfutil.batchdim` and the channel count from static axis 4.
            out_size: sequence (out_depth, out_height, out_width).
            z_near: forwarded to the grid builder.
            z_far: forwarded to the grid builder.

        Returns:
            A tensor of shape [num_batch, out_depth, out_height, out_width,
            num_channels].
        """
        with tf.variable_scope('_transform'):
            #num_batch = input_dim.get_shape().as_list()[0]
            num_batch = tfutil.batchdim(input_dim)

            num_channels = input_dim.get_shape().as_list()[4]
            theta = tf.reshape(theta, (-1, 4, 4))
            theta = tf.cast(theta, 'float32')

            out_depth = out_size[0]
            out_height = out_size[1]
            out_width = out_size[2]

            if do_project is True:
                grid = _meshgrid(out_depth, out_height, out_width, z_near,
                                 z_far)
            elif do_project == 'invert':
                grid = _invproj_meshgrid(out_depth, out_height, out_width,
                                         z_near, z_far)
            else:
                grid = _noproj_meshgrid(out_depth, out_height, out_width,
                                        z_near, z_far)

            # Replicate the grid once per batch element: flatten, tile, and
            # fold back into [num_batch, 4, num_points].
            grid = tf.expand_dims(grid, 0)
            grid = tf.reshape(grid, [-1])
            grid = tf.tile(grid, tf.stack([num_batch]))
            grid = tf.reshape(grid, tf.stack([num_batch, 4, -1]))

            #grid = tfpy.summarize_tensor(grid, 'grid')

            def printgrid(grid_):
                # Debug helper, only used through the commented-out
                # inject_callback below. Prints statistics of row 0 (the z
                # row) of the grid. Single-argument parenthesised prints
                # behave identically on Python 2 and Python 3.
                #z in 3, 5
                #x/y in 5, -5
                zs = grid_[:, 0, :]
                print('===')
                print(zs.shape)
                print(np.mean(zs))
                print(np.max(zs))
                print(np.min(zs))

            #grid = tfpy.inject_callback(grid, printgrid)

            # Transform A x (x_t', y_t', 1, d_t)^T -> (x_s, y_s, z_s, 1).
            t_g = tf.matmul(theta, grid)

            #z_s = tf.slice(t_g, [0, 0, 0], [-1, 1, -1])
            #y_s = tf.slice(t_g, [0, 1, 0], [-1, 1, -1])
            #x_s = tf.slice(t_g, [0, 2, 0], [-1, 1, -1])
            #this gives a different shape, but it'll be reshaped anyway
            # Rows of the transformed grid are ordered z, y, x.
            z_s = t_g[:, 0, :]
            y_s = t_g[:, 1, :]
            x_s = t_g[:, 2, :]

            #z_s = tfpy.summarize_tensor(z_s, 'z_s') #-1, 1
            #y_s = tfpy.summarize_tensor(y_s, 'y_s') #-1.34, 1.34
            #x_s = tfpy.summarize_tensor(x_s, 'x_s')

            z_s_flat = tf.reshape(z_s, [-1])
            y_s_flat = tf.reshape(y_s, [-1])
            x_s_flat = tf.reshape(x_s, [-1])

            input_transformed = _interpolate(input_dim, x_s_flat, y_s_flat,
                                             z_s_flat, out_size)

            output = tf.reshape(
                input_transformed,
                tf.stack([
                    num_batch, out_depth, out_height, out_width, num_channels
                ]))

            return output