def project_voxel(voxel):
    """Resample `voxel` through `transformer` with projection enabled.

    The 4x4 matrix produced by `get_transform_matrix(0.0, 0.0)` is replicated
    once per batch element, flattened to 16 values per example and handed to
    `transformer` together with a cubic output size of `const.S` and the
    near/far planes taken from `const`.

    Args:
        voxel: voxel grid tensor; its batch size is read with
            `tfutil.batchdim`.

    Returns:
        The tensor returned by `transformer` when called with
        `do_project=True`.
    """
    base_matrix = tf.constant(
        get_transform_matrix(0.0, 0.0), dtype=tf.float32)

    # One copy of the matrix per example in the batch: (batch, 4, 4).
    per_example = tf.tile(
        tf.reshape(base_matrix, (1, 4, 4)),
        tf.stack([tfutil.batchdim(voxel), 1, 1]),
    )

    # `transformer` takes the matrices flattened to (batch, 16).
    theta = tf.reshape(per_example, tf.stack([tfutil.batchdim(voxel), 16]))
    grid_size = (const.S,) * 3

    return transformer(
        voxel,
        theta,
        grid_size,
        const.NEAR_PLANE,
        const.FAR_PLANE,
        do_project=True,
    )
def voxel2depth_aligned(voxel):
    """Convert an occupancy grid into a depth map by scanning along axis 3.

    For every column along axis 3 the smallest index whose value is >= 0.5
    is selected.  Cells below 0.5 receive a cost penalty of 1000; the last
    cell of each column is never penalised, so it is the fallback when the
    whole column is empty (this relies on `const.S` being well below 1000).
    The winning index `k` is then mapped to
    `k / const.S * (const.FAR_PLANE - const.NEAR_PLANE) + const.NEAR_PLANE`.
    No half-voxel offset is added: the surface is assumed to sit on the
    boundary between two voxels.

    Args:
        voxel: 5-D tensor whose axis 4 has size 1 (it is squeezed away).
            The remaining axes are presumably (batch, S, S, S) -- confirm
            against callers.

    Returns:
        float32 tensor with a trailing singleton axis (added with
        `tf.expand_dims(..., axis=3)`) holding the depth per column.
    """
    occupancy = tf.squeeze(voxel, axis=4)
    tile_shape = tf.stack(
        [tfutil.batchdim(occupancy), const.S, const.S, 1])

    # Base cost of a cell is simply its index along the scanned axis.
    index_row = tf.reshape(tf.range(0, const.S), (1, 1, 1, const.S))
    cost = tf.cast(tf.tile(index_row, tile_shape), tf.float32)

    # Empty cells are pushed out of contention by a large penalty ...
    empty_penalty = 1000 * tf.cast(occupancy < 0.5, dtype=tf.float32)

    # ... except for the last cell, which stays selectable as a fallback.
    all_but_last = tf.constant([1.0] * (const.S - 1) + [0.0], tf.float32)
    penalty_mask = tf.tile(
        tf.reshape(all_but_last, (1, 1, 1, const.S)), tile_shape)

    cost = cost + empty_penalty * penalty_mask

    first_hit = tf.argmin(cost, axis=3)
    depth = tf.cast(tf.expand_dims(first_hit, axis=3), tf.float32)

    # Index -> fraction of the grid -> metric depth between the two planes.
    depth = depth / const.S
    depth = depth * (const.FAR_PLANE - const.NEAR_PLANE)
    depth = depth + const.NEAR_PLANE
    return depth
def rotate_voxel(voxel, rotmat):
    """Resample `voxel` through `transformer` without projection.

    Args:
        voxel: voxel grid tensor; its batch size is read with
            `tfutil.batchdim`.
        rotmat: per-example transform; it is reshaped to (batch, 16), so it
            must hold 16 values per batch element (e.g. a 4x4 matrix).

    Returns:
        The tensor returned by `transformer` when called with
        `do_project=False`.
    """
    flat_shape = tf.stack([tfutil.batchdim(voxel), 16])
    theta = tf.reshape(rotmat, flat_shape)
    grid_size = (const.S,) * 3

    return transformer(
        voxel,
        theta,
        grid_size,
        const.NEAR_PLANE,
        const.FAR_PLANE,
        do_project=False,
    )
def unproject_voxel(voxel):
    """Resample `voxel` through `transformer` in 'invert' mode, then flip it.

    The matrix from `get_transform_matrix(0.0, 0.0, invert_focal=True)` is
    replicated per batch element, `transformer` is called with
    `do_project='invert'`, and the result is reversed along axes 2 and 3.

    Args:
        voxel: voxel grid tensor; its batch size is read with
            `tfutil.batchdim`.

    Returns:
        The transformed voxel tensor, reversed along axes 2 and 3.
    """
    # TODO (original author): this needs a rewrite.
    #
    # Original author's design notes, kept for reference.
    #
    # How projection works (input: 128^3 voxels, output: 128^3 voxels):
    #   1. Generate the meshgrid: (-1, 1) for x and y, (3, 5) for z.
    #   2. Since X = x*z/fx, multiply x by z in the meshgrid, giving x*z.
    #   3. Apply the projection matrix, which adds the 1/fx factor and also
    #      subtracts the displacement, so we now have
    #          X = x*z/fx,  Y = y*z/fy,  Z = z - 4.
    #   4. All these coordinates should be approximately within (-1, 1)^3;
    #      convert them to (0, 128) and gather from the input grid to get
    #      the output grid.
    #
    # Strategy for unprojection:
    #   1. Start the meshgrid with (-1, 1) for X and Y, and (3, 5) for z.
    #   2. Since x = X*fx/Z, divide X by z, giving X/z.
    #   3. Apply the unprojection matrix, which adds the fx factor.
    #   4. Everything after this step is the same as for projection.
    # Steps 2 and 3 are requested below via `invert_focal=True` on
    # `get_transform_matrix` and `do_project='invert'` on `transformer`.
    inverse_matrix = tf.constant(
        get_transform_matrix(0.0, 0.0, invert_focal=True), dtype=tf.float32)

    # One copy of the matrix per example in the batch: (batch, 4, 4).
    per_example = tf.tile(
        tf.reshape(inverse_matrix, (1, 4, 4)),
        tf.stack([tfutil.batchdim(voxel), 1, 1]),
    )
    theta = tf.reshape(per_example, tf.stack([tfutil.batchdim(voxel), 16]))

    unprojected = transformer(
        voxel,
        theta,
        (const.S,) * 3,
        const.NEAR_PLANE,
        const.FAR_PLANE,
        do_project='invert',
    )
    return tf.reverse(unprojected, axis=[2, 3])
def unproject(inputs):
    """Unproject an image batch into a voxel grid and append helper channels.

    The input is resized to `const.S` x `const.S`, lifted to a 5-D grid by
    `voxel.unproject_image`, and then concatenated along axis 4 with:

      * a z-coordinate channel whose values run over (-1, 1) along axis 1;
      * optionally (`const.USE_LOCAL_BIAS`) a learned per-voxel bias, shared
        across the batch;
      * optionally (`const.USE_OUTLINE`) a thin shell ("outline") marking
        voxels whose z lies within `outline_thickness` above the unprojected
        depth, gated by the unprojected mask.

    When `const.DEBUG_UNPROJECT` is set, only the outline is returned.

    Args:
        inputs: image batch accepted by `tf.image.resize_images`.  After
            unprojection, channel 0 is used as a mask and channel 1 as a
            depth value (per how they are consumed below -- confirm against
            `voxel.unproject_image`).

    Returns:
        The concatenated 5-D feature grid, or the outline tensor alone in
        debug mode.
    """
    inputs = tf.image.resize_images(inputs, (const.S, const.S))

    # Unproject to get the starting point.
    inputs = voxel.unproject_image(inputs)

    # Dynamic batch size, used for every per-batch tile below.  The original
    # code referenced an undefined name `bs` for the bias tile.
    batch = tfutil.batchdim(inputs)

    # z-coordinate map: voxel centres along axis 1, rescaled to (-1, 1).
    meshgridz = tf.range(const.S, dtype=tf.float32)
    meshgridz = tf.reshape(meshgridz, (1, const.S, 1, 1))
    meshgridz = tf.tile(meshgridz, tf.stack([batch, 1, const.S, const.S]))
    meshgridz = tf.expand_dims(meshgridz, axis=4)
    meshgridz = (meshgridz + 0.5) / (const.S / 2.0) - 1.0

    # Rough outline: voxels lying in a thin slab starting at the depth value.
    unprojected_mask = tf.expand_dims(inputs[:, :, :, :, 0], 4)
    unprojected_depth = tf.expand_dims(inputs[:, :, :, :, 1], 4)

    outline_thickness = 0.1
    outline = tf.cast(
        tf.logical_and(
            unprojected_depth <= meshgridz,
            unprojected_depth + outline_thickness > meshgridz),
        tf.float32)
    outline *= unprojected_mask

    if const.DEBUG_UNPROJECT:
        # For the raw unprojected mask, return `unprojected_mask` instead.
        return outline

    channels = [inputs, meshgridz]

    if const.USE_LOCAL_BIAS:
        # One learned scalar per voxel, broadcast over the batch.
        bias = tf.get_variable(
            "voxelnet_bias",
            dtype=tf.float32,
            shape=[1, const.S, const.S, const.S, 1],
            initializer=tf.zeros_initializer())
        bias = tf.tile(bias, tf.stack([batch, 1, 1, 1, 1]))
        channels.append(bias)

    if const.USE_OUTLINE:
        channels.append(outline)

    return tf.concat(channels, axis=4)
def rotate_and_project_voxel(voxel, rotmat):
    """Run `voxel` through `transformer` with pre- and post-processing.

    A rank-4 input gets a temporary trailing channel axis that is removed
    again before returning; a rank-5 input is used as is.  `transformer` is
    called without `do_project`, so its default mode applies.

    Args:
        voxel: rank-4 or rank-5 voxel tensor.
        rotmat: per-example transform, reshaped to (batch, 16).

    Returns:
        The transformed tensor, with the same rank as the input.

    Raises:
        AssertionError: if the rank of `voxel` is neither 4 nor 5.
    """
    r = tfutil.rank(voxel)
    assert r in [4, 5]

    needs_channel_axis = (r == 4)
    if needs_channel_axis:
        voxel = tf.expand_dims(voxel, axis=4)

    prepared = transformer_preprocess(voxel)
    theta = tf.reshape(rotmat, tf.stack([tfutil.batchdim(prepared), 16]))

    transformed = transformer(
        prepared,
        theta,
        (const.S,) * 3,
        const.NEAR_PLANE,
        const.FAR_PLANE,
    )
    result = transformer_postprocess(transformed)

    if needs_channel_axis:
        result = tf.squeeze(result, axis=4)
    return result
def _interpolate(im, x, y, z, out_size):
    """Trilinear interpolation layer.

    Samples `im` at the given normalised coordinates by blending the eight
    voxels surrounding each sample point.

    Coordinates are mapped from [-1, 1] to voxel indices in "centers" mode:
    -1 lands on index 0 and +1 on index `size - 1` along each axis.

    Args:
        im: A 5D tensor of size [num_batch, depth, height, width,
            num_channels]; the input volume (cast to tf.float32).  The
            depth/height/width/channel sizes must be statically known.
        x: Inverse coordinate mapping for x (cast to tf.float32).  The
            indexing below expects one value per output sample, flattened
            to 1-D in batch-major order (this is how `_transform` passes
            it).
        y: Same as `x`, for the y coordinate.
        z: Same as `x`, for the z coordinate.
        out_size: Sequence (out_depth, out_height, out_width); their product
            is the number of samples per batch element.

    Returns:
        A tf.float32 tensor of shape [num_samples, num_channels] holding the
        interpolated values.
    """
    with tf.variable_scope('_interpolate'):
        num_batch = tfutil.batchdim(im)
        static_shape = im.get_shape().as_list()
        depth = static_shape[1]
        height = static_shape[2]
        width = static_shape[3]
        channels = static_shape[4]

        x = tf.to_float(x)
        y = tf.to_float(y)
        z = tf.to_float(z)
        depth_f = tf.to_float(depth)
        height_f = tf.to_float(height)
        width_f = tf.to_float(width)

        # Number of samples per batch element.
        samples_per_item = out_size[0] * out_size[1] * out_size[2]

        # Valid index ranges: 0 <= z < depth, 0 <= y < height,
        # 0 <= x < width.
        zero = tf.zeros([], dtype='int32')
        max_z = tf.to_int32(tf.shape(im)[1] - 1)
        max_y = tf.to_int32(tf.shape(im)[2] - 1)
        max_x = tf.to_int32(tf.shape(im)[3] - 1)

        # Map [-1, 1] onto voxel *centres*, i.e. onto [0, size - 1].
        # Alternatives that were tried and found less suitable:
        #   default: (c + 1) * size / 2.0         -> [0, size]
        #   corners: (c + 1) * size / 2.0 - 0.5   -> [-0.5, size - 0.5]
        x = (x + 1) * (width_f - 1.0) / 2.0
        y = (y + 1) * (height_f - 1.0) / 2.0
        z = (z + 1) * (depth_f - 1.0) / 2.0

        # Integer corners of the cell enclosing each sample point.
        x0 = tf.to_int32(tf.floor(x))
        x1 = x0 + 1
        y0 = tf.to_int32(tf.floor(y))
        y1 = y0 + 1
        z0 = tf.to_int32(tf.floor(z))
        z1 = z0 + 1

        # Clipped copies are used only for the gather, so that out-of-range
        # corners still address a real element; their contribution is
        # zeroed through the validity masks further down.
        x0_clip = tf.clip_by_value(x0, zero, max_x)
        x1_clip = tf.clip_by_value(x1, zero, max_x)
        y0_clip = tf.clip_by_value(y0, zero, max_y)
        y1_clip = tf.clip_by_value(y1, zero, max_y)
        z0_clip = tf.clip_by_value(z0, zero, max_z)
        z1_clip = tf.clip_by_value(z1, zero, max_z)

        # Strides of the flattened [batch, depth, height, width] layout.
        dim3 = width
        dim2 = width * height
        dim1 = width * height * depth

        # Offset of each sample's batch element in the flattened volume.
        # NOTE: per the original author, `_repeat` can only run on CPU.
        base = _repeat(tf.range(num_batch) * dim1, samples_per_item)

        base_z0_y0 = base + z0_clip * dim2 + y0_clip * dim3
        base_z0_y1 = base + z0_clip * dim2 + y1_clip * dim3
        base_z1_y0 = base + z1_clip * dim2 + y0_clip * dim3
        base_z1_y1 = base + z1_clip * dim2 + y1_clip * dim3

        idx_z0_y0_x0 = base_z0_y0 + x0_clip
        idx_z0_y0_x1 = base_z0_y0 + x1_clip
        idx_z0_y1_x0 = base_z0_y1 + x0_clip
        idx_z0_y1_x1 = base_z0_y1 + x1_clip
        idx_z1_y0_x0 = base_z1_y0 + x0_clip
        idx_z1_y0_x1 = base_z1_y0 + x1_clip
        idx_z1_y1_x0 = base_z1_y1 + x0_clip
        idx_z1_y1_x1 = base_z1_y1 + x1_clip

        # Look the corner values up in the flattened volume; the channel
        # axis is preserved.
        im_flat = tf.reshape(im, tf.stack([-1, channels]))
        im_flat = tf.to_float(im_flat)
        i_z0_y0_x0 = tf.gather(im_flat, idx_z0_y0_x0)
        i_z0_y0_x1 = tf.gather(im_flat, idx_z0_y0_x1)
        i_z0_y1_x0 = tf.gather(im_flat, idx_z0_y1_x0)
        i_z0_y1_x1 = tf.gather(im_flat, idx_z0_y1_x1)
        i_z1_y0_x0 = tf.gather(im_flat, idx_z1_y0_x0)
        i_z1_y0_x1 = tf.gather(im_flat, idx_z1_y0_x1)
        i_z1_y1_x0 = tf.gather(im_flat, idx_z1_y1_x0)
        i_z1_y1_x1 = tf.gather(im_flat, idx_z1_y1_x1)

        # Interpolation weights are built from the *unclipped* corners.
        x0_f = tf.to_float(x0)
        x1_f = tf.to_float(x1)
        y0_f = tf.to_float(y0)
        y1_f = tf.to_float(y1)
        z0_f = tf.to_float(z0)
        z1_f = tf.to_float(z1)

        # Out-of-boundary masks: 1.0 where the corner index is in range.
        x0_valid = tf.to_float(
            tf.less_equal(x0, max_x) & tf.greater_equal(x0, 0))
        x1_valid = tf.to_float(
            tf.less_equal(x1, max_x) & tf.greater_equal(x1, 0))
        y0_valid = tf.to_float(
            tf.less_equal(y0, max_y) & tf.greater_equal(y0, 0))
        y1_valid = tf.to_float(
            tf.less_equal(y1, max_y) & tf.greater_equal(y1, 0))
        z0_valid = tf.to_float(
            tf.less_equal(z0, max_z) & tf.greater_equal(z0, 0))
        z1_valid = tf.to_float(
            tf.less_equal(z1, max_z) & tf.greater_equal(z1, 0))

        # NOTE(review): each weight is gated by the validity of the
        # *opposite* corner along every axis (the x0 corner uses x1_valid,
        # and so on).  This pairing is reproduced exactly as it was;
        # confirm it is intended before changing it.
        w_z0_y0_x0 = tf.expand_dims(
            ((x1_f - x) * (y1_f - y) * (z1_f - z) *
             x1_valid * y1_valid * z1_valid), 1)
        w_z0_y0_x1 = tf.expand_dims(
            ((x - x0_f) * (y1_f - y) * (z1_f - z) *
             x0_valid * y1_valid * z1_valid), 1)
        w_z0_y1_x0 = tf.expand_dims(
            ((x1_f - x) * (y - y0_f) * (z1_f - z) *
             x1_valid * y0_valid * z1_valid), 1)
        w_z0_y1_x1 = tf.expand_dims(
            ((x - x0_f) * (y - y0_f) * (z1_f - z) *
             x0_valid * y0_valid * z1_valid), 1)
        w_z1_y0_x0 = tf.expand_dims(
            ((x1_f - x) * (y1_f - y) * (z - z0_f) *
             x1_valid * y1_valid * z0_valid), 1)
        w_z1_y0_x1 = tf.expand_dims(
            ((x - x0_f) * (y1_f - y) * (z - z0_f) *
             x0_valid * y1_valid * z0_valid), 1)
        w_z1_y1_x0 = tf.expand_dims(
            ((x1_f - x) * (y - y0_f) * (z - z0_f) *
             x1_valid * y0_valid * z0_valid), 1)
        w_z1_y1_x1 = tf.expand_dims(
            ((x - x0_f) * (y - y0_f) * (z - z0_f) *
             x0_valid * y0_valid * z0_valid), 1)

        # Weighted sum of the eight corner values.
        output = tf.add_n([
            w_z0_y0_x0 * i_z0_y0_x0, w_z0_y0_x1 * i_z0_y0_x1,
            w_z0_y1_x0 * i_z0_y1_x0, w_z0_y1_x1 * i_z0_y1_x1,
            w_z1_y0_x0 * i_z1_y0_x0, w_z1_y0_x1 * i_z1_y0_x1,
            w_z1_y1_x0 * i_z1_y1_x0, w_z1_y1_x1 * i_z1_y1_x1
        ])
        return output
def _transform(theta, input_dim, out_size, z_near, z_far):
    """Warp `input_dim` by `theta` and resample it on a regular output grid.

    A homogeneous sampling grid is built for the requested output size,
    multiplied by the per-example 4x4 matrices in `theta`, and the resulting
    source coordinates are passed to `_interpolate`.

    The grid type depends on `do_project`, which is NOT a parameter of this
    function: it is read from the enclosing scope (presumably an argument of
    the surrounding `transformer` function -- confirm).
      * `do_project is True`      -> `_meshgrid`
      * `do_project == 'invert'`  -> `_invproj_meshgrid`
      * anything else             -> `_noproj_meshgrid`

    Args:
        theta: Transform parameters; reshaped to (-1, 4, 4) and cast to
            float32.
        input_dim: 5-D input volume.  Its batch size is read with
            `tfutil.batchdim` and its channel count from static dimension 4.
        out_size: Sequence (out_depth, out_height, out_width).
        z_near: Near plane, forwarded to the meshgrid builder.
        z_far: Far plane, forwarded to the meshgrid builder.

    Returns:
        Tensor of shape
        [num_batch, out_depth, out_height, out_width, num_channels].
    """
    with tf.variable_scope('_transform'):
        num_batch = tfutil.batchdim(input_dim)
        num_channels = input_dim.get_shape().as_list()[4]

        theta = tf.reshape(theta, (-1, 4, 4))
        theta = tf.cast(theta, 'float32')

        out_depth = out_size[0]
        out_height = out_size[1]
        out_width = out_size[2]

        if do_project is True:
            grid = _meshgrid(out_depth, out_height, out_width, z_near, z_far)
        elif do_project == 'invert':
            grid = _invproj_meshgrid(out_depth, out_height, out_width,
                                     z_near, z_far)
        else:
            grid = _noproj_meshgrid(out_depth, out_height, out_width,
                                    z_near, z_far)

        # Replicate the grid once per batch element: (num_batch, 4, points).
        grid = tf.expand_dims(grid, 0)
        grid = tf.reshape(grid, [-1])
        grid = tf.tile(grid, tf.stack([num_batch]))
        grid = tf.reshape(grid, tf.stack([num_batch, 4, -1]))

        def printgrid(grid_):
            # Debug helper, not wired in by default.  To use it, inject it
            # as a callback on `grid`, e.g.
            #   grid = tfpy.inject_callback(grid, printgrid)
            # Author's notes on expected ranges: z in (3, 5); x/y in (5, -5).
            # Single-argument print(...) behaves the same on Python 2 and 3.
            zs = grid_[:, 0, :]
            print('===')
            print(zs.shape)
            print(np.mean(zs))
            print(np.max(zs))
            print(np.min(zs))

        # Transform A x (x_t', y_t', 1, d_t)^T -> (x_s, y_s, z_s, 1).
        t_g = tf.matmul(theta, grid)

        # Rows 0, 1, 2 of the result are read as z, y, x respectively.
        # Plain indexing drops the row axis (unlike tf.slice); that is fine
        # because everything is flattened right after.
        z_s = t_g[:, 0, :]
        y_s = t_g[:, 1, :]
        x_s = t_g[:, 2, :]

        z_s_flat = tf.reshape(z_s, [-1])
        y_s_flat = tf.reshape(y_s, [-1])
        x_s_flat = tf.reshape(x_s, [-1])

        input_transformed = _interpolate(input_dim, x_s_flat, y_s_flat,
                                         z_s_flat, out_size)

        output = tf.reshape(
            input_transformed,
            tf.stack([
                num_batch, out_depth, out_height, out_width, num_channels
            ]))
        return output