def _test_op_backward_pass(self, on_gpu, dtype, tol):
  """Checks resampler gradients against numerical differentiation.

  Args:
    on_gpu: Forwarded as `use_gpu` to the test session. It also selects the
      reference: when true, each analytic gradient is compared with the
      numerical gradient of the same graph; otherwise it is compared with the
      numerical gradient of a float64 copy of the graph.
    dtype: Dtype object exposing `as_numpy_dtype`, used for data and warp.
    tol: Strict upper bound on the largest absolute gradient difference.
  """
  np.random.seed(13)
  batch_size = 3
  data_height, data_width, data_channels = 4, 5, 3
  warp_height, warp_width = 6, 2
  np_dtype = dtype.as_numpy_dtype

  # The warp is built before the data, so any draws `_make_warp` takes from
  # np.random happen first.
  warp = _make_warp(batch_size, warp_height, warp_width, np_dtype)
  data = np.random.rand(
      batch_size, data_height, data_width, data_channels).astype(np_dtype)

  with self.test_session(use_gpu=on_gpu, force_gpu=False):
    data_tensor = constant_op.constant(data)
    warp_tensor = constant_op.constant(warp)
    output_tensor = resampler.resampler(data=data_tensor, warp=warp_tensor)

    input_shapes = [
        data_tensor.get_shape().as_list(),
        warp_tensor.get_shape().as_list(),
    ]
    grads = test.compute_gradient(
        [data_tensor, warp_tensor], input_shapes, output_tensor,
        output_tensor.get_shape().as_list(), [data, warp])

    if on_gpu:
      # Entry 0 vs entry 1 of every result: same-graph comparison.
      for grad in grads:
        self.assertLess(np.fabs(grad[0] - grad[1]).max(), tol)
    else:
      # On CPU we perform numerical differentiation at the best available
      # precision, and compare against that. This is necessary for test to
      # pass for float16.
      data_tensor_64 = constant_op.constant(data, dtype=dtypes.float64)
      warp_tensor_64 = constant_op.constant(warp, dtype=dtypes.float64)
      output_tensor_64 = resampler.resampler(
          data=data_tensor_64, warp=warp_tensor_64)
      shapes_64 = [
          data_tensor.get_shape().as_list(),
          warp_tensor.get_shape().as_list(),
      ]
      grads_64 = test.compute_gradient(
          [data_tensor_64, warp_tensor_64], shapes_64, output_tensor_64,
          output_tensor.get_shape().as_list(), [data, warp])

      for grad, grad_64 in zip(grads, grads_64):
        self.assertLess(np.fabs(grad[0] - grad_64[1]).max(), tol)
def _test_op_forward_pass(self, on_gpu, dtype, tol):
  """Compares the resampler op with a numpy bilinear-interpolation reference.

  Args:
    on_gpu: Forwarded as `use_gpu` to the test session.
    dtype: Dtype object exposing `as_numpy_dtype`; also the placeholder dtype.
    tol: Used as both `rtol` and `atol` in the final comparison.
  """
  np.random.seed(0)
  batch_size = 10
  data_height, data_width, data_channels = 9, 7, 5
  warp_height, warp_width = 8, 4
  np_dtype = dtype.as_numpy_dtype

  # Warp first, then data, to keep the order of random draws.
  warp = _make_warp(batch_size, warp_height, warp_width, np_dtype)
  data = np.random.rand(
      batch_size, data_height, data_width, data_channels).astype(np_dtype)

  with self.test_session(use_gpu=on_gpu, force_gpu=False) as sess:
    # The batch dimension is left unknown to exercise static shape inference.
    data_ph = array_ops.placeholder(dtype, shape=(None,) + data.shape[1:])
    warp_ph = array_ops.placeholder(dtype, shape=(None,) + warp.shape[1:])
    outputs = resampler.resampler(data=data_ph, warp=warp_ph)
    expected_static_shape = [None, warp_height, warp_width, data_channels]
    self.assertEqual(outputs.get_shape().as_list(), expected_static_shape)
    out = sess.run(outputs, feed_dict={data_ph: data, warp_ph: warp})

    # Generate reference output via bilinear interpolation in numpy, one
    # (batch, channel) plane at a time.
    reference_output = np.zeros_like(out)
    for b in xrange(batch_size):
      warp_x = warp[b, :, :, 0]
      warp_y = warp[b, :, :, 1]
      for ch in xrange(data_channels):
        reference_output[b, :, :, ch] = _bilinearly_interpolate(
            data[b, :, :, ch], warp_x, warp_y)

    self.assertAllClose(out, reference_output, rtol=tol, atol=tol)
def bilinear_wrapper(imgs, coords):
  """Bilinear sampling for inputs with any number of leading dimensions.

  All dimensions of `imgs` before the last three are collapsed into a single
  batch dimension, the 4D resampler is applied, and the leading dimensions are
  restored on the result.

  Args:
    imgs: are [B,H,W,C], where B may itself be several leading dimensions.
    coords: [B,H,W,2] indicating the source pixels to copy from; reshaped with
      the same collapsed leading size as `imgs`.

  Returns:
    [B,H,W,C] images after bilinear sampling from input.
  """
  leading_dims = tf.shape(imgs)[:-3]
  flat_batch = tf.reduce_prod(leading_dims)

  def _collapse_leading(tensor):
    # Keep the last three dims, fold everything before them into one axis.
    trailing = tf.shape(tensor)[-3:]
    return tf.reshape(
        tensor, tf.concat([flat_batch[tf.newaxis], trailing], axis=0))

  # the bilinear sampling code only handles 4D input, so we need to reshape
  sampled = contrib_resampler.resampler(
      _collapse_leading(imgs), _collapse_leading(coords))

  restored_shape = tf.concat([leading_dims, tf.shape(sampled)[-3:]], axis=0)
  return tf.reshape(sampled, restored_shape)
def _test_op_forward_pass(self, on_gpu, dtype, tol):
  """Runs the resampler forward and checks it against a numpy reference.

  Args:
    on_gpu: Forwarded as `use_gpu` to the test session.
    dtype: Dtype object exposing `as_numpy_dtype`; also the placeholder dtype.
    tol: Relative and absolute tolerance of the final comparison.
  """
  np.random.seed(0)
  data_height, data_width, data_channels = 9, 7, 5
  warp_height, warp_width = 8, 4
  batch_size = 10

  numpy_dtype = dtype.as_numpy_dtype
  warp = _make_warp(batch_size, warp_height, warp_width, numpy_dtype)
  data_shape = (batch_size, data_height, data_width, data_channels)
  data = np.random.rand(*data_shape).astype(numpy_dtype)

  def _reference(result_like):
    # Bilinear interpolation in numpy, applied per batch item and channel.
    expected = np.zeros_like(result_like)
    for batch in xrange(batch_size):
      for c in xrange(data_channels):
        expected[batch, :, :, c] = _bilinearly_interpolate(
            data[batch, :, :, c], warp[batch, :, :, 0], warp[batch, :, :, 1])
    return expected

  with self.test_session(use_gpu=on_gpu, force_gpu=False) as sess:
    unknown_batch = (None,)
    data_ph = array_ops.placeholder(
        dtype, shape=unknown_batch + data.shape[1:])
    warp_ph = array_ops.placeholder(
        dtype, shape=unknown_batch + warp.shape[1:])
    outputs = resampler.resampler(data=data_ph, warp=warp_ph)

    # Static shape: unknown batch, warp grid size, data channels.
    self.assertEqual(outputs.get_shape().as_list(),
                     [None, warp_height, warp_width, data_channels])

    out = sess.run(outputs, feed_dict={data_ph: data, warp_ph: warp})
    self.assertAllClose(out, _reference(out), rtol=tol, atol=tol)
def flow_gather(source_images, flows):
  """Gather from a tensor of images.

  Every depth slice is treated as an independent image: the depth axis is
  folded into the batch axis, a per-pixel sampling position is built by adding
  the flow to the pixel's own (row, column) index, and the resampler is applied.

  Args:
    source_images: 5D tensor of images [B, H, W, D, 3]
    flows: 5D tensor of x/y offsets to gather for each slice (pixel offsets).
      Reshaped here to [B * D, H, W, 2]; channel 0 is added to the row index
      and channel 1 to the column index.

  Returns:
    warped_imgs_reshape: 5D tensor of gathered (warped) images [B, H, W, D, 3]
  """
  images_shape = tf.shape(source_images)
  batchsize = images_shape[0]
  height = images_shape[1]
  width = images_shape[2]
  num_depths = images_shape[3]
  num_slices = batchsize * num_depths

  # [B, H, W, D, C] -> [B, D, H, W, C] -> [B * D, H, W, C]
  source_images_reshape = tf.reshape(
      tf.transpose(source_images, [0, 3, 1, 2, 4]),
      [num_slices, height, width, 3])
  flows_reshape = tf.reshape(
      tf.transpose(flows, [0, 3, 1, 2, 4]), [num_slices, height, width, 2])

  # Row (h) and column (w) index of every pixel; the slice index is unused.
  _, h, w = tf.meshgrid(
      tf.range(tf.to_float(num_slices), dtype=tf.float32),
      tf.range(tf.to_float(height), dtype=tf.float32),
      tf.range(tf.to_float(width), dtype=tf.float32),
      indexing='ij')

  # Clamp to the last valid pixel index. The previous upper bounds (height and
  # width) were one past the end, so the clamp was asymmetric: in range at the
  # low edge, out of range at the high edge.
  max_row = tf.to_float(height) - 1.0
  max_col = tf.to_float(width) - 1.0
  coords_y = tf.clip_by_value(h + flows_reshape[Ellipsis, 0], 0.0, max_row)
  coords_x = tf.clip_by_value(w + flows_reshape[Ellipsis, 1], 0.0, max_col)

  # The resampler is given (x, y) pairs.
  sampling_coords = tf.stack([coords_x, coords_y], axis=-1)
  warped_imgs = contrib_resampler.resampler(source_images_reshape,
                                            sampling_coords)

  # [B * D, H, W, C] -> [B, D, H, W, C] -> [B, H, W, D, C]
  warped_imgs_reshape = tf.transpose(
      tf.reshape(warped_imgs, [batchsize, num_depths, height, width, 3]),
      [0, 2, 3, 1, 4])
  return warped_imgs_reshape
def call(self, inputs, training=False):
    """Resamples `inputs` on the layer's grid using predicted offsets.

    Offsets are predicted from a bilinearly upsampled copy of `inputs`, added
    to `self.grid_y`, and used to resample `inputs`. Outside training, a second
    offset prediction is made from the first resampled result and accumulated
    before the final resampling.

    Fixes over the previous version:
      * the `UpSampling2D` layers were constructed but never applied to
        `inputs` (one was returned as a layer object, the other was passed to
        `self.filters` as if it were a tensor);
      * the function ended with `return inputs`, discarding the refined
        result it had just computed.

    Args:
        inputs: 4-D tensor, unpacked as (batch, height, width, channels).
        training: When true, half of the calls (chosen with
            `np.random.random()`) return only the upsampled input, and the
            other half return the single-pass result.

    Returns:
        The upsampled input (random training shortcut), the single-pass
        resampled input (training), or the two-pass resampled input.
    """
    # NOTE(review): unpacking `tf.shape(...)` relies on eager execution, and
    # the scale factors below are true divisions of target size by input size;
    # confirm that `UpSampling2D` receives the integer factors it expects.
    batch_size, height, width, _ = tf.shape(inputs)
    upsample = UpSampling2D(
        (self.target_height / height, self.target_width / width),
        interpolation="bilinear",
    )

    if training and np.random.random() > 0.5:
        return upsample(inputs)

    assert batch_size <= self.max_batch_size
    grid = self.grid[:batch_size]
    grid_x = self.grid_x[:batch_size]
    grid_y = self.grid_y[:batch_size]

    def offsets_on_grid(features):
        # Pool positive and negative parts separately, recombine with sign,
        # then sample the pooled offsets at the grid positions.
        offsets = self.filters(features)
        offsets_pos = RELU()(offsets)
        offsets_neg = RELU()(-offsets)
        offsets_pool = self.pool(offsets_pos) - self.pool(offsets_neg)
        return tf.transpose(resampler(offsets_pool, grid), [0, 2, 3, 1])

    def rectify(offsets_grid):
        # Only the second grid component is displaced by the offsets.
        input_offsets = tf.concat(
            values=[grid_x, grid_y + offsets_grid], axis=3
        )
        return resampler(inputs, input_offsets)

    offsets_grid = offsets_on_grid(upsample(inputs))
    rectified_input = rectify(offsets_grid)
    if training:
        return rectified_input

    # Refinement pass: predict residual offsets from the first result.
    offsets_grid += offsets_on_grid(rectified_input)
    return rectify(offsets_grid)
def _assertForwardOpMatchesExpected(self, image_np, warp_np, expected):
  """Runs the resampler on the given arrays and compares with `expected`.

  Args:
    image_np: Array fed as the image; its dtype is the placeholder dtype.
    warp_np: Array fed as the warp; its dtype is the placeholder dtype.
    expected: Expected op output, compared with type-dependent tolerances.
  """
  with self.test_session() as sess, self.test_scope():
    image_ph = array_ops.placeholder(image_np.dtype)
    warp_ph = array_ops.placeholder(warp_np.dtype)
    resampled = resampler.resampler(image_ph, warp_ph, name='resampler')

    feeds = {image_ph: image_np, warp_ph: warp_np}
    out = sess.run(resampled, feeds)

    self.assertAllCloseAccordingToType(
        expected, out, rtol=5e-3, half_rtol=1e-2, bfloat16_rtol=3e-2)
def apply_line_prediction(inputs,
                          features,
                          blur_steps,
                          learn_alpha=True,
                          name=None):
  """Applies "Line Prediction" layer to input images.

  For each of the two 3-channel frames packed in `inputs`, a per-pixel 2D flow
  is predicted from `features` with a 1x1 convolution. The frame is sampled at
  `blur_steps - 1` positions spaced evenly along that flow, the unwarped frame
  is prepended, and the `blur_steps` samples are blended into one image.

  Args:
    inputs: Tensor compatible with [batch, height, width, 6]; channels 0-2 are
      the first frame and channels 3-5 the second.
    features: Tensor fed to the 1x1 convolutions that predict flow and alpha.
    blur_steps: Number of samples blended per pixel (including the original).
    learn_alpha: If true, blending weights come from a softmax-activated 1x1
      convolution; otherwise every sample is weighted by `1.0 / blur_steps`.
    name: Optional name scope; defaults to 'blur_prediction'.

  Returns:
    The mean of the two blended frames.
  """
  inputs.shape.assert_is_compatible_with([None, None, None, 6])

  # NOTE(review): the scope nesting below was reconstructed from collapsed
  # text; it only affects op names, not values.
  with tf.name_scope(name, 'blur_prediction', values=[inputs, features]):
    with tf.name_scope(None, 'input_frames', values=[inputs]):
      first_frame = inputs[:, :, :, :3]
      second_frame = inputs[:, :, :, 3:]
      frames = [first_frame, second_frame]

    with tf.name_scope(None, 'frame_size', values=[inputs, features]):
      input_shape = tf.shape(inputs)
      height, width = input_shape[1], input_shape[2]

    with tf.name_scope(None, 'identity_warp', values=[]):
      # (x, y) index of every pixel, with singleton batch and step axes.
      x_idx, y_idx = tf.meshgrid(tf.range(width), tf.range(height))
      pixel_xy = tf.to_float(tf.stack([x_idx, y_idx], axis=-1))
      identity_warp = pixel_xy[tf.newaxis, :, :, tf.newaxis, :]

      # Fractions 1/(S-1), 2/(S-1), ..., 1 of the flow vector.
      step_fractions = (
          tf.to_float(tf.range(blur_steps - 1) + 1) / (blur_steps - 1))
      warp_steps = step_fractions[
          tf.newaxis, tf.newaxis, tf.newaxis, :, tf.newaxis]

      # Largest valid (x, y) sampling position.
      last_pixel = tf.to_float(tf.stack([width - 1, height - 1]))
      max_warps = last_pixel[
          tf.newaxis, tf.newaxis, tf.newaxis, tf.newaxis, :]

    output_frames = []
    for frame in frames:
      with tf.name_scope(None, 'predict_blurs', values=[features]):
        # Flow conv is created before the alpha conv for each frame.
        flow = tf.layers.conv2d(features, 2, 1, padding='same')
        if learn_alpha:
          alpha = tf.layers.conv2d(
              features, blur_steps, 1, padding='same',
              activation=tf.nn.softmax)

      with tf.name_scope(None, 'apply_blurs', values=[]):
        with tf.name_scope(None, 'warp', values=[frame, flow]):
          scaled_flow = flow[:, :, :, tf.newaxis, :] * warp_steps
          warps = tf.clip_by_value(identity_warp + scaled_flow, 0.0,
                                   max_warps)
          samples = contrib_resampler.resampler(frame, warps)
          # Prepend the unwarped frame as the first sample.
          warped = tf.concat([frame[:, :, :, tf.newaxis, :], samples], axis=3)

        with tf.name_scope(None, 'apply_alpha', values=[frame, flow]):
          mask = (
              alpha[:, :, :, :, tf.newaxis] if learn_alpha
              else 1.0 / blur_steps)
          output_frames.append(tf.reduce_sum(warped * mask, axis=3))

    with tf.name_scope(None, 'outputs', values=[output_frames]):
      output = tf.add_n(output_frames) / len(frames)

    return output
def _test_op_backward_pass(self, on_gpu, dtype, tol):
  """Verifies resampler gradients with respect to both data and warp.

  Args:
    on_gpu: Forwarded as `use_gpu` to the test session. When false, the
      reference is the numerical gradient of a float64 copy of the graph;
      when true, it is the numerical gradient of the graph under test.
    dtype: Dtype object exposing `as_numpy_dtype`, used for data and warp.
    tol: Strict upper bound on the largest absolute gradient difference.
  """
  np.random.seed(13)
  data_height, data_width, data_channels = 4, 5, 3
  warp_height, warp_width = 6, 2
  batch_size = 10

  numpy_dtype = dtype.as_numpy_dtype
  warp = _make_warp(batch_size, warp_height, warp_width, numpy_dtype)
  data_shape = (batch_size, data_height, data_width, data_channels)
  data = np.random.rand(*data_shape).astype(numpy_dtype)

  def _max_abs_diff(lhs, rhs):
    return np.fabs(lhs - rhs).max()

  with self.test_session(use_gpu=on_gpu, force_gpu=False):
    data_tensor = constant_op.constant(data)
    warp_tensor = constant_op.constant(warp)
    output_tensor = resampler.resampler(data=data_tensor, warp=warp_tensor)

    grads = test.compute_gradient(
        [data_tensor, warp_tensor],
        [data_tensor.get_shape().as_list(),
         warp_tensor.get_shape().as_list()],
        output_tensor,
        output_tensor.get_shape().as_list(),
        [data, warp])

    if not on_gpu:
      # On CPU we perform numerical differentiation at the best available
      # precision, and compare against that. This is necessary for test to
      # pass for float16.
      data_tensor_64 = constant_op.constant(data, dtype=dtypes.float64)
      warp_tensor_64 = constant_op.constant(warp, dtype=dtypes.float64)
      output_tensor_64 = resampler.resampler(
          data=data_tensor_64, warp=warp_tensor_64)
      grads_64 = test.compute_gradient(
          [data_tensor_64, warp_tensor_64],
          [data_tensor.get_shape().as_list(),
           warp_tensor.get_shape().as_list()],
          output_tensor_64,
          output_tensor.get_shape().as_list(),
          [data, warp])

      for g, g_64 in zip(grads, grads_64):
        self.assertLess(_max_abs_diff(g[0], g_64[1]), tol)
    else:
      for g in grads:
        self.assertLess(_max_abs_diff(g[0], g[1]), tol)
def rotate_pano_horizontally(input_feature_map, yaw_angle):
  """Rotates input_feature_map by yaw_angle by horizontally translating pixels.

  The layer is differentiable with respect to yaw_angle and input_feature_map.
  yaw_angle is positive for CCW rotation about the z-axis where the coordinates
  are constructed with z-axis facing up.

  Args:
    input_feature_map: panoramic image or neural feature maps of shape [B, H, W,
      C].
    yaw_angle: A tensor of shape `[B]` which represents the desired rotation of
      input_feature_map. yaw_angle is in units of radians. A positive yaw_angle
      rolls pixels left.

  Returns:
    A rotated feature map with dimensions `[B, H, W, C]`

  Reference:
    [1]: 'Spatial Transformer Networks', Jaderberg et. al,
    (https://arxiv.org/abs/1506.02025)
  """
  map_shape = tf.shape(input_feature_map)
  batch_size, height, width = map_shape[0], map_shape[1], map_shape[2]
  width_f = tf.cast(width, dtype=tf.float32)
  height_f = tf.cast(height, dtype=tf.float32)

  # A full turn (2 * pi) corresponds to a shift of one image width.
  x_shift = (yaw_angle / 2 / np.pi) * width_f

  # Column sampled by every output pixel: own column plus the batch's shift.
  columns = tf.linspace(0., width_f - 1, width)  # (W)
  x_sample = columns[tf.newaxis] + x_shift[:, tf.newaxis]  # (B, W)
  x_sample = tf.tile(x_sample[:, tf.newaxis, :], [1, height, 1])  # (B, H, W)

  # Rows are left untouched.
  rows = tf.linspace(0., height_f - 1, height)
  y_sample = tf.tile(rows[tf.newaxis, :, tf.newaxis],
                     [batch_size, 1, width])  # (B, H, W)

  x_wrapped = tf.floormod(x_sample, width_f)

  # Because these are panoramas, the first column is appended on the right so
  # that coordinates between the last and the first column interpolate across
  # the seam.
  seam_padded = tf.concat(
      [input_feature_map, input_feature_map[:, :, :1]], axis=2)
  sample_xy = tf.stack([x_wrapped, y_sample], axis=-1)
  return resampler.resampler(seam_padded, sample_xy)
def call(self, inputs):
    """Resamples `inputs` on a grid with two randomly blended neighbours.

    A row of evenly spaced positions in [-1, 1] is built along axis 3 of
    `inputs`. One randomly chosen interior position and its left neighbour are
    each pulled towards the other by a random factor below 0.25, and the
    resulting grid is passed to `resampler`.

    Args:
        inputs: 4-D tensor whose axis 2 has size 1.

    Returns:
        The output of `resampler(inputs, self.grid)`.
    """
    # NOTE(review): `len()` and `==` on `tf.shape(...)` only behave as plain
    # Python checks under eager execution; confirm this layer is eager-only.
    inputs_shape = tf.shape(inputs)
    assert len(inputs_shape) == 4
    assert inputs_shape[2] == 1
    # NOTE(review): `height` is read from the axis asserted to be 1 and is
    # then used as the single coordinate of the second grid axis; confirm.
    height = inputs_shape[2]
    width = inputs_shape[3]

    # Evenly spaced positions from -1 to 1.
    widths = np.arange(width) * 2.0 / (width - 1) - 1

    idx = int(npr.rand() * len(widths))
    if 0 < idx and idx < width - 1:
        beta = npr.rand() / 4.0
        # Both new values are computed from the old pair before assignment.
        widths[idx - 1], widths[idx] = (
            beta * widths[idx] + (1 - beta) * widths[idx - 1],
            beta * widths[idx - 1] + (1 - beta) * widths[idx],
        )

    axes = np.meshgrid(widths, height, indexing="ij")
    grid = np.transpose(np.stack(axes, axis=-1), (1, 0, 2))
    grid = np.tile(np.expand_dims(grid, 0), [inputs_shape[0], 1, 1, 1])

    # NOTE(review): a new tf.Variable is created on every call.
    self.grid = tf.Variable(grid)
    return resampler(inputs, self.grid)
def projective_inverse_warp(img, depth, pose, intrinsics, ret_flows=False):
  """Inverse warp a source image to the target image plane based on projection.

  Args:
    img: the source image [batch, height_s, width_s, 3]
    depth: depth map of the target image [batch, height_t, width_t]
      NOTE(review): the code reads the number of depths from
      `tf.shape(depth)[0]` and divides the leading size of `img` by it, so the
      leading dimensions may not be a plain batch - confirm against callers.
    pose: target to source camera transformation matrix [batch, 4, 4]
    intrinsics: camera intrinsics [batch, 3, 3]
    ret_flows: whether to return the displacements/flows as well

  Returns:
    Source image inverse warped to the target image plane [batch, height_t,
    width_t, 3]. If `ret_flows` is true, a tuple of that image and
    `src_pixel_coords - cam_coords`.
  """
  num_depths = tf.shape(depth)[0]
  img_shape = tf.shape(img)
  batch = tf.to_int32(img_shape[0] / num_depths)
  height, width = img_shape[1], img_shape[2]
  num_images = batch * num_depths

  # Pixel grid -> target camera frame.
  pixel_coords = meshgrid_abs(num_images, height, width)
  cam_coords = pixel2cam(depth, pixel_coords, intrinsics)

  # Extend the 3x3 intrinsics to 4x4: append a zero column, then the row
  # [0, 0, 0, 1]. (TODO: can it be 3x4?)
  zero_column = tf.zeros([num_images, 3, 1])
  last_row = tf.tile(
      tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4]),
      [num_images, 1, 1])
  intrinsics_3x4 = tf.concat([intrinsics, zero_column], axis=2)
  intrinsics_4x4 = tf.concat([intrinsics_3x4, last_row], axis=1)

  # 4x4 transformation from the 'target' camera frame to the 'source' pixel
  # frame.
  proj_tgt_cam_to_src_pixel = tf.matmul(intrinsics_4x4, pose)
  src_pixel_coords = cam2pixel(cam_coords, proj_tgt_cam_to_src_pixel)

  output_img = contrib_resampler.resampler(img, src_pixel_coords)
  if not ret_flows:
    return output_img
  return output_img, src_pixel_coords - cam_coords
def rgbd_consistency_loss(frame1transformed_depth, frame1rgb, frame2depth,
                          frame2rgb):
  """Computes a loss that penalizes RGB and depth inconsistencies between frames.

  This function computes 3 losses that penalize inconsistencies between two
  frames: depth, RGB, and structural similarity. It IS NOT SYMMETRIC with
  respect to both frames. In particular, to address occlusions, it only
  penalizes depth and RGB inconsistencies at pixels where frame1 is closer to
  the camera than frame2. (Why? see https://arxiv.org/abs/1904.04998).
  Therefore the intended usage pattern is running it twice - second time with
  the two frames swapped.

  Args:
    frame1transformed_depth: A transform_depth_map.TransformedDepthMap object
      representing the depth map of frame 1 after it was motion-transformed to
      frame 2, a motion transform that accounts for all camera and object
      motion that occurred between frame1 and frame2. The tensors inside
      frame1transformed_depth are of shape [B, H, W]. Its `pixel_xy`, `depth`
      and `mask` attributes are read here.
    frame1rgb: A tf.Tensor of shape [B, H, W, C] containing the RGB image at
      frame1.
    frame2depth: A tf.Tensor of shape [B, H, W] containing the depth map at
      frame2.
    frame2rgb: A tf.Tensor of shape [B, H, W, C] containing the RGB image at
      frame2.

  Returns:
    A dictionary from string to tf.Tensor, with the following entries:
      depth_error: A tf scalar, the depth mismatch error between the two
        frames.
      rgb_error: A tf scalar, the rgb mismatch error between the two frames.
      ssim_error: A tf scalar, the structural similarity mismatch error
        between the two frames.
      depth_proximity_weight: A tf.Tensor of shape [B, H, W], representing a
        function that peaks (at 1.0) for pixels where there is depth
        consistency between the two frames, and is small otherwise.
      frame1_closer_to_camera: A float mask that is 1.0 where the mask of
        frame 1 is set and its transformed depth is smaller than the resampled
        depth of frame 2. It is returned without the trailing channel axis
        that is added internally for the RGB term.
  """
  f1td = frame1transformed_depth
  pixel_xy = f1td.pixel_xy

  # f1td.depth is the predicted depth at [pixel_y, pixel_x] for frame2. Here
  # we generate (by interpolation) the actual depth and RGB values of frame2
  # at the same locations, so that the two can be compared.
  frame2depth_resampled = _resample_depth(frame2depth, pixel_xy)
  frame2rgb_resampled = contrib_resampler.resampler(frame2rgb, pixel_xy)

  # We penalize inconsistencies between the two frames' depth maps only if the
  # transformed depth map (of frame 1) falls closer to the camera than the
  # actual depth map (of frame 2). This is intended for avoiding penalizing
  # points that become occluded because of the transform.
  # So what about depth inconsistencies where frame1's depth map is FARTHER
  # from the camera than frame2's? These will be handled when we swap the
  # roles of frame 1 and 2 (more in https://arxiv.org/abs/1904.04998).
  is_closer = tf.logical_and(
      f1td.mask, tf.less(f1td.depth, frame2depth_resampled))
  frame1_closer_to_camera = tf.to_float(is_closer)

  depth_diff = frame2depth_resampled - f1td.depth
  depth_error = tf.reduce_mean(tf.abs(depth_diff) * frame1_closer_to_camera)

  rgb_diff = tf.abs(frame2rgb_resampled - frame1rgb)
  rgb_error = tf.reduce_mean(
      rgb_diff * tf.expand_dims(frame1_closer_to_camera, -1))

  # We generate a weight function that peaks (at 1.0) for pixels where the
  # depth difference is less than its standard deviation across the frame, and
  # falls off to zero otherwise. This function is used later for weighing the
  # structural similarity loss term. We only want to demand structural
  # similarity for surfaces that are close to one another in the two frames.
  squared_depth_diff = tf.square(depth_diff)
  depth_error_second_moment = _weighted_average(
      squared_depth_diff, frame1_closer_to_camera) + 1e-4
  depth_proximity_weight = (
      depth_error_second_moment /
      (squared_depth_diff + depth_error_second_moment) *
      tf.to_float(f1td.mask))

  # If we don't stop the gradient training won't start. The reason is
  # presumably that then the network can push the depths apart instead of
  # seeking RGB consistency.
  depth_proximity_weight = tf.stop_gradient(depth_proximity_weight)

  ssim_error, avg_weight = weighted_ssim(
      frame2rgb_resampled,
      frame1rgb,
      depth_proximity_weight,
      c1=float('inf'),  # These values of c1 and c2 work better than defaults.
      c2=9e-6)
  ssim_error = tf.reduce_mean(ssim_error * avg_weight)

  return {
      'depth_error': depth_error,
      'rgb_error': rgb_error,
      'ssim_error': ssim_error,
      'depth_proximity_weight': depth_proximity_weight,
      'frame1_closer_to_camera': frame1_closer_to_camera
  }
def test_op_errors(self):
  """Checks the errors raised by the resampler for malformed inputs.

  Each case builds zero-filled data and warp arrays of the given shapes, adds
  the op to a fresh test session, and expects running it to raise the given
  error type with a message matching the given pattern.
  """
  data_width = 7
  data_height = 9
  data_depth = 3
  data_channels = 5
  warp_width = 4
  warp_height = 8
  batch_size = 10

  valid_data_shape = (batch_size, data_height, data_width, data_channels)

  # (data shape, warp shape, expected error, expected message pattern)
  cases = [
      # Input data shape is not defined over a 2D grid, i.e. its shape is not
      # like (batch_size, data_height, data_width, data_channels).
      ((batch_size, data_height, data_width, data_depth, data_channels),
       (batch_size, warp_height, warp_width, 2),
       errors_impl.UnimplementedError,
       "Only bilinear interpolation is currently " "supported."),
      # Warp tensor must be at least a matrix, with shape [batch_size, 2].
      (valid_data_shape,
       (batch_size,),
       errors_impl.InvalidArgumentError,
       "warp should be at least a matrix"),
      # The batch size of the data and warp tensors must be the same.
      (valid_data_shape,
       (batch_size + 1, warp_height, warp_width, 2),
       errors_impl.InvalidArgumentError,
       "Batch size of data and warp tensor"),
      # The warp tensor must contain 2D coordinates, i.e. its shape last
      # dimension must be 2.
      (valid_data_shape,
       (batch_size, warp_height, warp_width, 3),
       errors_impl.UnimplementedError,
       "Only bilinear interpolation is supported, " "warping"),
  ]

  for data_shape, warp_shape, error_type, pattern in cases:
    with self.test_session() as sess:
      data = np.zeros(data_shape)
      warp = np.zeros(warp_shape)
      outputs = resampler.resampler(constant_op.constant(data),
                                    constant_op.constant(warp))

      with self.assertRaisesRegexp(error_type, pattern):
        sess.run(outputs)
# Reference result from the hand-written sampler, for comparison with z2.
z = bilinear_sampler(x, v2)

shape = tf.shape(x)
N, H, W = shape[0], shape[1], shape[2]
H_, W_ = H, W

# Grid arrays with zero start offsets for both spatial axes.
h = w = 0
n, h, w = _get_grid_array(N, H, W, h, w)  # [N, H, W, 3]

# Stack the two grid axes, drop the trailing singleton axis and add the
# displacement field v to obtain the sampling positions.
stacked = tf.squeeze(tf.stack([h, w], 3), [4]) + v  # [1, 2, 3, 1]

# The resampler output has its two spatial axes swapped back afterwards.
z2 = tf.transpose(resampler(x, stacked), [0, 2, 1, 3])

with tf.Session() as sess:
  # The graph tensors are overwritten by their evaluated values.
  x_, z_, n, h, w, stacked, z2 = sess.run([x, z, n, h, w, stacked, z2])
  print(n)
  print('............')
  print(h)
def bspline_warp(cps, image, degree, regularization=0, pano_pad=False):
  """Differentiable 2D alignment of a stack of nearby panoramas.

  Entry point for regularized b-spline surface warp with appropriate handling
  for boundary padding of panoramas. Includes the image resampling operation.

  Args:
    cps: Control points [bsz, H_CP, W_CP, d] defining the deformations.
    image: An image tensor [bsz, H, W, 3] from which we sample deformed
      coordinates.
    degree: Defines the degree of the b-spline interpolation.
    regularization: A float ranging from [0, 1] that smooths the extremes of
      the control points. The effect is that the network has some leeway in
      fitting the original control points exactly.
    pano_pad: When true pads the image and uses a cyclical horizontal warp.
      Useful for warping panorama images.

  Returns:
    A warped image based on deformations specified by control points at
    various positions. Has shape [bsz, H, W, d]

  Raises:
    ValueError: If `regularization` is outside [0, 1]. `interpolate_2d` is
      documented upstream to raise if degree is greater than 4 or
      num_knots - 1, or less than 0.
    InvalidArgumentError: If positions are not in the right range.
  """
  if regularization < 0 or regularization > 1:
    raise ValueError("b-spline regularization must be between [0, 1]")

  if regularization > 0.:
    # Regularizing constraint on the local structure of control points.
    # New control points is:
    #   regularization * ave_neighbor + (1-regularization) * cp
    # Vertical neighbours replicate the border row.
    below = tf.concat([cps[:, 1:], cps[:, -1:]], axis=1)
    above = tf.concat([cps[:, :1], cps[:, :-1]], axis=1)
    if pano_pad:
      # Horizontal neighbours wrap around for panoramas.
      left = tf.roll(cps, shift=1, axis=2)
      right = tf.roll(cps, shift=-1, axis=2)
    else:
      left = tf.concat([cps[:, :, :1], cps[:, :, :-1]], axis=2)
      right = tf.concat([cps[:, :, 1:], cps[:, :, -1:]], axis=2)
    neighbor_mean = (left + right + above + below) / 4.
    cps = cps * (1 - regularization) + neighbor_mean * (regularization)

  tf.summary.image("cps_h", cps[Ellipsis, :1])
  tf.summary.image("cps_w", cps[Ellipsis, 1:])

  batch_size, small_h, small_w, _ = cps.shape.as_list()
  _, big_h, big_w, _ = image.shape.as_list()

  # Control points are "normalized" in the sense that they're agnostic to the
  # resolution of the image being warped.
  cps = cps * np.array([big_h, big_w])

  def _edge_padded_linspace(stop, length):
    # `length - 4` evenly spaced values in [0, stop], with the first and last
    # value repeated twice more at either end.
    inner = tf.linspace(0., stop, length - 4)
    return tf.concat([tf.zeros([2]), inner, tf.ones([2]) * stop], axis=0)

  y_coord = _edge_padded_linspace(small_h - 3 - 1e-4, big_h)[:, tf.newaxis]
  if pano_pad:
    x_coord = tf.linspace(0., small_w + 1 - 1e-4, big_w)[tf.newaxis, :]
  else:
    x_coord = _edge_padded_linspace(small_w - 3 - 1e-4, big_w)[tf.newaxis, :]

  # Broadcast both coordinate vectors to the full [big_h, big_w] grid.
  y_coord += tf.zeros_like(x_coord)
  x_coord += tf.zeros_like(y_coord)

  stacked_coords = tf.stack([y_coord, x_coord], axis=-1)[tf.newaxis]
  stacked_coords = tf.tile(stacked_coords, [batch_size, 1, 1, 1])

  estimated_offsets = interpolate_2d(cps, stacked_coords, degree,
                                     [False, pano_pad])
  tf.summary.image("y_flowfield", estimated_offsets[Ellipsis, :1])
  tf.summary.image("x_flowfield", estimated_offsets[Ellipsis, 1:])

  # Absolute sampling position = pixel index + estimated offset.
  y_sample = tf.range(0., big_h, 1)[:, tf.newaxis]
  x_sample = tf.range(0., big_w, 1)[tf.newaxis, :]
  y_sample += tf.zeros_like(x_sample)
  x_sample += tf.zeros_like(y_sample)
  y_sample += estimated_offsets[Ellipsis, 0]
  x_sample += estimated_offsets[Ellipsis, 1]

  y_clipped = tf.clip_by_value(y_sample, 0, big_h - 1)
  if pano_pad:
    # Wrap horizontally and append the first column so that positions between
    # the last and first column can be interpolated.
    x_clipped = tf.floormod(x_sample, big_w)
    image = tf.concat([image, image[:, :, :1]], axis=2)
  else:
    x_clipped = tf.clip_by_value(x_sample, 0, big_w - 1)

  # The resampler is given (x, y) pairs.
  stacked_resampler_coords = tf.stack([x_clipped, y_clipped], axis=-1)
  return contrib_resampler.resampler(image, stacked_resampler_coords)
# Flatten the 1500 x 2100 coordinate grids into a list of (x, y) points.
Y1 = tf.reshape(Y, [1500 * 2100])
X1 = tf.reshape(X, [1500 * 2100])
pts = tf.stack([X1, Y1], axis=1)

# Transform every point with the rotation matrix.
warp_pts = tf.matmul(pts, rotation)
warp_Y = warp_pts[:, 1]
warp_X = warp_pts[:, 0]

# Back to image layout.
warp_Y1 = tf.reshape(warp_Y, [1500, 2100])
warp_X1 = tf.reshape(warp_X, [1500, 2100])

# NOTE(review): the upper bounds equal the grid sizes rather than the last
# index (1499 / 2099) - confirm this is intended.
warp_Y2 = tf.clip_by_value(warp_Y1, 0, 1500)
warp_X2 = tf.clip_by_value(warp_X1, 0, 2100)

# The resampler is given (x, y) pairs with a leading batch axis.
warp_combined = tf.stack([warp_X2, warp_Y2], axis=2)
warp = tf.cast(warp_combined, tf.float32)
warp_inp = tf.expand_dims(warp, 0)
resampled = tf.cast(resampler(data_inp, warp_inp), tf.uint8)

# Start a new session to show example output.
with tf.Session() as sess:
  # Required to get the filename matching to run.
  tf.global_variables_initializer().run()

  # Coordinate the loading of image files.
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(coord=coord)

  # Get an image tensor and print its value.
  output = sess.run(resampled)
  # Call form of print: valid on Python 2 and 3 (the statement form is a
  # syntax error on Python 3).
  print(output[0].shape)
  plt.figure()
def test_op_errors(self):
  """Verifies that invalid data/warp shapes are rejected by the resampler."""
  data_width = 7
  data_height = 9
  data_depth = 3
  data_channels = 5
  warp_width = 4
  warp_height = 8
  batch_size = 10

  def expect_error(data_shape, warp_shape, error_type, pattern):
    # Builds the op on zero-filled inputs in a fresh session and expects
    # running it to fail with `error_type` and a message matching `pattern`.
    with self.test_session() as sess:
      data = np.zeros(data_shape)
      warp = np.zeros(warp_shape)
      outputs = resampler.resampler(constant_op.constant(data),
                                    constant_op.constant(warp))

      with self.assertRaisesRegexp(error_type, pattern):
        sess.run(outputs)

  grid_data_shape = (batch_size, data_height, data_width, data_channels)

  # Input data shape is not defined over a 2D grid, i.e. its shape is not like
  # (batch_size, data_height, data_width, data_channels).
  expect_error(
      (batch_size, data_height, data_width, data_depth, data_channels),
      (batch_size, warp_height, warp_width, 2),
      errors_impl.UnimplementedError,
      "Only bilinear interpolation is currently "
      "supported.")

  # Warp tensor must be at least a matrix, with shape [batch_size, 2].
  expect_error(
      grid_data_shape,
      (batch_size,),
      errors_impl.InvalidArgumentError,
      "warp should be at least a matrix")

  # The batch size of the data and warp tensors must be the same.
  expect_error(
      grid_data_shape,
      (batch_size+1, warp_height, warp_width, 2),
      errors_impl.InvalidArgumentError,
      "Batch size of data and warp tensor")

  # The warp tensor must contain 2D coordinates, i.e. its shape last dimension
  # must be 2.
  expect_error(
      grid_data_shape,
      (batch_size, warp_height, warp_width, 3),
      errors_impl.UnimplementedError,
      "Only bilinear interpolation is supported, "
      "warping")
def motion_field_consistency_loss(frame1transformed_pixelxy, mask, rotation1,
                                  translation1, rotation2, translation2):
  """Computes a cycle consistency loss between two motion maps.

  Given two rotation and translation maps (of two frames), and a mapping from
  one frame to the other, this function assists in imposing that the fields at
  frame 1 represent the opposite motion of the ones in frame 2.

  In other words: At any given pixel on frame 1, if we apply the translation
  and rotation designated at that pixel, we land on some pixel in frame 2, and
  if we apply the translation and rotation designated there, we land back at
  the original pixel at frame 1.

  Args:
    frame1transformed_pixelxy: A tf.Tensor of shape [B, H, W, 2] representing
      the motion-transformed location of each pixel in frame 1. It is assumed
      (but not verified) that frame1transformed_pixelxy was obtained by
      properly applying rotation1 and translation1 on the depth map of
      frame 1. No gradient is propagated through it.
    mask: A tf.Tensor expressing the weight of each pixel in the calculation
      of the consistency loss. NOTE(review): previously documented as
      [B, H, W, 2], but it multiplies a quantity reduced over the last axis of
      a [B, H, W, 3] tensor here, i.e. [B, H, W] - confirm against callers.
    rotation1: A tf.Tensor of shape [B, 3] representing rotation angles.
    translation1: A tf.Tensor of shape [B, H, W, 3] representing translation
      vectors.
    rotation2: A tf.Tensor of shape [B, 3] representing rotation angles.
    translation2: A tf.Tensor of shape [B, H, W, 3] representing translation
      vectors.

  Returns:
    A dictionary from string to tf.Tensor, with the following entries:
      rotation_error: A tf scalar, the rotation consistency error.
      translation_error: A tf scalar, the translation consistency error.
  """
  # Translation of frame 2 looked up at the locations frame 1's pixels map to.
  translation2resampled = contrib_resampler.resampler(
      translation2, tf.stop_gradient(frame1transformed_pixelxy))

  # Broadcast the per-image rotation angles to per-pixel fields.
  rotation1field = tf.broadcast_to(
      _expand_dims_twice(rotation1, -2), tf.shape(translation1))
  rotation2field = tf.broadcast_to(
      _expand_dims_twice(rotation2, -2), tf.shape(translation2))
  rotation1matrix = transform_utils.matrix_from_angles(rotation1field)
  rotation2matrix = transform_utils.matrix_from_angles(rotation2field)

  # Composition of the two motions; named after the values expected for
  # perfectly consistent fields (unit rotation, zero translation).
  rot_unit, trans_zero = transform_utils.combine(
      rotation2matrix, translation2resampled,
      rotation1matrix, translation1)
  eye = tf.eye(3, batch_shape=tf.shape(rot_unit)[:-2])

  # (Two result-discarding `matrix_from_angles` calls that were marked
  # "Delete this later" have been removed; they had no effect on the outputs.)

  # We normalize the product of rotations by the product of their norms, to
  # make the loss agnostic of their magnitudes, only wanting them to be
  # opposite in directions. Otherwise the loss has a tendency to drive the
  # rotations to zero.
  rot_error = tf.reduce_mean(tf.square(rot_unit - eye), axis=(3, 4))
  rot1_scale = tf.reduce_mean(tf.square(rotation1matrix - eye), axis=(3, 4))
  rot2_scale = tf.reduce_mean(tf.square(rotation2matrix - eye), axis=(3, 4))
  rot_error /= (1e-24 + rot1_scale + rot2_scale)
  rotation_error = tf.reduce_mean(rot_error)

  def norm(x):
    # Squared length along the last axis (no square root is taken).
    return tf.reduce_sum(tf.square(x), axis=-1)

  # Here again, we normalize by the magnitudes, for the same reason.
  translation_error = tf.reduce_mean(
      mask * norm(trans_zero) /
      (1e-24 + norm(translation1) + norm(translation2)))

  return {
      'rotation_error': rotation_error,
      'translation_error': translation_error
  }
def _resample_depth(depth, coordinates):
  """Resamples a channel-less depth map at the given coordinates.

  A trailing channel axis is appended so that the resampler can be applied,
  and axis 3 of the result is squeezed away again.

  Args:
    depth: Depth tensor without a channel axis.
    coordinates: Sampling coordinates, passed to the resampler unchanged.

  Returns:
    The resampled depth with axis 3 removed.
  """
  depth_with_channel = tf.expand_dims(depth, -1)
  return tf.squeeze(
      contrib_resampler.resampler(depth_with_channel, coordinates), axis=3)