def true_fn(images, flow, mask):
    """Rescale the images (and flow/mask, if present) to a random resolution.

    The target resolution is drawn by `_get_random_scaled_resolution` using
    the scale and stretch settings taken from the enclosing scope.

    Args:
        images: image tensor; height and width are read from axes -3 and -2.
        flow: flow field belonging to the images, or None.
        mask: mask that is resized together with `flow`.

    Returns:
        The tuple (images, flow, mask). `flow` and `mask` are passed through
        untouched when `flow` is None.
    """
    image_shape = tf.shape(images)
    # The third value returned by the helper is not needed here.
    target_height, target_width, _ = _get_random_scaled_resolution(
        orig_height=image_shape[-3],
        orig_width=image_shape[-2],
        min_scale=min_scale,
        max_scale=max_scale,
        max_strech=max_strech,
        probability_strech=probability_strech)

    images = smurf_utils.resize(
        images, target_height, target_width, is_flow=False)
    if flow is None:
        return images, flow, mask

    # Flow and mask are resized jointly so that they stay aligned.
    flow, mask = smurf_utils.resize(
        flow, target_height, target_width, is_flow=True, mask=mask)
    return images, flow, mask
def parse_train_supervised(self, proto, height, width, resize_gt_flow):
    """Parse a supervised training example and optionally resize it.

    The proto is decoded with `self.parse_eval`. Resizing only happens when
    both `height` and `width` are given.

    Args:
        proto: Encoded data in proto / tf-sequence-example format.
        height: Desired image height, or None to skip resizing.
        width: Desired image width, or None to skip resizing.
        resize_gt_flow: Whether the ground truth flow (and its mask) should be
            resized along with the images.

    Returns:
        A dictionary containing:
            'images': the (possibly resized) image sequence
            'flow': the ground truth flow field
            'flow_valid': the mask marking pixels with ground truth flow
    """
    example = self.parse_eval(proto)
    frames = example['images']
    gt_flow = example['flow']
    gt_valid = example['flow_valid']

    # Without a complete target size everything is returned as parsed.
    if height is None or width is None:
        return {'images': frames, 'flow': gt_flow, 'flow_valid': gt_valid}

    frames = smurf_utils.resize(frames, height, width, is_flow=False)
    if resize_gt_flow:
        # Flow and mask are resized jointly so that they stay aligned.
        gt_flow, gt_valid = smurf_utils.resize(
            gt_flow, height, width, is_flow=True, mask=gt_valid)
    return {'images': frames, 'flow': gt_flow, 'flow_valid': gt_valid}
def parse_train(self, proto, height, width):
    """Decode a serialized sequence example into training images.

    Args:
        proto: Encoded data in proto / tf-sequence-example format.
        height: Desired image height, or None to keep the decoded size.
        width: Desired image width, or None to keep the decoded size.

    Returns:
        A dictionary with the single key 'images' holding the decoded float32
        image sequence, resized to [height, width] when both are given.
    """
    # Only the sequence features are used; the parsed context is discarded.
    _unused_context, sequence = tf.io.parse_single_sequence_example(
        proto,
        context_features=self._context_features,
        sequence_features=self._sequence_features)

    encoded_frames = sequence['images']
    frames = tf.map_fn(_deserialize_png, encoded_frames, dtype=tf.float32)

    has_target_size = height is not None and width is not None
    if has_target_size:
        frames = smurf_utils.resize(frames, height, width, is_flow=False)
    return {'images': frames}
def test_resize_sparse_flow(self):
    """Resize an 8x8 sparse flow field to 2x2 and check flow and mask."""
    size = 8
    # Every entry is zero except the top-left pixel, which carries the flow
    # vector (1, 0) and is the only pixel set in the mask.
    flow_values = [[[0, 0] for _ in range(size)] for _ in range(size)]
    flow_values[0][0] = [1, 0]
    mask_values = [[[0] for _ in range(size)] for _ in range(size)]
    mask_values[0][0] = [1]
    flow = tf.constant(flow_values, dtype=tf.float32)
    mask = tf.constant(mask_values, dtype=tf.float32)

    expected_flow = tf.constant(
        [[[0.25, 0], [0, 0]], [[0, 0], [0, 0]]], dtype=tf.float32)
    expected_mask = tf.constant(
        [[[1], [0]], [[0], [0]]], dtype=tf.float32)

    resized_flow, resized_mask = smurf_utils.resize(
        flow, 2, 2, is_flow=True, mask=mask)

    flow_matches = tf.reduce_all(
        tf.math.equal(resized_flow, expected_flow)).numpy()
    mask_matches = tf.reduce_all(
        tf.math.equal(resized_mask, expected_mask)).numpy()
    self.assertTrue(flow_matches)
    self.assertTrue(mask_matches)
def true_fn(images, flow, mask):
    """Randomly rescale only the second image and adapt flow and mask.

    A new resolution is drawn without stretching. The second image is resized
    to it, and whichever image ended up larger is center-cropped so that both
    images share one shape again.

    Args:
        images: stack of two images; height and width are read from axes -3
            and -2.
        flow: flow field belonging to the images, or None.
        mask: mask belonging to `flow`; it is multiplied into the flow.

    Returns:
        The tuple (images, flow, mask). `flow` and `mask` are passed through
        untouched when `flow` is None.
    """
    image_shape = tf.shape(images)
    orig_height = image_shape[-3]
    orig_width = image_shape[-2]
    new_height, new_width, scale = _get_random_scaled_resolution(
        orig_height=orig_height,
        orig_width=orig_width,
        min_scale=min_scale,
        max_scale=max_scale,
        max_strech=0.0,
        probability_strech=0.0)
    is_downscaled = scale < 1.0

    # Only the second image is rescaled.
    first, second = tf.unstack(images)
    second = smurf_utils.resize(second, new_height, new_width, is_flow=False)

    # Crop whichever image is larger so that both have matching dimensions.
    if is_downscaled:
        first = _center_crop(first, new_height, new_width)
    else:
        second = _center_crop(second, orig_height, orig_width)
    images = tf.stack([first, second])

    if flow is not None:
        # Pixel locations with the origin in the image center.
        positions = _positions_center_origin(orig_height, orig_width)

        # Scale factors of the resolution that was actually used.
        height_ratio = (
            tf.cast(new_height, tf.float32) / tf.cast(orig_height, tf.float32))
        width_ratio = (
            tf.cast(new_width, tf.float32) / tf.cast(orig_width, tf.float32))
        scale_flow = tf.stack([height_ratio, width_ratio])

        # Scale the flow targets about the image center; multiplying by the
        # mask zeroes the flow at invalid locations.
        flow = ((positions + flow) * scale_flow - positions) * mask

        if is_downscaled:
            # The first image was cropped above, so flow and mask are cropped
            # the same way to keep the same shape.
            flow = _center_crop(flow, new_height, new_width)
            mask = _center_crop(mask, new_height, new_width)
    return images, flow, mask
def transform(images, is_flow, crop_height, crop_width, resize):
    """Crop a border off a batch and optionally resize back to the input size.

    Args:
        images: 4-D tensor; height and width are read from its static shape
            at axes -3 and -2.
        is_flow: forwarded to `smurf_utils.resize` as `is_flow`.
        crop_height: number of rows removed at both the top and the bottom.
        crop_width: number of columns removed at both the left and the right.
        resize: if true, the cropped result is resized back to the original
            height and width.

    Returns:
        The cropped (and possibly resized) tensor with its static shape set.
    """
    height, width = images.shape[-3], images.shape[-2]

    # The image must be strictly larger than the two borders that are removed.
    size_checks = [
        tf.compat.v1.assert_greater(
            height, 2 * crop_height,
            message='Image height is too small for cropping.'),
        tf.compat.v1.assert_greater(
            width, 2 * crop_width,
            message='Image width is too small for cropping.'),
    ]
    with tf.control_dependencies(size_checks):
        images = images[:,
                        crop_height:height - crop_height,
                        crop_width:width - crop_width,
                        :]

    if resize:
        images = smurf_utils.resize(images, height, width, is_flow=is_flow)
        static_shape = (images.shape[0], height, width, images.shape[3])
    else:
        static_shape = (images.shape[0],
                        height - 2 * crop_height,
                        width - 2 * crop_width,
                        images.shape[3])
    images.set_shape(static_shape)
    return images
def parse_data(proto, height, width):
    """Decode the PNG image sequence stored in a serialized sequence example.

    Args:
        proto: Encoded data in proto / tf-sequence-example format.
        height: int, desired image height, or None to keep the decoded size.
        width: int, desired image width, or None to keep the decoded size.

    Returns:
        The image sequence as a float32 tf.Tensor, resized to [height, width]
        when both are given.
    """
    context_spec = {
        'height': tf.io.FixedLenFeature([], tf.int64),
        'width': tf.io.FixedLenFeature([], tf.int64),
    }
    sequence_spec = {
        'images': tf.io.FixedLenSequenceFeature([], tf.string),
    }
    # The context is parsed but not used afterwards.
    _unused_context, sequence = tf.io.parse_single_sequence_example(
        proto,
        context_features=context_spec,
        sequence_features=sequence_spec)

    def png_to_float(encoded_png):
        """Decode one PNG string and convert it to float32."""
        decoded = tf.image.decode_png(encoded_png)
        return tf.image.convert_image_dtype(decoded, tf.float32)

    frames = tf.map_fn(png_to_float, sequence['images'], dtype=tf.float32)

    has_target_size = height is not None and width is not None
    if has_target_size:
        frames = smurf_utils.resize(frames, height, width, is_flow=False)
    return frames
def random_crop(images, flow, mask, crop_height, crop_width, relative_offset,
                probability_crop_offset):
    """Performs a random crop with the given height and width.

    If the requested crop is larger than the input, the input is first
    upscaled just enough for the crop to fit. The first image, the flow and
    the mask are cropped at a joint random offset. With probability
    `probability_crop_offset` the second image is cropped at an offset that
    differs from the joint one by at most `relative_offset` pixels, and the
    flow is corrected for that shift.

    Args:
        images: stack of exactly two images (it is unstacked into two);
            height and width are read from axes -3 and -2.
        flow: flow field belonging to the images, or None.
        mask: mask belonging to `flow`; it is multiplied into the flow.
        crop_height: height of the crop, or None to skip cropping.
        crop_width: width of the crop, or None to skip cropping.
        relative_offset: maximum difference between the crop offsets of the
            first and the second image.
        probability_crop_offset: probability of using a different crop offset
            for the second image.

    Returns:
        A tuple (images, flow, mask, joint_offset_h, joint_offset_w), where
        the offsets locate the crop of the first image. When `crop_height` or
        `crop_width` is None the inputs are returned unchanged with both
        offsets equal to 0.
    """
    # Early return if crop_height or crop_width is not specified. The result
    # has the same arity as the regular return, so callers can always unpack
    # five values; nothing was cropped, hence both offsets are 0.
    if crop_height is None or crop_width is None:
        return images, flow, mask, 0, 0

    orig_height = tf.shape(images)[-3]
    orig_width = tf.shape(images)[-2]

    # Check if the crop size fits the image size; scale stays 1 if it does.
    scale = 1.0
    ratio = tf.cast(crop_height, tf.float32) / tf.cast(orig_height, tf.float32)
    scale = tf.math.maximum(scale, ratio)
    ratio = tf.cast(crop_width, tf.float32) / tf.cast(orig_width, tf.float32)
    scale = tf.math.maximum(scale, ratio)

    # Compute the minimum required height and width.
    new_height = tf.cast(
        tf.math.ceil(tf.cast(orig_height, tf.float32) * scale), tf.int32)
    new_width = tf.cast(
        tf.math.ceil(tf.cast(orig_width, tf.float32) * scale), tf.int32)

    # Perform resize (scales with 1 if not required).
    images = smurf_utils.resize(images, new_height, new_width, is_flow=False)

    # Compute the joint offset, drawn from [0, max_offset] inclusive.
    max_offset_h = new_height - tf.cast(crop_height, dtype=tf.int32)
    max_offset_w = new_width - tf.cast(crop_width, dtype=tf.int32)
    joint_offset_h = tf.random.uniform(
        [], maxval=max_offset_h + 1, dtype=tf.int32)
    joint_offset_w = tf.random.uniform(
        [], maxval=max_offset_w + 1, dtype=tf.int32)

    # Compute the relative offset: within `relative_offset` of the joint
    # offset, clamped to the valid offset range.
    min_relative_offset_h = tf.math.maximum(
        joint_offset_h - relative_offset, 0)
    max_relative_offset_h = tf.math.minimum(
        joint_offset_h + relative_offset, max_offset_h)
    min_relative_offset_w = tf.math.maximum(
        joint_offset_w - relative_offset, 0)
    max_relative_offset_w = tf.math.minimum(
        joint_offset_w + relative_offset, max_offset_w)
    relative_offset_h = tf.random.uniform(
        [], minval=min_relative_offset_h, maxval=max_relative_offset_h + 1,
        dtype=tf.int32)
    relative_offset_w = tf.random.uniform(
        [], minval=min_relative_offset_w, maxval=max_relative_offset_w + 1,
        dtype=tf.int32)

    # Fall back to the joint offset unless the relative offset is selected.
    set_crop_offset = tf.random.uniform([]) < probability_crop_offset
    relative_offset_h = tf.cond(
        set_crop_offset, lambda: relative_offset_h, lambda: joint_offset_h)
    relative_offset_w = tf.cond(
        set_crop_offset, lambda: relative_offset_w, lambda: joint_offset_w)

    # Crop both images.
    image_1, image_2 = tf.unstack(images)
    image_1 = tf.image.crop_to_bounding_box(
        image_1,
        offset_height=joint_offset_h,
        offset_width=joint_offset_w,
        target_height=crop_height,
        target_width=crop_width)
    image_2 = tf.image.crop_to_bounding_box(
        image_2,
        offset_height=relative_offset_h,
        offset_width=relative_offset_w,
        target_height=crop_height,
        target_width=crop_width)
    images = tf.stack([image_1, image_2])

    if flow is not None:
        # Perform resize (scales with 1 if not required).
        flow, mask = smurf_utils.resize(
            flow, new_height, new_width, is_flow=True, mask=mask)

        # Crop flow and mask at the offset of the first image.
        flow = tf.image.crop_to_bounding_box(
            flow,
            offset_height=joint_offset_h,
            offset_width=joint_offset_w,
            target_height=crop_height,
            target_width=crop_width)
        mask = tf.image.crop_to_bounding_box(
            mask,
            offset_height=joint_offset_h,
            offset_width=joint_offset_w,
            target_height=crop_height,
            target_width=crop_width)

        # Correct flow for the relative shift between the two crops.
        flow_delta = tf.stack(
            [tf.cast(relative_offset_h - joint_offset_h, tf.float32),
             tf.cast(relative_offset_w - joint_offset_w, tf.float32)])
        flow = (flow - flow_delta) * mask
    return images, flow, mask, joint_offset_h, joint_offset_w
def batch_infer_no_tf_function(self,
                               images,
                               input_height=None,
                               input_width=None,
                               resize_flow_to_img_res=True,
                               infer_occlusion=False,
                               infer_bw=False):
    """Infer flow for two images.

    Args:
        images: tf.tensor of shape [batchsize, 2, height, width, 3].
        input_height: height at which the model should be applied if
            different from image height.
        input_width: width at which the model should be applied if different
            from image width.
        resize_flow_to_img_res: bool, if True, return the flow resized to the
            same resolution as (image1, image2). If False, return flow at
            whatever resolution the model natively predicts it.
        infer_occlusion: bool, if True, additionally return an occlusion mask
            resized to the image resolution.
        infer_bw: bool, if True, additionally return the flow in the reverse
            direction.

    Returns:
        `flow` by default, `(flow, occlusion_mask)` if only `infer_occlusion`
        is set, `(flow, flow_backward)` if only `infer_bw` is set, and
        `(flow, occlusion_mask, flow_backward)` if both are set.
    """
    orig_height, orig_width = images.shape[-3:-1]

    if input_height is None:
        input_height = orig_height
    if input_width is None:
        input_width = orig_width

    # Ensure a feasible computation resolution. If specified size is not
    # feasible with the model, change it to a slightly higher resolution.
    if self._flow_architecture == 'pwc':
        divisible_by_num = pow(2.0, self._num_levels)
    elif self._flow_architecture == 'raft':
        divisible_by_num = 8.0
    else:
        divisible_by_num = 1.

    if (input_height % divisible_by_num != 0 or
            input_width % divisible_by_num != 0):
        print('Cannot process images at a resolution of '+str(input_height)+
              'x'+str(input_width)+', since the height and/or width is not a '
              'multiple of '+str(divisible_by_num)+'.')
        # Round both sides up to the next multiple of divisible_by_num.
        input_height = int(
            math.ceil(float(input_height) / divisible_by_num) *
            divisible_by_num)
        input_width = int(
            math.ceil(float(input_width) / divisible_by_num) *
            divisible_by_num)
        print('Inference will be run at a resolution of '+str(input_height)+
              'x'+str(input_width)+'.')

    # Resize images to desired input height and width.
    if input_height != orig_height or input_width != orig_width:
        images = smurf_utils.resize(
            images, input_height, input_width, is_flow=False)

    # Backward flow is computed for the occlusion mask as well as for
    # infer_bw, so bidirectional features are requested in both cases (and
    # not only when an occlusion mask is wanted).
    needs_backward = infer_occlusion or infer_bw
    feature_dict = self._feature_model(
        images[:, 0], images[:, 1], bidirectional=needs_backward)

    # Compute flow in frame of image1.
    # noinspection PyCallingNonCallable
    flow = self._flow_model(feature_dict, training=False)[0]

    if needs_backward:
        # noinspection PyCallingNonCallable
        flow_backward = self._flow_model(
            feature_dict, training=False, backward=True)[0]

    if infer_occlusion:
        # The mask is only part of the result when infer_occlusion is set, so
        # it is not computed for a pure infer_bw request.
        occlusion_mask = self.infer_occlusion(flow, flow_backward)
        occlusion_mask = smurf_utils.resize(
            occlusion_mask, orig_height, orig_width, is_flow=False)

    # Resize and rescale flow to the original image resolution on request.
    if resize_flow_to_img_res:
        flow = smurf_utils.resize(flow, orig_height, orig_width, is_flow=True)
        if infer_bw:
            flow_backward = smurf_utils.resize(
                flow_backward, orig_height, orig_width, is_flow=True)

    # TODO: A dictionary or object output here would be preferable to tuples.
    if infer_occlusion and infer_bw:
        return flow, occlusion_mask, flow_backward
    if infer_bw:
        return flow, flow_backward
    if infer_occlusion:
        return flow, occlusion_mask
    return flow
def parse_data(proto,
               include_flow,
               height=None,
               width=None,
               include_occlusion=False,
               include_invalid=False,
               resize_gt_flow=True,
               include_image_path=False,
               gt_flow_shape=None,
               include_segments=False):
    """Parse a data proto with flow.

    Args:
        proto: serialized tf-sequence-example to parse.
        include_flow: bool, whether or not to include flow in the output.
        height: int or None, height to resize the data to.
        width: int or None, width to resize the data to. Resizing only
            happens when both `height` and `width` are given.
        include_occlusion: bool, whether or not to also return occluded
            pixels (the 'occlusion_mask' feature must be present).
        include_invalid: bool, whether or not to also return invalid pixels
            (the 'invalid_masks' feature must be present). If set,
            'flow_valid' is `1 - invalid_masks` and replaces the mask derived
            from the flow values.
        resize_gt_flow: bool, whether or not to resize the ground truth flow
            like the images.
        include_image_path: bool, if True, return the string stored under the
            key "image1_path" alongside the data.
        gt_flow_shape: list, shape of the original ground truth flow (only
            used to set a fixed ground truth flow shape when the flow is not
            resized).
        include_segments: bool, if True, include the segmentation data.

    Returns:
        A dictionary that always contains 'images' and, depending on the
        flags, 'flow', 'flow_valid', 'occlusions', 'image1_path', 'segments'
        and 'segments_invalid'.
    """
    should_resize = height is not None and width is not None

    # Parse context and image sequence from protobuffer.
    context_features = {
        'height': tf.io.FixedLenFeature([], tf.int64),
        'width': tf.io.FixedLenFeature([], tf.int64),
    }
    sequence_features = {
        'images': tf.io.FixedLenSequenceFeature([], tf.string),
    }

    if include_invalid:
        sequence_features['invalid_masks'] = tf.io.FixedLenSequenceFeature(
            [], tf.string)

    if include_segments:
        sequence_features['segments'] = tf.io.FixedLenSequenceFeature(
            [], tf.string)
        sequence_features['segments_invalid'] = (
            tf.io.FixedLenSequenceFeature([], tf.string))

    if include_image_path:
        context_features['image1_path'] = tf.io.FixedLenFeature((), tf.string)

    if include_flow:
        context_features['flow_uv'] = tf.io.FixedLenFeature([], tf.string)

    if include_occlusion:
        context_features['occlusion_mask'] = tf.io.FixedLenFeature(
            [], tf.string)

    context_parsed, sequence_parsed = tf.io.parse_single_sequence_example(
        proto,
        context_features=context_features,
        sequence_features=sequence_features,
    )

    def deserialize(s, dtype, dims):
        """Decode raw bytes into a [height, width, dims] tensor."""
        return tf.reshape(
            tf.io.decode_raw(s, dtype),
            [context_parsed['height'], context_parsed['width'], dims])

    images = tf.map_fn(
        lambda s: deserialize(s, tf.uint8, 3),
        sequence_parsed['images'],
        dtype=tf.uint8)
    images = tf.image.convert_image_dtype(images, tf.float32)
    if should_resize:
        images = smurf_utils.resize(images, height, width, is_flow=False)
    output = {'images': images}

    if include_flow:
        flow_uv = deserialize(context_parsed['flow_uv'], tf.float32, 2)
        # To be consistent with SMURF internals, we flip the ordering of flow.
        flow_uv = flow_uv[Ellipsis, ::-1]
        # Flying things has some images with erroneously large flow.
        # Mask out any values above / below 1000.
        invalid_cond = tf.math.logical_or(
            tf.greater(flow_uv, 1000), tf.less(flow_uv, -1000))
        mask = tf.where(
            invalid_cond, tf.zeros_like(flow_uv), tf.ones_like(flow_uv))
        # A pixel is valid only if both of its flow components are valid.
        flow_valid = tf.reduce_min(mask, axis=-1, keepdims=True)
        if should_resize and resize_gt_flow:
            flow_uv = smurf_utils.resize(flow_uv, height, width, is_flow=True)
            flow_valid = smurf_utils.resize(
                flow_valid, height, width, is_flow=False)
        elif gt_flow_shape is not None:
            flow_uv.set_shape(gt_flow_shape)
            flow_valid.set_shape((gt_flow_shape[0], gt_flow_shape[1], 1))
        # The mask computed above is returned as is. It must not be replaced
        # by an all-ones mask, which would re-enable the erroneous flow values
        # that were just masked out.
        output['flow_valid'] = flow_valid
        output['flow'] = flow_uv

    if include_occlusion:
        occlusion_mask = deserialize(
            context_parsed['occlusion_mask'], tf.uint8, 1)
        if should_resize:
            occlusion_mask = smurf_utils.resize(
                occlusion_mask, height, width, is_flow=False)
        output['occlusions'] = occlusion_mask

    if include_invalid:
        invalid_masks = tf.map_fn(
            lambda s: deserialize(s, tf.uint8, 1),
            sequence_parsed['invalid_masks'],
            dtype=tf.uint8)
        if should_resize:
            invalid_masks = smurf_utils.resize(
                invalid_masks, height, width, is_flow=False)
        # Without a resize the masks are still uint8; cast first so that the
        # float subtraction is well defined on every path.
        output['flow_valid'] = 1. - tf.cast(invalid_masks, tf.float32)

    if include_image_path:
        output['image1_path'] = context_parsed['image1_path']

    if include_segments:
        segments = tf.map_fn(
            lambda s: deserialize(s, tf.uint8, 3),
            sequence_parsed['segments'],
            dtype=tf.uint8)
        segments = tf.image.convert_image_dtype(segments, tf.float32)
        segments_invalid = tf.map_fn(
            lambda s: deserialize(s, tf.uint8, 1),
            sequence_parsed['segments_invalid'],
            dtype=tf.uint8)
        segments_invalid = tf.image.convert_image_dtype(
            segments_invalid, tf.float32)
        # Like every other output, segments are only resized when a full
        # target size is given (height and width default to None).
        if should_resize:
            segments = tf.image.resize(
                segments, (height, width), method='nearest')
            segments_invalid = tf.image.resize(
                segments_invalid, (height, width), method='nearest')
        output['segments'] = segments
        output['segments_invalid'] = segments_invalid

    return output
def parse_supervised_train_data(proto, height, width, resize_gt_flow):
    """Parse a supervised training example from its byte-encoding.

    The evaluation parser `parse_eval_data` does the decoding; of its outputs
    the 'occ' flow and validity mask are used. If the ground truth flow is
    not resized, images, flow and mask are center-cropped to 370x1224.

    Args:
        proto: Encoded data in proto / tf-sequence-example format.
        height: int, desired image height, or None to skip resizing.
        width: int, desired image width, or None to skip resizing.
        resize_gt_flow: bool, whether or not to resize the flow according to
            the images.

    Returns:
        A dictionary with the keys 'images', 'flow' and 'flow_valid'. The
        flow has the order of its last axis reversed compared to the parsed
        data, and the mask is float32.
    """
    crop_height, crop_width = 370, 1224

    # Reuse the evaluation parser to parse the supervised data.
    parsed = parse_eval_data(proto)
    images = parsed['images']
    flow_occ = parsed['flow_uv_occ']
    valid_occ = tf.cast(parsed['flow_valid_occ'], tf.float32)

    resize_flow = resize_gt_flow and height is not None and width is not None

    if not resize_flow:
        # Crop to a size that fits all KITTI 2015 image resolutions. The
        # first 156 sequences have a resolution of 375x1242, the remaining 44
        # sequences include resolutions of 370x1224, 374x1238, and 376x1241.
        _, orig_height, orig_width, _ = tf.unstack(tf.shape(images))
        offset_height = tf.cast((orig_height - crop_height) / 2, tf.int32)
        offset_width = tf.cast((orig_width - crop_width) / 2, tf.int32)

        def center_crop(tensor):
            """Cut the centered crop window out of `tensor`."""
            return tf.image.crop_to_bounding_box(
                tensor,
                offset_height=offset_height,
                offset_width=offset_width,
                target_height=crop_height,
                target_width=crop_width)

        images = center_crop(images)
        flow_occ = center_crop(flow_occ)
        valid_occ = center_crop(valid_occ)

    # Images are resized whenever a full target size is given.
    if height is not None and width is not None:
        images = smurf_utils.resize(images, height, width, is_flow=False)

    # The label order of the flow is swapped in both branches.
    swapped_flow = flow_occ[Ellipsis, ::-1]
    if resize_flow:
        flow_uv, flow_valid = smurf_utils.resize(
            swapped_flow, height, width, is_flow=True, mask=valid_occ)
    else:
        flow_uv = swapped_flow
        flow_valid = valid_occ
        # Set shape to work with tf estimator.
        flow_uv.set_shape([crop_height, crop_width, 2])
        flow_valid.set_shape([crop_height, crop_width, 1])

    return {'images': images, 'flow': flow_uv, 'flow_valid': flow_valid}