Example #1
    def true_fn(images, flow, mask):
        # Get a random new resolution to which the images will be scaled.
        orig_height = tf.shape(images)[-3]
        orig_width = tf.shape(images)[-2]
        new_height, new_width, _ = _get_random_scaled_resolution(
            orig_height=orig_height,
            orig_width=orig_width,
            min_scale=min_scale,
            max_scale=max_scale,
            max_strech=max_strech,
            probability_strech=probability_strech)

        # Rescale the images (and flow).
        images = smurf_utils.resize(images,
                                    new_height,
                                    new_width,
                                    is_flow=False)

        if flow is not None:
            flow, mask = smurf_utils.resize(flow,
                                            new_height,
                                            new_width,
                                            is_flow=True,
                                            mask=mask)
        return images, flow, mask
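For context, a true_fn like this is normally applied with some probability via tf.cond inside the enclosing augmentation function. A minimal gating sketch, assuming a hypothetical probability_scale parameter and a pass-through false branch:

    perform_scale = tf.random.uniform([]) < probability_scale
    images, flow, mask = tf.cond(
        perform_scale,
        lambda: true_fn(images, flow, mask),
        lambda: (images, flow, mask))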
Example #2
    def parse_train_supervised(self, proto, height, width, resize_gt_flow):
        """Parse proto from byte-encoding to the correct type and shape.

        Args:
          proto: Encoded data in proto / tf-sequence-example format.
          height: Desired image height.
          width: Desired image width.
          resize_gt_flow: Indicates if ground truth flow should be resized.

        Returns:
          A dictionary containing:
            'images': a sequence of tf.Tensor images
            'flow': a ground truth flow field in uv format
            'flow_valid': a mask indicating which pixels have ground truth flow
        """
        parsed_data = self.parse_eval(proto)
        images = parsed_data['images']
        flow_uv = parsed_data['flow']
        mask_valid = parsed_data['flow_valid']

        # Resize images and flow.
        if height is not None and width is not None:
            images = smurf_utils.resize(images, height, width, is_flow=False)
            if resize_gt_flow:
                flow_uv, mask_valid = smurf_utils.resize(flow_uv,
                                                         height,
                                                         width,
                                                         is_flow=True,
                                                         mask=mask_valid)

        return {'images': images, 'flow': flow_uv, 'flow_valid': mask_valid}
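A minimal usage sketch for this parser, assuming data_loader is an instance of the surrounding loader class and the TFRecord path is a placeholder:

    dataset = tf.data.TFRecordDataset('/path/to/train.tfrecord')
    dataset = dataset.map(
        lambda proto: data_loader.parse_train_supervised(
            proto, height=384, width=512, resize_gt_flow=True))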
Example #3
  def parse_train(self, proto, height, width):
    """Parse features from byte-encoding to the correct type and shape.

    Args:
      proto: Encoded data in proto / tf-sequence-example.
      height: int, desired image height.
      width: int, desired image width.

    Returns:
      A sequence of images as tf.Tensor of shape [2, height, width, 3].
    """
    _, sequence_parsed = tf.io.parse_single_sequence_example(
        proto,
        context_features=self._context_features,
        sequence_features=self._sequence_features)

    # Deserialize images to float32 tensors.
    images = tf.map_fn(
        _deserialize_png, sequence_parsed['images'], dtype=tf.float32)

    # Resize images.
    if height is not None and width is not None:
      images = smurf_utils.resize(images, height, width, is_flow=False)

    return {'images': images}
Example #4
  def test_resize_sparse_flow(self):
    flow = tf.constant(
        [[[1, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
         [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
         [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
         [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
         [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
         [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
         [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
         [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]],
        dtype=tf.float32)
    mask = tf.constant([[[1], [0], [0], [0], [0], [0], [0], [0]],
                        [[0], [0], [0], [0], [0], [0], [0], [0]],
                        [[0], [0], [0], [0], [0], [0], [0], [0]],
                        [[0], [0], [0], [0], [0], [0], [0], [0]],
                        [[0], [0], [0], [0], [0], [0], [0], [0]],
                        [[0], [0], [0], [0], [0], [0], [0], [0]],
                        [[0], [0], [0], [0], [0], [0], [0], [0]],
                        [[0], [0], [0], [0], [0], [0], [0], [0]]],
                       dtype=tf.float32)
    flow_result = tf.constant([[[0.25, 0], [0, 0]], [[0, 0], [0, 0]]],
                              dtype=tf.float32)
    mask_result = tf.constant([[[1], [0]], [[0], [0]]], dtype=tf.float32)
    flow_resized, mask_resized = smurf_utils.resize(flow,
                                                    2,
                                                    2,
                                                    is_flow=True,
                                                    mask=mask)
    flow_okay = tf.reduce_all(tf.math.equal(flow_resized,
                                            flow_result)).numpy()
    mask_okay = tf.reduce_all(tf.math.equal(mask_resized,
                                            mask_result)).numpy()
    self.assertTrue(flow_okay)
    self.assertTrue(mask_okay)
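The expected values follow from how resize handles flow: downsampling the 8x8 field to 2x2 rescales each flow component by 2/8 = 0.25, so the single valid vector (1, 0) becomes (0.25, 0). The mask is a validity indicator rather than a flow field, so it is resized without rescaling its values and the valid entry survives as 1.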
Example #5
    def true_fn(images, flow, mask):
        # choose a random scale factor and compute new resolution
        orig_height = tf.shape(images)[-3]
        orig_width = tf.shape(images)[-2]
        new_height, new_width, scale = _get_random_scaled_resolution(
            orig_height=orig_height,
            orig_width=orig_width,
            min_scale=min_scale,
            max_scale=max_scale,
            max_strech=0.0,
            probability_strech=0.0)

        # rescale only the second image
        image_1, image_2 = tf.unstack(images)
        image_2 = smurf_utils.resize(image_2,
                                     new_height,
                                     new_width,
                                     is_flow=False)
        # Crop either first or second image to have matching dimensions
        if scale < 1.0:
            image_1 = _center_crop(image_1, new_height, new_width)
        else:
            image_2 = _center_crop(image_2, orig_height, orig_width)
        images = tf.stack([image_1, image_2])

        if flow is not None:
            # get current locations (with the origin in the image center)
            positions = _positions_center_origin(orig_height, orig_width)

            # compute scale factor of the actual new image resolution
            scale_flow_h = tf.cast(new_height, tf.float32) / tf.cast(
                orig_height, tf.float32)
            scale_flow_w = tf.cast(new_width, tf.float32) / tf.cast(
                orig_width, tf.float32)
            scale_flow = tf.stack([scale_flow_h, scale_flow_w])

            # compute augmented flow (multiply by mask to zero invalid flow locations)
            flow = ((positions + flow) * scale_flow - positions) * mask

            if scale < 1.0:
                # in case we downsample the image we crop the reference image to keep
                # the same shape
                flow = _center_crop(flow, new_height, new_width)
                mask = _center_crop(mask, new_height, new_width)
        return images, flow, mask
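The update (positions + flow) * scale_flow - positions maps each correspondence through the rescaling of the second image: a target location p + f lands at (p + f) * s after resizing, while the source position p in the first image is unchanged. A small numeric check with illustrative values:

        p = tf.constant([10.0, 20.0])  # source position (origin at center)
        f = tf.constant([2.0, -3.0])   # original flow vector
        s = tf.constant([0.5, 0.5])    # scale of the second image
        f_new = (p + f) * s - p        # same expression as in true_fn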
Example #6
  def transform(images, is_flow, crop_height, crop_width, resize):
    """Crops a fixed border and optionally resizes back to the input size."""
    height = images.shape[-3]
    width = images.shape[-2]

    op5 = tf.compat.v1.assert_greater(
        height,
        2 * crop_height,
        message='Image height is too small for cropping.')
    op6 = tf.compat.v1.assert_greater(
        width, 2 * crop_width, message='Image width is too small for cropping.')
    with tf.control_dependencies([op5, op6]):
      images = images[:, crop_height:height - crop_height,
                      crop_width:width - crop_width, :]
    if resize:
      images = smurf_utils.resize(images, height, width, is_flow=is_flow)
      images.set_shape((images.shape[0], height, width, images.shape[3]))
    else:
      images.set_shape((images.shape[0], height - 2 * crop_height,
                        width - 2 * crop_width, images.shape[3]))
    return images
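A minimal call sketch, assuming a two-frame image tensor; the asserts require the crop to leave more than half of each dimension:

    images = tf.random.uniform([2, 256, 512, 3])
    # Crop 16 px from every border, then resize back to 256x512.
    cropped = transform(images, is_flow=False, crop_height=16, crop_width=16,
                        resize=True)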
Example #7
def parse_data(proto, height, width):
    """Parse features from byte-encoding to the correct type and shape.

  Args:
    proto: Encoded data in proto / tf-sequence-example format.
    height: int, desired image height.
    width: int, desired image width.

  Returns:
    A sequence of images as tf.Tensor of shape
    [sequence length, height, width, 3].
  """

    # Parse context and image sequence from protobuffer.
    unused_context_parsed, sequence_parsed = tf.io.parse_single_sequence_example(
        proto,
        context_features={
            'height': tf.io.FixedLenFeature([], tf.int64),
            'width': tf.io.FixedLenFeature([], tf.int64)
        },
        sequence_features={
            'images': tf.io.FixedLenSequenceFeature([], tf.string)
        })

    # Deserialize images to float32 tensors.
    def deserialize(image_raw):
        image_uint = tf.image.decode_png(image_raw)
        image_float = tf.image.convert_image_dtype(image_uint, tf.float32)
        return image_float

    images = tf.map_fn(deserialize,
                       sequence_parsed['images'],
                       dtype=tf.float32)

    # Resize images.
    if height is not None and width is not None:
        images = smurf_utils.resize(images, height, width, is_flow=False)

    return images
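Like the other parsers here, this function is typically mapped over a TFRecord dataset; the file path below is a placeholder:

    dataset = tf.data.TFRecordDataset('/path/to/images.tfrecord')
    dataset = dataset.map(
        lambda proto: parse_data(proto, height=384, width=512))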
Example #8
def random_crop(images,
                flow,
                mask,
                crop_height,
                crop_width,
                relative_offset,
                probability_crop_offset):
  """Performs a random crop with the given height and width."""
  # early return if crop_height or crop_width is not specified
  if crop_height is None or crop_width is None:
    return images, flow, mask

  orig_height = tf.shape(images)[-3]
  orig_width = tf.shape(images)[-2]

  # check if crop size fits the image size
  scale = 1.0
  ratio = tf.cast(crop_height, tf.float32) / tf.cast(orig_height, tf.float32)
  scale = tf.math.maximum(scale, ratio)
  ratio = tf.cast(crop_width, tf.float32) / tf.cast(orig_width, tf.float32)
  scale = tf.math.maximum(scale, ratio)
  # compute the minimum required height and width
  new_height = tf.cast(
      tf.math.ceil(tf.cast(orig_height, tf.float32) * scale), tf.int32)
  new_width = tf.cast(
      tf.math.ceil(tf.cast(orig_width, tf.float32) * scale), tf.int32)
  # perform the resize (the scale factor is 1 if no resize is required)
  images = smurf_utils.resize(images, new_height, new_width, is_flow=False)

  # compute joint offset
  max_offset_h = new_height - tf.cast(crop_height, dtype=tf.int32)
  max_offset_w = new_width - tf.cast(crop_width, dtype=tf.int32)
  joint_offset_h = tf.random.uniform([], maxval=max_offset_h+1, dtype=tf.int32)
  joint_offset_w = tf.random.uniform([], maxval=max_offset_w+1, dtype=tf.int32)

  # compute relative offset
  min_relative_offset_h = tf.math.maximum(
      joint_offset_h - relative_offset, 0)
  max_relative_offset_h = tf.math.minimum(
      joint_offset_h + relative_offset, max_offset_h)
  min_relative_offset_w = tf.math.maximum(
      joint_offset_w - relative_offset, 0)
  max_relative_offset_w = tf.math.minimum(
      joint_offset_w + relative_offset, max_offset_w)

  relative_offset_h = tf.random.uniform(
      [], minval=min_relative_offset_h, maxval=max_relative_offset_h+1,
      dtype=tf.int32)
  relative_offset_w = tf.random.uniform(
      [], minval=min_relative_offset_w, maxval=max_relative_offset_w+1,
      dtype=tf.int32)

  set_crop_offset = tf.random.uniform([]) < probability_crop_offset
  relative_offset_h = tf.cond(
      set_crop_offset, lambda: relative_offset_h, lambda: joint_offset_h)
  relative_offset_w = tf.cond(
      set_crop_offset, lambda: relative_offset_w, lambda: joint_offset_w)

  # crop both images
  image_1, image_2 = tf.unstack(images)
  image_1 = tf.image.crop_to_bounding_box(
      image_1, offset_height=joint_offset_h, offset_width=joint_offset_w,
      target_height=crop_height, target_width=crop_width)
  image_2 = tf.image.crop_to_bounding_box(
      image_2, offset_height=relative_offset_h, offset_width=relative_offset_w,
      target_height=crop_height, target_width=crop_width)
  images = tf.stack([image_1, image_2])

  if flow is not None:
    # perform the resize (the scale factor is 1 if no resize is required)
    flow, mask = smurf_utils.resize(
        flow, new_height, new_width, is_flow=True, mask=mask)

    # crop flow and mask
    flow = tf.image.crop_to_bounding_box(
        flow,
        offset_height=joint_offset_h,
        offset_width=joint_offset_w,
        target_height=crop_height,
        target_width=crop_width)
    mask = tf.image.crop_to_bounding_box(
        mask,
        offset_height=joint_offset_h,
        offset_width=joint_offset_w,
        target_height=crop_height,
        target_width=crop_width)

    # correct the flow for the relative shift between the two crops
    flow_delta = tf.stack(
        [tf.cast(relative_offset_h - joint_offset_h, tf.float32),
         tf.cast(relative_offset_w - joint_offset_w, tf.float32)])
    flow = (flow - flow_delta) * mask
  return images, flow, mask, joint_offset_h, joint_offset_w
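A minimal sketch of applying this crop to an image pair with dense ground truth; the shapes and parameters are illustrative:

    images = tf.random.uniform([2, 384, 512, 3])
    flow = tf.zeros([384, 512, 2])
    mask = tf.ones([384, 512, 1])
    images, flow, mask, offset_h, offset_w = random_crop(
        images, flow, mask, crop_height=320, crop_width=448,
        relative_offset=8, probability_crop_offset=0.5)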
Example #9
  def batch_infer_no_tf_function(self,
                                 images,
                                 input_height=None,
                                 input_width=None,
                                 resize_flow_to_img_res=True,
                                 infer_occlusion=False,
                                 infer_bw=False):
    """Infer flow for two images.

    Args:
      images: tf.tensor of shape [batchsize, 2, height, width, 3].
      input_height: height at which the model should be applied if different
        from image height.
      input_width: width at which the model should be applied if different from
        image width.
      resize_flow_to_img_res: bool, if True, return the flow resized to the same
        resolution as (image1, image2). If False, return the flow at whatever
        resolution the model natively predicts it.
      infer_occlusion: bool, if True, return both flow and a soft occlusion
        mask, else return just flow.
      infer_bw: bool, if True, return the flow in the reverse direction.

    Returns:
      Optical flow for each pixel in image1 pointing to image2.
    """
    orig_height, orig_width = images.shape[-3:-1]

    if input_height is None:
      input_height = orig_height
    if input_width is None:
      input_width = orig_width

    # Ensure a feasible computation resolution. If specified size is not
    # feasible with the model, change it to a slightly higher resolution.
    if self._flow_architecture == 'pwc':
      divisible_by_num = pow(2.0, self._num_levels)
    elif self._flow_architecture == 'raft':
      divisible_by_num = 8.0
    else:
      divisible_by_num = 1.

    if (input_height % divisible_by_num != 0 or
        input_width % divisible_by_num != 0):
      print(f'Cannot process images at a resolution of '
            f'{input_height}x{input_width}, since the height and/or width is '
            f'not a multiple of {divisible_by_num}.')
      # compute a feasible resolution
      input_height = int(
          math.ceil(float(input_height) / divisible_by_num) * divisible_by_num)
      input_width = int(
          math.ceil(float(input_width) / divisible_by_num) * divisible_by_num)
      print(f'Inference will be run at a resolution of '
            f'{input_height}x{input_width}.')

    # Resize images to desired input height and width.
    if input_height != orig_height or input_width != orig_width:
      images = smurf_utils.resize(
          images, input_height, input_width, is_flow=False)

    feature_dict = self._feature_model(
        images[:, 0], images[:, 1], bidirectional=infer_occlusion)

    # Compute flow in frame of image1.
    # noinspection PyCallingNonCallable
    flow = self._flow_model(feature_dict, training=False)[0]

    if infer_occlusion or infer_bw:
      # noinspection PyCallingNonCallable
      flow_backward = self._flow_model(
          feature_dict, training=False, backward=True)[0]
      occlusion_mask = self.infer_occlusion(flow, flow_backward)
      occlusion_mask = smurf_utils.resize(
          occlusion_mask, orig_height, orig_width, is_flow=False)

    # Resize and rescale flow to original resolution. This always needs to be
    # done because flow is generated at a lower resolution.
    if resize_flow_to_img_res:
      flow = smurf_utils.resize(flow, orig_height, orig_width, is_flow=True)
      if infer_bw:
        flow_backward = smurf_utils.resize(flow_backward, orig_height,
                                           orig_width,
                                           is_flow=True)

    # TODO: A dictionary or object output here would be preferable to tuples.
    if infer_occlusion and infer_bw:
      return flow, occlusion_mask, flow_backward

    if infer_bw:
      return flow, flow_backward

    if infer_occlusion:
      return flow, occlusion_mask

    return flow
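A minimal inference sketch, assuming model is a trained SMURF instance exposing this method; the 290x700 input deliberately triggers the divisibility adjustment (e.g. up to 296x704 for RAFT):

    images = tf.random.uniform([1, 2, 290, 700, 3])
    flow, occlusion_mask = model.batch_infer_no_tf_function(
        images, infer_occlusion=True)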
Example #10
def parse_data(proto,
               include_flow,
               height=None,
               width=None,
               include_occlusion=False,
               include_invalid=False,
               resize_gt_flow=True,
               include_image_path=False,
               gt_flow_shape=None,
               include_segments=False):
    """Parse a data proto with flow.

  Args:
    proto: path to data proto file
    include_flow: bool, whether or not to include flow in the output
    height: int or None height to resize image to
    width: int or None width to resize image to
    include_occlusion: bool, whether or not to also return occluded pixels (will
      throw error if occluded pixels are not present)
    include_invalid: bool, whether or not to also return invalid pixels (will
      throw error if invalid pixels are not present)
    resize_gt_flow: bool, wether or not to resize flow ground truth as the image
    include_image_path: bool, if True, return the string for the key
      "image1_path" alongside the data.
    gt_flow_shape: list, shape of the original ground truth flow (only required
      to set a fixed ground truth flow shape for tensorflow estimator in case of
      supervised training at full resolution resize_gt_flow=False)
    include_segments: bool, if True, include the Sintel segmentation data.

  Returns:
    images, flow: A tuple of (image1, image2), flow
  """

    # Parse context and image sequence from protobuffer.
    context_features = {
        'height': tf.io.FixedLenFeature([], tf.int64),
        'width': tf.io.FixedLenFeature([], tf.int64),
    }
    sequence_features = {
        'images': tf.io.FixedLenSequenceFeature([], tf.string),
    }

    if include_invalid:
        sequence_features['invalid_masks'] = tf.io.FixedLenSequenceFeature(
            [], tf.string)

    if include_segments:
        sequence_features['segments'] = tf.io.FixedLenSequenceFeature(
            [], tf.string)
        sequence_features['segments_invalid'] = tf.io.FixedLenSequenceFeature(
            [], tf.string)

    if include_image_path:
        context_features['image1_path'] = tf.io.FixedLenFeature((), tf.string)

    if include_flow:
        context_features['flow_uv'] = tf.io.FixedLenFeature([], tf.string)

    if include_occlusion:
        context_features['occlusion_mask'] = tf.io.FixedLenFeature([],
                                                                   tf.string)

    context_parsed, sequence_parsed = tf.io.parse_single_sequence_example(
        proto,
        context_features=context_features,
        sequence_features=sequence_features,
    )

    def deserialize(s, dtype, dims):
        return tf.reshape(
            tf.io.decode_raw(s, dtype),
            [context_parsed['height'], context_parsed['width'], dims])

    images = tf.map_fn(lambda s: deserialize(s, tf.uint8, 3),
                       sequence_parsed['images'],
                       dtype=tf.uint8)

    images = tf.image.convert_image_dtype(images, tf.float32)
    if height is not None and width is not None:
        images = smurf_utils.resize(images, height, width, is_flow=False)
    output = {'images': images}

    if include_flow:
        flow_uv = deserialize(context_parsed['flow_uv'], tf.float32, 2)
        # To be consistent with SMURF internals, we flip the ordering of flow.
        flow_uv = flow_uv[Ellipsis, ::-1]
        # Flying things has some images with erroneously large flow.
        # Mask out any values above / below 1000.
        invalid_cond = tf.math.logical_or(tf.greater(flow_uv, 1000),
                                          tf.less(flow_uv, -1000))
        mask = tf.where(invalid_cond, tf.zeros_like(flow_uv),
                        tf.ones_like(flow_uv))
        flow_valid = tf.reduce_min(mask, axis=-1, keepdims=True)
        if height is not None and width is not None and resize_gt_flow:
            flow_uv = smurf_utils.resize(flow_uv, height, width, is_flow=True)
            flow_valid = smurf_utils.resize(flow_valid,
                                            height,
                                            width,
                                            is_flow=False)
        else:
            if gt_flow_shape is not None:
                flow_uv.set_shape(gt_flow_shape)
                flow_valid.set_shape((gt_flow_shape[0], gt_flow_shape[1], 1))
        output['flow_valid'] = flow_valid
        output['flow'] = flow_uv

    if include_occlusion:
        occlusion_mask = deserialize(context_parsed['occlusion_mask'],
                                     tf.uint8, 1)
        if height is not None and width is not None:
            occlusion_mask = smurf_utils.resize(occlusion_mask,
                                                height,
                                                width,
                                                is_flow=False)
        output['occlusions'] = occlusion_mask

    if include_invalid:
        invalid_masks = tf.map_fn(lambda s: deserialize(s, tf.uint8, 1),
                                  sequence_parsed['invalid_masks'],
                                  dtype=tf.uint8)
        if height is not None and width is not None:
            invalid_masks = smurf_utils.resize(invalid_masks,
                                               height,
                                               width,
                                               is_flow=False)
        output['flow_valid'] = 1. - invalid_masks

    if include_image_path:
        output['image1_path'] = context_parsed['image1_path']

    if include_segments:
        segments = tf.map_fn(lambda s: deserialize(s, tf.uint8, 3),
                             sequence_parsed['segments'],
                             dtype=tf.uint8)
        segments = tf.image.convert_image_dtype(segments, tf.float32)
        segments_invalid = tf.map_fn(lambda s: deserialize(s, tf.uint8, 1),
                                     sequence_parsed['segments_invalid'],
                                     dtype=tf.uint8)
        segments_invalid = tf.image.convert_image_dtype(
            segments_invalid, tf.float32)
        if height is not None and width is not None:
            segments = tf.image.resize(segments, (height, width),
                                       method='nearest')
            segments_invalid = tf.image.resize(segments_invalid,
                                               (height, width),
                                               method='nearest')
        output['segments'] = segments
        output['segments_invalid'] = segments_invalid

    return output
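A usage sketch with functools.partial, mapping the parser with flow enabled over a dataset; the path is a placeholder:

    import functools

    dataset = tf.data.TFRecordDataset('/path/to/flow_data.tfrecord')
    dataset = dataset.map(
        functools.partial(parse_data, include_flow=True, height=384,
                          width=512))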
Example #11
def parse_supervised_train_data(proto, height, width, resize_gt_flow):
    """Parse proto from byte-encoding to the correct type and shape.

  Args:
    proto: Encoded data in proto / tf-sequence-example format.
    height: int, desired image height.
    width: int, desired image width.
    resize_gt_flow: bool, wether or not to resize flow according to the images

  Returns:
    A tuple of tf.Tensors for images, flow_uv, flow_valid, where uv represents
    the flow field and valid a mask for which entries are valid (this uses the
    occ version that includes all flow vectors). The images and the
    corresponding flow field are resized to the specified [height, width].
  """
    # Reuse the evaluation parser to parse the supervised data.
    data_dict = parse_eval_data(proto)
    images = data_dict['images']
    flow_uv_occ = data_dict['flow_uv_occ']
    flow_valid_occ = data_dict['flow_valid_occ']
    flow_valid_occ = tf.cast(flow_valid_occ, tf.float32)

    if not resize_gt_flow or height is None or width is None:
        # Crop to a size that fits all KITTI 2015 image resolutions. The first
        # 156 sequences have a resolution of 375x1242, while the remaining 44
        # sequences include resolutions of 370x1224, 374x1238, and 376x1241.
        _, orig_height, orig_width, _ = tf.unstack(tf.shape(images))
        offset_height = tf.cast((orig_height - 370) / 2, tf.int32)
        offset_width = tf.cast((orig_width - 1224) / 2, tf.int32)
        images = tf.image.crop_to_bounding_box(images,
                                               offset_height=offset_height,
                                               offset_width=offset_width,
                                               target_height=370,
                                               target_width=1224)
        flow_uv_occ = tf.image.crop_to_bounding_box(
            flow_uv_occ,
            offset_height=offset_height,
            offset_width=offset_width,
            target_height=370,
            target_width=1224)
        flow_valid_occ = tf.image.crop_to_bounding_box(
            flow_valid_occ,
            offset_height=offset_height,
            offset_width=offset_width,
            target_height=370,
            target_width=1224)

    # resize images
    if height is not None and width is not None:
        images = smurf_utils.resize(images, height, width, is_flow=False)

    if resize_gt_flow and height is not None and width is not None:
        # resize flow and swap label order
        flow_uv, flow_valid = smurf_utils.resize(flow_uv_occ[Ellipsis, ::-1],
                                                 height,
                                                 width,
                                                 is_flow=True,
                                                 mask=flow_valid_occ)
    else:
        # only swap label order
        flow_uv = flow_uv_occ[Ellipsis, ::-1]
        flow_valid = flow_valid_occ
        # set shape to work with tf estimator
        flow_uv.set_shape([370, 1224, 2])
        flow_valid.set_shape([370, 1224, 1])

    return {'images': images, 'flow': flow_uv, 'flow_valid': flow_valid}
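A minimal sketch for the fixed-resolution estimator case described in the comments above; resize_gt_flow=False keeps the 370x1224 shapes, and the path is a placeholder:

    dataset = tf.data.TFRecordDataset('/path/to/kitti2015.tfrecord')
    dataset = dataset.map(lambda proto: parse_supervised_train_data(
        proto, height=None, width=None, resize_gt_flow=False))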