Example #1
 def test_resize_sparse_flow(self):
     flow = tf.constant(
         [[[1, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
          [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
          [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
          [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
          [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
          [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
          [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
          [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]],
         dtype=tf.float32)
     mask = tf.constant([[[1], [0], [0], [0], [0], [0], [0], [0]],
                         [[0], [0], [0], [0], [0], [0], [0], [0]],
                         [[0], [0], [0], [0], [0], [0], [0], [0]],
                         [[0], [0], [0], [0], [0], [0], [0], [0]],
                         [[0], [0], [0], [0], [0], [0], [0], [0]],
                         [[0], [0], [0], [0], [0], [0], [0], [0]],
                         [[0], [0], [0], [0], [0], [0], [0], [0]],
                         [[0], [0], [0], [0], [0], [0], [0], [0]]],
                        dtype=tf.float32)
     flow_result = tf.constant([[[0.25, 0], [0, 0]], [[0, 0], [0, 0]]],
                               dtype=tf.float32)
     mask_result = tf.constant([[[1], [0]], [[0], [0]]], dtype=tf.float32)
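     # The expected flow follows from the resize factor: downsampling 8x8 to
     # 2x2 scales coordinates by 2/8 = 0.25, and flow vectors (measured in
     # pixels) shrink by the same factor, so [1, 0] becomes [0.25, 0] while the
     # mask keeps marking where valid flow was present.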
     flow_resized, mask_resized = uflow_utils.resize(flow,
                                                     2,
                                                     2,
                                                     is_flow=True,
                                                     mask=mask)
     flow_okay = tf.reduce_all(tf.math.equal(flow_resized,
                                             flow_result)).numpy()
     mask_okay = tf.reduce_all(tf.math.equal(mask_resized,
                                             mask_result)).numpy()
     self.assertTrue(flow_okay)
     self.assertTrue(mask_okay)
Example #2
def random_scale(images, flow=None, mask=None, min_scale=1.0, max_scale=1.0):
  """Performs a random scaling in the given range."""
  # choose a random scale factor and compute new resolution
  orig_height = tf.shape(images)[-3]
  orig_width = tf.shape(images)[-2]
  scale = tf.random.uniform([],
                            minval=min_scale,
                            maxval=max_scale,
                            dtype=tf.float32)
  new_height = tf.cast(
      tf.math.ceil(tf.cast(orig_height, tf.float32) * scale), tf.int32)
  new_width = tf.cast(
      tf.math.ceil(tf.cast(orig_width, tf.float32) * scale), tf.int32)

  # rescale the images (and flow)
  images = uflow_utils.resize(images, new_height, new_width, is_flow=False)
  if flow is not None:
    flow, mask = uflow_utils.resize(
        flow, new_height, new_width, is_flow=True, mask=mask)
  return images, flow, mask
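# A minimal usage sketch for random_scale (shapes and scale range are
# illustrative; like the function above, it assumes uflow_utils from the uflow
# repository is importable).
import tensorflow as tf

image_pair = tf.random.uniform([2, 64, 96, 3])       # two stacked frames
toy_flow = tf.zeros([64, 96, 2], dtype=tf.float32)   # dense flow field
toy_mask = tf.ones([64, 96, 1], dtype=tf.float32)    # flow validity mask
image_pair, toy_flow, toy_mask = random_scale(
    image_pair, flow=toy_flow, mask=toy_mask, min_scale=0.9, max_scale=1.1)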
def random_scale_second(images,
                        flow=None,
                        mask=None,
                        min_scale=1.0,
                        max_scale=1.0):
    """Performs a random scaling on the second image in the given range."""
    # choose a random scale factor and compute new resolution
    orig_height = tf.shape(images)[-3]
    orig_width = tf.shape(images)[-2]
    scale = tf.random.uniform([],
                              minval=min_scale,
                              maxval=max_scale,
                              dtype=tf.float32)
    new_height = tf.cast(
        tf.math.ceil(tf.cast(orig_height, tf.float32) * scale), tf.int32)
    new_width = tf.cast(tf.math.ceil(tf.cast(orig_width, tf.float32) * scale),
                        tf.int32)

    # rescale only the second image
    image_1, image_2 = tf.unstack(images)
    image_2 = uflow_utils.resize(image_2, new_height, new_width, is_flow=False)
    # crop either first or second image to have matching dimensions
    if scale < 1.0:
        image_1 = _center_crop(image_1, new_height, new_width)
    else:
        image_2 = _center_crop(image_2, orig_height, orig_width)
    images = tf.stack([image_1, image_2])

    if flow is not None:
        # get current locations (with the origin in the image center)
        positions = _positions_center_origin(orig_height, orig_width)

        # compute scale factor of the actual new image resolution
        scale_flow_h = tf.cast(new_height, tf.float32) / tf.cast(
            orig_height, tf.float32)
        scale_flow_w = tf.cast(new_width, tf.float32) / tf.cast(
            orig_width, tf.float32)
        scale_flow = tf.stack([scale_flow_h, scale_flow_w])

        # compute augmented flow (multiply by mask to zero invalid flow locations)
        flow = ((positions + flow) * scale_flow - positions) * mask
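        # In words: positions holds pixel locations relative to the image
        # center and scale_flow is the per-axis resize factor of the second
        # frame, so a pixel at p that previously mapped to p + flow now maps to
        # (p + flow) * scale_flow, which gives the new flow above.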

        if scale < 1.0:
            # If the second image was downsampled, crop flow and mask to match
            # the (cropped) first image.
            flow = _center_crop(flow, new_height, new_width)
            mask = _center_crop(mask, new_height, new_width)
    return images, flow, mask
    def transform(images, i_or_ij, is_flow, crop_height, crop_width,
                  shift_heights, shift_widths, resize):
        # Expect (i, j) for flows and masks and i for images.
        if isinstance(i_or_ij, int):
            i = i_or_ij
            # Flow needs i and j.
            assert not is_flow
        else:
            i, j = i_or_ij

        if is_flow:
            shifts = tf.stack([shift_heights, shift_widths], axis=-1)
            flow_offset = shifts[i] - shifts[j]
            images = images + tf.cast(flow_offset, tf.float32)
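            # Cropping frame i at shifts[i] and frame j at shifts[j] moves the
            # two coordinate frames by different amounts; adding
            # shifts[i] - shifts[j] keeps each flow vector pointing at the same
            # content after both crops.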

        shift_height = shift_heights[i]
        shift_width = shift_widths[i]
        height = images.shape[-3]
        width = images.shape[-2]

        # Assert that the cropped bounding box does not go out of the image frame.
        op1 = tf.compat.v1.assert_greater_equal(crop_height + shift_height, 0)
        op2 = tf.compat.v1.assert_greater_equal(crop_width + shift_width, 0)
        op3 = tf.compat.v1.assert_less_equal(
            height - crop_height + shift_height, height)
        op4 = tf.compat.v1.assert_less_equal(width - crop_width + shift_width,
                                             width)
        op5 = tf.compat.v1.assert_greater(
            height,
            2 * crop_height,
            message='Image height is too small for cropping.')
        op6 = tf.compat.v1.assert_greater(
            width,
            2 * crop_width,
            message='Image width is too small for cropping.')
        with tf.control_dependencies([op1, op2, op3, op4, op5, op6]):
            images = images[:, crop_height + shift_height:height -
                            crop_height + shift_height, crop_width +
                            shift_width:width - crop_width + shift_width, :]
        if resize:
            images = uflow_utils.resize(images, height, width, is_flow=is_flow)
            images.set_shape((images.shape[0], height, width, images.shape[3]))
        else:
            images.set_shape((images.shape[0], height - 2 * crop_height,
                              width - 2 * crop_width, images.shape[3]))
        return images
def parse_data(proto, height, width):
    """Parse features from byte-encoding to the correct type and shape.

  Args:
    proto: Encoded data in proto / tf-sequence-example format.
    height: int, desired image height.
    width: int, desired image width.

  Returns:
    A sequence of images as tf.Tensor of shape
    [sequence length, height, width, 3].
  """

    # Parse context and image sequence from protobuffer.
    unused_context_parsed, sequence_parsed = tf.io.parse_single_sequence_example(
        proto,
        context_features={
            'height': tf.io.FixedLenFeature([], tf.int64),
            'width': tf.io.FixedLenFeature([], tf.int64)
        },
        sequence_features={
            'images': tf.io.FixedLenSequenceFeature([], tf.string)
        })

    # Deserialize images to float32 tensors.
    def deserialize(image_raw):
        image_uint = tf.image.decode_png(image_raw)
        image_float = tf.image.convert_image_dtype(image_uint, tf.float32)
        return image_float

    images = tf.map_fn(deserialize,
                       sequence_parsed['images'],
                       dtype=tf.float32)

    # Resize images.
    images = uflow_utils.resize(images, height, width, is_flow=False)

    return images
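# Hypothetical usage of the parser above: map it over a TFRecord dataset of
# image sequences (the file path and target resolution are placeholders).
import tensorflow as tf

dataset = tf.data.TFRecordDataset('/path/to/image_sequences.tfrecord')
dataset = dataset.map(lambda proto: parse_data(proto, height=384, width=512))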
def parse_data(proto,
               include_flow,
               height=None,
               width=None,
               include_occlusion=False,
               include_invalid=False,
               resize_gt_flow=True,
               gt_flow_shape=None):
    """Parse a data proto with flow.

  Args:
    proto: path to data proto file
    include_flow: bool, whether or not to include flow in the output
    height: int or None, height to resize the image to
    width: int or None, width to resize the image to
    include_occlusion: bool, whether or not to also return occluded pixels (will
      throw error if occluded pixels are not present)
    include_invalid: bool, whether or not to also return invalid pixels (will
      throw error if invalid pixels are not present)
    resize_gt_flow: bool, whether or not to resize the ground-truth flow along
      with the images
    gt_flow_shape: list, shape of the original ground-truth flow (only required
      to set a fixed ground-truth flow shape for the tensorflow estimator when
      training supervised at full resolution, i.e. resize_gt_flow=False)

  Returns:
    images, flow: A tuple of (image1, image2), flow
  """

    # Parse context and image sequence from protobuffer.
    context_features = {
        'height': tf.io.FixedLenFeature([], tf.int64),
        'width': tf.io.FixedLenFeature([], tf.int64),
    }
    sequence_features = {
        'images': tf.io.FixedLenSequenceFeature([], tf.string),
    }

    if include_invalid:
        sequence_features['invalid_masks'] = tf.io.FixedLenSequenceFeature(
            [], tf.string)

    if include_flow:
        context_features['flow_uv'] = tf.io.FixedLenFeature([], tf.string)

    if include_occlusion:
        context_features['occlusion_mask'] = tf.io.FixedLenFeature([],
                                                                   tf.string)

    context_parsed, sequence_parsed = tf.io.parse_single_sequence_example(
        proto,
        context_features=context_features,
        sequence_features=sequence_features,
    )

    def deserialize(s, dtype, dims):
        return tf.reshape(
            tf.io.decode_raw(s, dtype),
            [context_parsed['height'], context_parsed['width'], dims])

    images = tf.map_fn(lambda s: deserialize(s, tf.uint8, 3),
                       sequence_parsed['images'],
                       dtype=tf.uint8)

    images = tf.image.convert_image_dtype(images, tf.float32)
    if height is not None and width is not None:
        images = uflow_utils.resize(images, height, width, is_flow=False)
    output = [images]

    if include_flow:
        flow_uv = deserialize(context_parsed['flow_uv'], tf.float32, 2)
        # To be consistent with uflow internals, we flip the ordering of flow.
        flow_uv = flow_uv[Ellipsis, ::-1]
        if height is not None and width is not None and resize_gt_flow:
            flow_uv = uflow_utils.resize(flow_uv, height, width, is_flow=True)
        else:
            if gt_flow_shape is not None:
                flow_uv.set_shape(gt_flow_shape)
        output.append(flow_uv)
        # create valid mask
        flow_valid = tf.ones_like(flow_uv[Ellipsis, :1], dtype=tf.float32)
        output.append(flow_valid)

    if include_occlusion:
        occlusion_mask = deserialize(context_parsed['occlusion_mask'],
                                     tf.uint8, 1)
        if height is not None and width is not None:
            occlusion_mask = uflow_utils.resize(occlusion_mask,
                                                height,
                                                width,
                                                is_flow=False)
        output.append(occlusion_mask)

    if include_invalid:
        invalid_masks = tf.map_fn(lambda s: deserialize(s, tf.uint8, 1),
                                  sequence_parsed['invalid_masks'],
                                  dtype=tf.uint8)
        if height is not None and width is not None:
            invalid_masks = uflow_utils.resize(invalid_masks,
                                               height,
                                               width,
                                               is_flow=False)
        output.append(invalid_masks)

    # Only return a list if it contains more than one item; otherwise unwrap it.
    if len(output) == 1:
        output = output[0]

    return output
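# Hypothetical usage of the parser above for data that contains ground-truth
# flow (path and resolution are placeholders); each element then parses to
# (images, flow_uv, flow_valid).
import tensorflow as tf

dataset = tf.data.TFRecordDataset('/path/to/flow_data.tfrecord')
dataset = dataset.map(
    lambda proto: parse_data(proto, include_flow=True, height=384, width=512))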
def parse_supervised_train_data(proto, height, width, resize_gt_flow):
  """Parse proto from byte-encoding to the correct type and shape.

  Args:
    proto: Encoded data in proto / tf-sequence-example format.
    height: int, desired image height.
    width: int, desired image width.
    resize_gt_flow: bool, whether or not to resize the flow according to the images

  Returns:
    A tuple of tf.Tensors for images, flow_uv, flow_valid, where flow_uv is the
    flow field and flow_valid is a mask marking which entries are valid (this
    uses the occ version that includes all flow vectors). The images and the
    corresponding flow field are resized to the specified [height, width].
  """
  images, flow_uv_occ, _, flow_valid_occ, _ = parse_eval_data(proto)
  flow_valid_occ = tf.cast(flow_valid_occ, tf.float32)

  if not resize_gt_flow or height is None or width is None:
    # Crop to a size that fits all KITTI 2015 image resolutions: the first 156
    # sequences have a resolution of 375x1242, while the remaining 44 sequences
    # have resolutions of 370x1224, 374x1238, and 376x1241.
    _, orig_height, orig_width, _ = tf.unstack(tf.shape(images))
    offset_height = tf.cast((orig_height - 370) / 2, tf.int32)
    offset_width = tf.cast((orig_width - 1224) / 2, tf.int32)
    images = tf.image.crop_to_bounding_box(
        images,
        offset_height=offset_height,
        offset_width=offset_width,
        target_height=370,
        target_width=1224)
    flow_uv_occ = tf.image.crop_to_bounding_box(
        flow_uv_occ,
        offset_height=offset_height,
        offset_width=offset_width,
        target_height=370,
        target_width=1224)
    flow_valid_occ = tf.image.crop_to_bounding_box(
        flow_valid_occ,
        offset_height=offset_height,
        offset_width=offset_width,
        target_height=370,
        target_width=1224)

  # resize images
  if height is not None and width is not None:
    images = uflow_utils.resize(images, height, width, is_flow=False)

  if resize_gt_flow and height is not None and width is not None:
    # resize flow and swap label order
    flow_uv, flow_valid = uflow_utils.resize(
        flow_uv_occ[Ellipsis, ::-1],
        height,
        width,
        is_flow=True,
        mask=flow_valid_occ)
  else:
    # only swap label order
    flow_uv = flow_uv_occ[Ellipsis, ::-1]
    flow_valid = flow_valid_occ
    # set shape to work with tf estimator
    flow_uv.set_shape([370, 1224, 2])
    flow_valid.set_shape([370, 1224, 1])

  return images, flow_uv, flow_valid
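# Toy illustration of the channel flip used in the "swap label order" branches
# above: the last axis holds the two flow components and [..., ::-1] reverses
# them so they match the channel ordering used inside uflow.
import tensorflow as tf

uv = tf.constant([[[1.0, 2.0]]])   # shape [1, 1, 2]
vu = uv[Ellipsis, ::-1]            # -> [[[2.0, 1.0]]]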
Example #8
def random_crop(images, flow=None, mask=None, crop_height=None, crop_width=None,
                relative_offset=0):
  """Performs a random crop with the given height and width."""
  # early return if crop_height or crop_width is not specified
  if crop_height is None or crop_width is None:
    return images, flow, mask

  orig_height = tf.shape(images)[-3]
  orig_width = tf.shape(images)[-2]

  # check if crop size fits the image size
  scale = 1.0
  ratio = tf.cast(crop_height, tf.float32) / tf.cast(orig_height, tf.float32)
  scale = tf.math.maximum(scale, ratio)
  ratio = tf.cast(crop_width, tf.float32) / tf.cast(orig_width, tf.float32)
  scale = tf.math.maximum(scale, ratio)
  # compute minimum required height
  new_height = tf.cast(
      tf.math.ceil(tf.cast(orig_height, tf.float32) * scale), tf.int32)
  new_width = tf.cast(
      tf.math.ceil(tf.cast(orig_width, tf.float32) * scale), tf.int32)
  # perform resize (a no-op when scale == 1.0)
  images = uflow_utils.resize(images, new_height, new_width, is_flow=False)

  # compute joint offset
  max_offset_h = new_height - tf.cast(crop_height, dtype=tf.int32)
  max_offset_w = new_width - tf.cast(crop_width, dtype=tf.int32)
  joint_offset_h = tf.random.uniform([], maxval=max_offset_h+1, dtype=tf.int32)
  joint_offset_w = tf.random.uniform([], maxval=max_offset_w+1, dtype=tf.int32)

  # compute relative offset
  min_relative_offset_h = tf.math.maximum(
      joint_offset_h - relative_offset, 0)
  max_relative_offset_h = tf.math.minimum(
      joint_offset_h + relative_offset, max_offset_h)
  min_relative_offset_w = tf.math.maximum(
      joint_offset_w - relative_offset, 0)
  max_relative_offset_w = tf.math.minimum(
      joint_offset_w + relative_offset, max_offset_w)
  relative_offset_h = tf.random.uniform(
      [], minval=min_relative_offset_h, maxval=max_relative_offset_h+1,
      dtype=tf.int32)
  relative_offset_w = tf.random.uniform(
      [], minval=min_relative_offset_w, maxval=max_relative_offset_w+1,
      dtype=tf.int32)

  # crop both images
  image_1, image_2 = tf.unstack(images)
  image_1 = tf.image.crop_to_bounding_box(
      image_1, offset_height=joint_offset_h, offset_width=joint_offset_w,
      target_height=crop_height, target_width=crop_width)
  image_2 = tf.image.crop_to_bounding_box(
      image_2, offset_height=relative_offset_h, offset_width=relative_offset_w,
      target_height=crop_height, target_width=crop_width)
  images = tf.stack([image_1, image_2])

  if flow is not None:
    # perform resize (a no-op when scale == 1.0)
    flow, mask = uflow_utils.resize(
        flow, new_height, new_width, is_flow=True, mask=mask)

    # crop flow and mask
    flow = tf.image.crop_to_bounding_box(
        flow,
        offset_height=joint_offset_h,
        offset_width=joint_offset_w,
        target_height=crop_height,
        target_width=crop_width)
    mask = tf.image.crop_to_bounding_box(
        mask,
        offset_height=joint_offset_h,
        offset_width=joint_offset_w,
        target_height=crop_height,
        target_width=crop_width)

    # correct flow for the relative shift between the two crop windows
    flow_delta = tf.stack(
        [tf.cast(relative_offset_h - joint_offset_h, tf.float32),
         tf.cast(relative_offset_w - joint_offset_w, tf.float32)])
    flow = (flow - flow_delta) * mask
  return images, flow, mask
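# Toy check of the relative-shift correction above (numbers are illustrative):
# if the first frame is cropped at offset (10, 10) and the second at (12, 11),
# the second crop window is shifted by (2, 1), so (2.0, 1.0) must be subtracted
# from every valid flow vector to keep it pointing at the same content.
import tensorflow as tf

flow_vector = tf.constant([5.0, 3.0])
flow_delta = tf.constant([12.0 - 10.0, 11.0 - 10.0])
corrected = flow_vector - flow_delta   # -> [3.0, 2.0]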
Example #9
    def batch_infer_no_tf_function(self,
                                   images,
                                   input_height=None,
                                   input_width=None,
                                   resize_flow_to_img_res=True,
                                   infer_occlusion=False):
        """Infers flow from two images.

    Args:
      images: tf.tensor of shape [batchsize, 2, height, width, 3].
      input_height: height at which the model should be applied if different
        from image height.
      input_width: width at which the model should be applied if different from
        image width.
      resize_flow_to_img_res: bool, if True, return the flow resized to the
        same resolution as (image1, image2). If False, return the flow at
        whatever resolution the model natively predicts it.
      infer_occlusion: bool, if True, return both flow and a soft occlusion
        mask, else return just flow.

    Returns:
      Optical flow for each pixel in image1 pointing to image2.
    """

        (batch_size, seq_len, orig_height, orig_width,
         image_channels) = images.shape.as_list()

        if input_height is None:
            input_height = orig_height
        if input_width is None:
            input_width = orig_width

        # Ensure a feasible computation resolution. If specified size is not
        # feasible with the model, change it to a slightly higher resolution.
        divisible_by_num = pow(2.0, self._num_levels)
        if (input_height % divisible_by_num != 0
                or input_width % divisible_by_num != 0):
            print('Cannot process images at a resolution of ' +
                  str(input_height) + 'x' + str(input_width) +
                  ', since the height and/or width is not a '
                  'multiple of ' + str(divisible_by_num) + '.')
            # compute a feasible resolution
            input_height = int(
                math.ceil(float(input_height) / divisible_by_num) *
                divisible_by_num)
            input_width = int(
                math.ceil(float(input_width) / divisible_by_num) *
                divisible_by_num)
            print('Inference will be run at a resolution of ' +
                  str(input_height) + 'x' + str(input_width) + '.')
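            # For example (assuming, hypothetically, self._num_levels == 6, so
            # divisible_by_num == 64): a 375x1242 KITTI frame would be run at
            # ceil(375 / 64) * 64 x ceil(1242 / 64) * 64 = 384x1280, with the
            # resulting flow resized back to the original resolution below.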

        # Resize images to desired input height and width.
        if input_height != orig_height or input_width != orig_width:
            images = uflow_utils.resize(images,
                                        input_height,
                                        input_width,
                                        is_flow=False)

        # Flatten images by folding sequence length into the batch dimension, apply
        # the feature network and undo the flattening.
        images_flattened = tf.reshape(
            images,
            [batch_size * seq_len, input_height, input_width, image_channels])
        # noinspection PyCallingNonCallable
        features_flattened = self._feature_model(
            images_flattened, split_features_by_sample=False)
        features = [
            tf.reshape(f, [batch_size, seq_len] + f.shape.as_list()[1:])
            for f in features_flattened
        ]

        features1, features2 = [[f[:, i] for f in features] for i in range(2)]

        # Compute flow in frame of image1.
        # noinspection PyCallingNonCallable
        flow = self._flow_model(features1, features2, training=False)[0]

        if infer_occlusion:
            # noinspection PyCallingNonCallable
            flow_backward = self._flow_model(features2,
                                             features1,
                                             training=False)[0]
            occlusion_mask = self.infer_occlusion(flow, flow_backward)
            occlusion_mask = uflow_utils.resize(occlusion_mask,
                                                orig_height,
                                                orig_width,
                                                is_flow=False)

        # Resize and rescale flow to original resolution. This always needs to be
        # done because flow is generated at a lower resolution.
        if resize_flow_to_img_res:
            flow = uflow_utils.resize(flow,
                                      orig_height,
                                      orig_width,
                                      is_flow=True)

        if infer_occlusion:
            return flow, occlusion_mask

        return flow