Example 1
def compute_max_spans(points, point_masks):
    """Computes maximum point set spans in any direction."""
    num_points = tf.shape(points)[-2]
    point_dim = tf.shape(points)[-1]
    point_masks = tf.cast(point_masks, dtype=tf.bool)
    # A pairwise difference is only valid if both of its endpoints are valid.
    pair_masks = tf.math.logical_and(
        tf.expand_dims(point_masks, axis=-1),
        tf.expand_dims(point_masks, axis=-2))
    pair_masks = data_utils.tile_last_dims(
        tf.expand_dims(pair_masks, axis=-1), last_dim_multiples=[point_dim])
    diffs = tf.math.abs(
        data_utils.tile_last_dims(
            tf.expand_dims(points, axis=-2), last_dim_multiples=[num_points, 1])
        - data_utils.tile_last_dims(
            tf.expand_dims(points, axis=-3),
            last_dim_multiples=[num_points, 1, 1]))
    diffs = tf.where(pair_masks, diffs, tf.zeros_like(diffs))
    max_spans = tf.squeeze(
        tf.math.reduce_max(diffs, axis=[-3, -2, -1], keepdims=True),
        axis=[-2])
    return max_spans
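A minimal usage sketch with hypothetical values (assumes TensorFlow 2.x and the surrounding module's `data_utils`):

import tensorflow as tf

# Two valid 2D points: the largest per-axis absolute difference is
# |1 - 4| = 3, so the expected maximum span is 3.
points = tf.constant([[1.0, 2.0], [4.0, 0.0]])
point_masks = tf.constant([True, True])
max_spans = compute_max_spans(points, point_masks)  # [[3.0]], shape [1, 1].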
Example 2
def compute_procrustes_alignment_params(target_points, source_points):
    """Computes procrustes alignment parameters.

  Args:
    target_points: A tensor for target points. Shape = [..., num_points,
      point_dim].
    source_points: A tensor for source points. Shape = [..., num_points,
      point_dim].

  Returns:
    rotations: A tensor for rotations. Shape = [..., point_dim, point_dim].
    scales: A tensor for scales. Shape = [..., 1, 1].
    translations: A tensor for translations. Shape = [..., 1, point_dim].
  """
    # standardized_target_points: Shape = [..., num_points, point_dim].
    # target_offsets: Shape = [..., 1, point_dim].
    # target_scales: Shape = [..., 1, 1].
    standardized_target_points, target_offsets, target_scales = (
        standardize_points(target_points))
    # standardized_source_points: Shape = [..., num_points, point_dim].
    # source_offsets: Shape = [..., 1, point_dim].
    # source_scales: Shape = [..., 1, 1].
    standardized_source_points, source_offsets, source_scales = (
        standardize_points(source_points))
    # Shape = [..., point_dim, point_dim].
    a = tf.linalg.matmul(standardized_target_points,
                         standardized_source_points,
                         transpose_a=True)
    # s: Shape = [..., point_dim].
    # u: Shape = [..., point_dim, point_dim].
    # v: Shape = [..., point_dim, point_dim].
    s, u, v = tf.linalg.svd(a)
    # Shape = [..., point_dim, point_dim].
    r = tf.linalg.matmul(v, u, transpose_b=True)
    # Shape = [...].
    det_r = tf.linalg.det(r)
    # Shape = [...].
    signs = tf.math.sign(det_r)
    # Shape = [..., 1].
    signs = tf.expand_dims(signs, axis=-1)
    point_dim = target_points.shape.as_list()[-1]
    # Shape = [..., point_dim - 1].
    ones = data_utils.tile_last_dims(tf.ones_like(signs),
                                     last_dim_multiples=[point_dim - 1])
    # Shape = [..., point_dim].
    signs = tf.concat([ones, signs], axis=-1)
    s *= signs
    # Shape = [..., 1, point_dim].
    signs = tf.expand_dims(signs, axis=-2)
    v *= signs
    # Shape = [..., point_dim, point_dim].
    rotations = tf.linalg.matmul(v, u, transpose_b=True)
    # Shape = [..., 1, 1].
    scales = (tf.expand_dims(tf.math.reduce_sum(s, axis=-1, keepdims=True),
                             axis=-1) * target_scales / source_scales)
    # Shape = [..., 1, point_dim].
    translations = target_offsets - scales * tf.linalg.matmul(
        source_offsets, rotations)
    return rotations, scales, translations
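A quick self-check sketch with hypothetical values (assumes the surrounding module's `standardize_points` and `data_utils` helpers): apply a known similarity transform to source points, then verify that the recovered parameters reproduce the targets.

import math

import tensorflow as tf

source_points = tf.constant([[0.0, 0.0], [1.0, 0.0], [0.0, 2.0], [3.0, 1.0]])
cos_a, sin_a = math.cos(0.3), math.sin(0.3)
true_rotation = tf.constant([[cos_a, sin_a], [-sin_a, cos_a]])
target_points = (2.0 * tf.linalg.matmul(source_points, true_rotation) +
                 tf.constant([[0.5, -1.0]]))
rotations, scales, translations = compute_procrustes_alignment_params(
    target_points, source_points)
# Applying the recovered transform to the source points should reproduce the
# target points up to numerical precision:
aligned_points = (scales * tf.linalg.matmul(source_points, rotations) +
                  translations)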
def naive_normalize_points(points, point_masks):
    """Naively normalizes points by shifting and scaling.

  Args:
    points: A tensor for points. Shape = [..., num_points, point_dim].
    point_masks: A tensor for point validities. Shape = [..., num_points].

  Returns:
    points: A tensor for normalized points. Shape = [..., num_points,
      point_dim].
  """
    point_masks = tf.cast(point_masks, dtype=tf.bool)
    point_dim = tf.shape(points)[-1]

    def compute_centers(points, point_masks):
        """Computes centers of valid points as a masked mean."""
        # A masked mean preserves batch dims (`tf.boolean_mask` would not).
        weights = tf.expand_dims(tf.cast(point_masks, points.dtype), axis=-1)
        sums = tf.math.reduce_sum(points * weights, axis=-2, keepdims=True)
        counts = tf.math.reduce_sum(weights, axis=-2, keepdims=True)
        return sums / tf.math.maximum(counts, 1e-12)

    def compute_max_spans(points, point_masks):
        """Computes maximum point set spans in any direction."""
        num_points = tf.shape(points)[-2]
        # A pairwise difference is only valid if both of its endpoints are
        # valid points.
        pair_masks = tf.math.logical_and(
            tf.expand_dims(point_masks, axis=-1),
            tf.expand_dims(point_masks, axis=-2))
        pair_masks = data_utils.tile_last_dims(
            tf.expand_dims(pair_masks, axis=-1),
            last_dim_multiples=[point_dim])
        diffs = tf.math.abs(
            data_utils.tile_last_dims(tf.expand_dims(points, axis=-2),
                                      last_dim_multiples=[num_points, 1]) -
            data_utils.tile_last_dims(tf.expand_dims(points, axis=-3),
                                      last_dim_multiples=[num_points, 1, 1]))
        diffs = tf.where(pair_masks, diffs, tf.zeros_like(diffs))
        max_spans = tf.squeeze(tf.math.reduce_max(diffs,
                                                  axis=[-3, -2, -1],
                                                  keepdims=True),
                               axis=[-2])
        return max_spans

    centers = compute_centers(points, point_masks)
    max_spans = compute_max_spans(points, point_masks)
    points = (points - centers) / tf.math.maximum(1e-12, max_spans)
    points = tf.where(
        data_utils.tile_last_dims(tf.expand_dims(point_masks, axis=-1),
                                  last_dim_multiples=[point_dim]), points,
        tf.zeros_like(points))
    return points
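A small usage sketch with hypothetical values; the invalid point is excluded from centering and scaling, and zeroed in the output:

import tensorflow as tf

# Batch of one point set in which the second point is marked invalid.
points = tf.constant([[[0.0, 0.0], [100.0, 100.0], [2.0, 4.0]]])
point_masks = tf.constant([[1.0, 0.0, 1.0]])
normalized_points = naive_normalize_points(points, point_masks)
# The valid points (0, 0) and (2, 4) are centered around (1, 2) and divided
# by their maximum span of 4; the invalid point is replaced with (0, 0).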
def override_points(points, from_indices_list, to_indices):
    """Overrides points with other points.

  Points at `to_indices` will be overridden with centers of points from
  `from_indices_list`.

  For example:

    from_indices_list = [[0, 1], [2]]
    to_indices = [3, 4]
    updated_points = override_points(points, from_indices_list, to_indices)

  Will result in:
    updated_points[..., 3, :] ==
      ((points[..., 0, :] + points[..., 1, :]) / 2 + points[..., 2, :]) / 2
    updated_points[..., 4, :] ==
      ((points[..., 0, :] + points[..., 1, :]) / 2 + points[..., 2, :]) / 2

  Args:
    points: A tensor for points to override. Shape = [..., num_points,
      point_dim].
    from_indices_list: A list of integer lists for point indices to compute
      overriding points.
    to_indices: A list of integers for point indices to be overridden.

  Returns:
    A tensor for updated points.
  """
    overriding_points = [
        get_points(points, from_indices) for from_indices in from_indices_list
    ]
    overriding_points = tf.concat(overriding_points, axis=-2)
    overriding_points = tf.math.reduce_mean(overriding_points,
                                            axis=-2,
                                            keepdims=True)
    overriding_points = data_utils.tile_last_dims(
        overriding_points, last_dim_multiples=[len(to_indices), 1])
    return data_utils.update_sub_tensor(
        points,
        indices=to_indices,
        axis=-2,
        update_func=lambda _: overriding_points)
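A usage sketch mirroring the docstring example (hypothetical values; assumes the surrounding module's `get_points` and `data_utils`):

import tensorflow as tf

points = tf.constant(
    [[[0.0, 0.0], [2.0, 2.0], [4.0, 8.0], [9.0, 9.0], [9.0, 9.0]]])
updated_points = override_points(
    points, from_indices_list=[[0, 1], [2]], to_indices=[3, 4])
# Points 3 and 4 both become ((0 + 2) / 2 + 4) / 2 = 2.5 in x and
# ((0 + 2) / 2 + 8) / 2 = 4.5 in y, i.e. (2.5, 4.5).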
Example 5
def process_decoded_image_sizes(decoded_tensors,
                                sequence_length,
                                common_module=common):
    """Processes decoded image sizes.

  Args:
    decoded_tensors: A dictionary for decoded tensors.
    sequence_length: An integer for input sequence length.
    common_module: A Python module that defines common constants.

  Returns:
    A dictionary for processed image size tensors.
  """
    image_heights = decoded_tensors[common_module.TFSE_KEY_IMAGE_HEIGHT]
    image_widths = decoded_tensors[common_module.TFSE_KEY_IMAGE_WIDTH]
    image_sizes = tf.stack([image_heights, image_widths], axis=-1)
    image_sizes = data_utils.tile_last_dims(
        tf.expand_dims(image_sizes, axis=-2),
        last_dim_multiples=[sequence_length, 1])
    return {
        common_module.KEY_IMAGE_SIZES: image_sizes,
    }
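A usage sketch with hypothetical decoded values (the `TFSE_KEY_*` and `KEY_*` constants come from the project's `common` module):

import tensorflow as tf

decoded_tensors = {
    common.TFSE_KEY_IMAGE_HEIGHT: tf.constant([480, 720]),
    common.TFSE_KEY_IMAGE_WIDTH: tf.constant([640, 1280]),
}
outputs = process_decoded_image_sizes(decoded_tensors, sequence_length=3)
# outputs[common.KEY_IMAGE_SIZES] has shape [2, 3, 2]: each (height, width)
# pair is repeated once per frame in the sequence.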
def create_model_input(keypoints_2d,
                       keypoint_masks_2d,
                       keypoints_3d,
                       model_input_keypoint_type,
                       model_input_keypoint_mask_type=(
                           common.MODEL_INPUT_KEYPOINT_MASK_TYPE_NO_USE),
                       normalize_keypoints_2d=True,
                       keypoint_profile_2d=None,
                       uniform_keypoint_jittering_max_offset_2d=0.0,
                       gaussian_keypoint_jittering_offset_stddev_2d=0.0,
                       keypoint_dropout_probs=(0.0, 0.0),
                       structured_keypoint_mask_processor=None,
                       set_on_mask_for_non_anchors=False,
                       mix_mask_sub_batches=False,
                       rescale_features=False,
                       forced_mask_on_part_names=None,
                       forced_mask_off_part_names=None,
                       keypoint_profile_3d=None,
                       azimuth_range=(-math.pi, math.pi),
                       elevation_range=(-math.pi / 6.0, math.pi / 6.0),
                       roll_range=(-math.pi / 6.0, math.pi / 6.0),
                       normalized_camera_depth_range=(),
                       sequential_inputs=False,
                       seed=None):
    """Creates model input features from input data.

  Args:
    keypoints_2d: A tensor for input 2D keypoints. Shape = [..., num_keypoints,
      2]. Use None if irrelevant.
    keypoint_masks_2d: A tensor for input 2D keypoint masks. Shape = [...,
      num_keypoints]. Use None if irrelevant.
    keypoints_3d: A tensor for input 3D keypoints. Shape = [..., num_keypoints,
      3]. Use None if irrelevant.
    model_input_keypoint_type: An enum string for model input type. See
      `MODEL_INPUT_TYPE_*` for supported values.
    model_input_keypoint_mask_type: An enum string for model input keypoint mask
      type. See `MODEL_INPUT_KEYPOINT_MASK_TYPE_*` for supported values.
    normalize_keypoints_2d: A boolean for whether to normalize 2D keypoints at
      the end.
    keypoint_profile_2d: A KeypointProfile2D object for input 2D keypoints.
      Required for normalizing 2D keypoints, 3D-to-2D projection, or forcing
      masks on/off.
    uniform_keypoint_jittering_max_offset_2d: A float for maximum 2D keypoint
      jittering offset. Random jittering offset within
      [-uniform_keypoint_jittering_max_offset_2d,
      uniform_keypoint_jittering_max_offset_2d] is added to each 2D keypoint.
      Note that the jittering happens after the 2D normalization. Ignored if
      non-positive.
    gaussian_keypoint_jittering_offset_stddev_2d: A float for standard deviation
      of Gaussian 2D keypoint jittering offset. Random jittering offset sampled
      from N(0, gaussian_keypoint_jittering_offset_stddev_2d) is to be added to
      each keypoint. Note that the jittering happens after the 2D normalization.
      Ignored if non-positive.
    keypoint_dropout_probs: A tuple of floats for the keypoint random dropout
      probabilities in the format (probability_to_apply, probability_to_drop).
      We perform stratified dropout by first selecting instances with
      `probability_to_apply` and then dropping their keypoints with
      `probability_to_drop`. When `sequential_inputs` is True, there may be a
      third element indicating the probability of using sequence-level dropout.
      Only used when keypoint scores are relevant.
    structured_keypoint_mask_processor: A Python function for generating
      keypoint masks with structured dropout. Ignored if None.
    set_on_mask_for_non_anchors: A boolean for whether to always use on (1)
      masks for non-anchor samples. We assume the second from the left tensor
      dimension is for anchor/non-anchor, and the non-anchor samples start at
      the second element along that dimension.
    mix_mask_sub_batches: A boolean for whether to apply sub-batch mixing to
      processed masks and all-one masks.
    rescale_features: A boolean for whether to rescale features by the ratio
      between total number of mask elements and kept mask elements.
    forced_mask_on_part_names: A list of standard names of parts of which the
      masks are forced on (by setting value to 1.0). See
      `KeypointProfile.get_standard_part_index` for standard part names.
    forced_mask_off_part_names: A list of standard names of parts of which the
      masks are forced off (by setting value to 0.0). See
      `KeypointProfile.get_standard_part_index` for standard part names.
    keypoint_profile_3d: A KeypointProfile3D object for input 3D keypoints. Only
      used when 3D-to-2D projection is involved.
    azimuth_range: A tuple for minimum and maximum azimuth angles to randomly
      rotate 3D keypoints with. For non-sequential inputs, a 2-tuple for
      (minimum angle, maximum angle) is expected. For sequential inputs, a
      2-tuple independently samples starting and ending camera angles, and a
      4-tuple for (minimum starting angle, maximum starting angle, minimum
      angle increment, maximum angle increment) first samples starting angles
      and then adds random delta angles to them to obtain ending angles.
    elevation_range: A tuple for minimum and maximum elevation angles to
      randomly rotate 3D keypoints with. For non-sequential inputs, a 2-tuple
      for (minimum angle, maximum angle) is expected. For sequential inputs, a
      2-tuple independently samples starting and ending camera angles, and a
      4-tuple for (minimum starting angle, maximum starting angle, minimum
      angle increment, maximum angle increment) first samples starting angles
      and then adds random delta angles to them to obtain ending angles.
    roll_range: A tuple for minimum and maximum roll angles to randomly rotate
      3D keypoints with. For non-sequential inputs, a 2-tuple for (minimum
      angle, maximum angle) is expected. For sequential inputs, a 2-tuple
      independently samples starting and ending camera angles, and a 4-tuple
      for (minimum starting angle, maximum starting angle, minimum angle
      increment, maximum angle increment) first samples starting angles and
      then adds random delta angles to them to obtain ending angles.
    normalized_camera_depth_range: A tuple for minimum and maximum normalized
      camera depth for random camera augmentation. If empty, uses constant depth
      as 1 over the 2D pose normalization scale unit.
    sequential_inputs: A boolean flag indicating whether the inputs are
      sequential. If True, the input keypoints are expected to have shape
      [..., sequence_length, num_keypoints, keypoint_dim].
    seed: An integer for random seed.

  Returns:
    features: A tensor for input features. Shape = [..., feature_dim].
    side_outputs: A dictionary for side outputs, which includes
      `offset_points_2d` (shape = [..., 1, 2]) and `scale_distances_2d` (shape =
      [..., 1, 1]) if `normalize_keypoints_2d` is True.

  Raises:
    ValueError: If `model_input_keypoint_type` is not supported.
    ValueError: If `keypoint_dropout_probs` is not of length 2 or 3.
    ValueError: If `keypoint_profile_2d` is not specified when normalizing 2D
      keypoints.
    ValueError: If keypoint profile name is not 'LEGACY_2DCOCO13', '2DSTD13',
      or 'INTERNAL_2DSTD13' when applying structured keypoint dropout.
    ValueError: If number of instances is not 1 or 2.
    ValueError: If `keypoint_profile_2d` is not specified when forcing keypoint
      masks on.
  """
    keypoints_2d, keypoint_masks_2d = preprocess_keypoints_2d(
        keypoints_2d,
        keypoint_masks_2d,
        keypoints_3d,
        model_input_keypoint_type,
        keypoint_profile_2d=keypoint_profile_2d,
        keypoint_profile_3d=keypoint_profile_3d,
        azimuth_range=azimuth_range,
        elevation_range=elevation_range,
        roll_range=roll_range,
        normalized_camera_depth_range=normalized_camera_depth_range,
        sequential_inputs=sequential_inputs,
        seed=seed)

    side_outputs = {}

    if len(keypoint_dropout_probs) not in [2, 3]:
        raise ValueError('Invalid keypoint dropout probability tuple: `%s`.' %
                         str(keypoint_dropout_probs))

    if keypoint_dropout_probs[0] > 0.0 and keypoint_dropout_probs[1] > 0.0:
        instance_keypoint_masks_2d = apply_stratified_instance_keypoint_dropout(
            keypoint_masks_2d,
            probability_to_apply=keypoint_dropout_probs[0],
            probability_to_drop=keypoint_dropout_probs[1],
            seed=seed)

        if (sequential_inputs and len(keypoint_dropout_probs) == 3
                and keypoint_dropout_probs[2] > 0.0):
            sequence_keypoint_masks_2d = apply_stratified_sequence_keypoint_dropout(
                keypoint_masks_2d,
                probability_to_apply=keypoint_dropout_probs[0],
                probability_to_drop=keypoint_dropout_probs[1],
                seed=seed)
            sequence_axis = sequence_keypoint_masks_2d.shape.ndims - 1
            keypoint_masks_2d = data_utils.mix_batch(
                [sequence_keypoint_masks_2d], [instance_keypoint_masks_2d],
                axis=sequence_axis,
                keep_lhs_prob=keypoint_dropout_probs[2],
                seed=seed)[0]
        else:
            keypoint_masks_2d = instance_keypoint_masks_2d

    if structured_keypoint_mask_processor is not None:
        keypoint_masks_2d = structured_keypoint_mask_processor(
            keypoint_masks=keypoint_masks_2d,
            keypoint_profile=keypoint_profile_2d,
            seed=seed)

    if normalize_keypoints_2d:
        if keypoint_profile_2d is None:
            raise ValueError(
                'Failed to normalize 2D keypoints due to unspecified '
                'keypoint profile.')
        keypoints_2d, offset_points, scale_distances = (
            keypoint_profile_2d.normalize(keypoints_2d, keypoint_masks_2d))
        side_outputs.update({
            common.KEY_OFFSET_POINTS_2D: offset_points,
            common.KEY_SCALE_DISTANCES_2D: scale_distances
        })

    if uniform_keypoint_jittering_max_offset_2d > 0.0:
        keypoints_2d = _add_uniform_keypoint_jittering(
            keypoints_2d,
            max_jittering_offset=uniform_keypoint_jittering_max_offset_2d,
            seed=seed)

    if gaussian_keypoint_jittering_offset_stddev_2d > 0.0:
        keypoints_2d = _add_gaussian_keypoint_jittering(
            keypoints_2d,
            jittering_offset_stddev=
            gaussian_keypoint_jittering_offset_stddev_2d,
            seed=seed)

    if set_on_mask_for_non_anchors:
        non_anchor_indices = list(
            range(1,
                  keypoint_masks_2d.shape.as_list()[1]))
        if non_anchor_indices:
            keypoint_masks_2d = data_utils.update_sub_tensor(
                keypoint_masks_2d,
                indices=non_anchor_indices,
                axis=1,
                update_func=tf.ones_like)

    if mix_mask_sub_batches:
        keypoint_masks_2d = data_utils.mix_batch(
            [tf.ones_like(keypoint_masks_2d)], [keypoint_masks_2d], axis=1)[0]

    if forced_mask_on_part_names:
        keypoint_masks_2d = _override_keypoint_masks(
            keypoint_masks_2d,
            keypoint_profile=keypoint_profile_2d,
            part_names=forced_mask_on_part_names,
            overriding_func=tf.ones_like)

    if forced_mask_off_part_names:
        keypoint_masks_2d = _override_keypoint_masks(
            keypoint_masks_2d,
            keypoint_profile=keypoint_profile_2d,
            part_names=forced_mask_off_part_names,
            overriding_func=tf.zeros_like)

    if model_input_keypoint_mask_type in [
            common.MODEL_INPUT_KEYPOINT_MASK_TYPE_MASK_KEYPOINTS,
            common.MODEL_INPUT_KEYPOINT_MASK_TYPE_MASK_KEYPOINTS_AND_AS_INPUT
    ]:
        # Mask out invalid keypoints.
        keypoints_2d = tf.where(
            data_utils.tile_last_dims(
                tf.expand_dims(tf.math.equal(keypoint_masks_2d, 1.0), axis=-1),
                last_dim_multiples=[tf.shape(keypoints_2d)[-1]]), keypoints_2d,
            tf.zeros_like(keypoints_2d))

    side_outputs.update({
        common.KEY_PREPROCESSED_KEYPOINTS_2D:
        keypoints_2d,
        common.KEY_PREPROCESSED_KEYPOINT_MASKS_2D:
        keypoint_masks_2d,
    })

    features = keypoints_2d
    if model_input_keypoint_mask_type in [
            common.MODEL_INPUT_KEYPOINT_MASK_TYPE_AS_INPUT,
            common.MODEL_INPUT_KEYPOINT_MASK_TYPE_MASK_KEYPOINTS_AND_AS_INPUT
    ]:
        features = tf.concat(
            [keypoints_2d,
             tf.expand_dims(keypoint_masks_2d, axis=-1)],
            axis=-1)

    if rescale_features:
        # Scale up features to compensate for any keypoint masking.
        feature_rescales = keypoint_masks_2d.shape.as_list()[-1] / (
            tf.math.maximum(
                1e-12,
                tf.math.reduce_sum(keypoint_masks_2d, axis=-1, keepdims=True)))
        features *= tf.expand_dims(feature_rescales, axis=-1)

    features = data_utils.flatten_last_dims(features, num_last_dims=2)
    return features, side_outputs
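For illustration, the stratified instance keypoint dropout described in the docstring (first select instances with `probability_to_apply`, then drop their keypoints with `probability_to_drop`) could be sketched in pure TensorFlow as follows. This is a hypothetical re-implementation of the stated semantics, not the project's `apply_stratified_instance_keypoint_dropout`:

import tensorflow as tf

def stratified_instance_keypoint_dropout_sketch(keypoint_masks,
                                                probability_to_apply,
                                                probability_to_drop,
                                                seed=None):
    """Illustrative sketch: zeros keypoint masks in randomly chosen instances."""
    # Select which instances participate in dropout at all. Shape = [...].
    apply_dropout = (
        tf.random.uniform(tf.shape(keypoint_masks)[:-1], seed=seed) <
        probability_to_apply)
    # Within selected instances, choose individual keypoints to drop.
    # Shape = [..., num_keypoints].
    drop_keypoints = (
        tf.random.uniform(tf.shape(keypoint_masks), seed=seed) <
        probability_to_drop)
    drops = tf.math.logical_and(
        tf.expand_dims(apply_dropout, axis=-1), drop_keypoints)
    return tf.where(drops, tf.zeros_like(keypoint_masks), keypoint_masks)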
Example 7
def test_tile_last_dims(self):
    # Shape = [2, 1, 2, 1].
    x = tf.constant([[[[1], [2]]], [[[3], [4]]]])
    tiled_x = data_utils.tile_last_dims(x, last_dim_multiples=[2, 2])
    self.assertAllEqual(tiled_x, [[[[1, 1], [2, 2], [1, 1], [2, 2]]],
                                  [[[3, 3], [4, 4], [3, 3], [4, 4]]]])
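For reference, the behavior exercised by this test matches a minimal sketch of `tile_last_dims` that tiles only the trailing dimensions (the project's actual implementation may differ):

import tensorflow as tf

def tile_last_dims(x, last_dim_multiples):
    """Tiles only the last len(last_dim_multiples) dimensions of `x`."""
    multiples = [1] * (len(x.shape) - len(last_dim_multiples))
    multiples.extend(last_dim_multiples)
    return tf.tile(x, multiples)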