def compute_max_spans(points, point_masks):
    """Computes maximum point set spans in any direction.

    The span is the largest absolute per-coordinate difference found among
    point pairs whose first member is marked valid by `point_masks`.

    Args:
        points: A tensor for points. Shape = [..., num_points, point_dim].
        point_masks: A tensor for point validities, used directly as the
            `tf.where` condition (so it is expected to be boolean). Shape =
            [..., num_points].

    Returns:
        A tensor for maximum spans. Shape = [..., 1, 1].
    """
    points_shape = tf.shape(points)
    num_points = points_shape[-2]
    # Derive `point_dim` from `points` itself so that this function is
    # self-contained and does not depend on a name from an enclosing scope.
    point_dim = points_shape[-1]

    # Broadcast validity to the pairwise layout
    # [..., num_points, num_points, point_dim]; entry [i, j, :] is mask[i].
    # NOTE(review): only the first point of each pair is masked, so a pair
    # made of a valid point i and an invalid point j still contributes to
    # the span -- confirm whether masking both members is intended.
    point_masks = data_utils.tile_last_dims(
        tf.expand_dims(tf.expand_dims(point_masks, axis=-1), axis=-1),
        last_dim_multiples=[num_points, point_dim])

    # diffs[..., i, j, :] = |points[..., i, :] - points[..., j, :]|.
    diffs = tf.math.abs(
        data_utils.tile_last_dims(
            tf.expand_dims(points, axis=-2),
            last_dim_multiples=[num_points, 1]) -
        data_utils.tile_last_dims(
            tf.expand_dims(points, axis=-3),
            last_dim_multiples=[num_points, 1, 1]))
    diffs = tf.where(point_masks, diffs, tf.zeros_like(diffs))

    # Reduce over both point axes and the coordinate axis, then drop one of
    # the kept singleton axes so the result has shape [..., 1, 1].
    max_spans = tf.squeeze(
        tf.math.reduce_max(diffs, axis=[-3, -2, -1], keepdims=True),
        axis=[-2])
    return max_spans
def compute_procrustes_alignment_params(target_points, source_points):
    """Computes procrustes alignment parameters.

    Both point sets are standardized with `standardize_points`, the rotation
    is obtained from the SVD of their product, and scale/translation are then
    recovered from the standardization offsets and scales.

    Args:
        target_points: A tensor for target points. Shape = [..., num_points,
            point_dim].
        source_points: A tensor for source points. Shape = [..., num_points,
            point_dim].

    Returns:
        rotations: A tensor for rotations. Shape = [..., point_dim, point_dim].
        scales: A tensor for scales. Shape = [..., 1, 1].
        translations: A tensor for translations. Shape = [..., 1, point_dim].
    """
    # The point dimension must be statically known; it sizes the sign vector.
    point_dim = target_points.shape.as_list()[-1]

    # normalized_targets: Shape = [..., num_points, point_dim].
    # target_offsets: Shape = [..., 1, point_dim].
    # target_scales: Shape = [..., 1, 1].
    normalized_targets, target_offsets, target_scales = standardize_points(
        target_points)
    # normalized_sources: Shape = [..., num_points, point_dim].
    # source_offsets: Shape = [..., 1, point_dim].
    # source_scales: presumably Shape = [..., 1, 1] like `target_scales`,
    # since `scales` is documented as [..., 1, 1] -- TODO confirm against
    # `standardize_points`.
    normalized_sources, source_offsets, source_scales = standardize_points(
        source_points)

    # Shape = [..., point_dim, point_dim].
    cross_matrix = tf.linalg.matmul(
        normalized_targets, normalized_sources, transpose_a=True)

    # singular_values: Shape = [..., point_dim].
    # left_vectors: Shape = [..., point_dim, point_dim].
    # right_vectors: Shape = [..., point_dim, point_dim].
    singular_values, left_vectors, right_vectors = tf.linalg.svd(cross_matrix)

    # Determinant sign of the uncorrected solution. Shape = [..., 1].
    uncorrected = tf.linalg.matmul(
        right_vectors, left_vectors, transpose_b=True)
    det_signs = tf.expand_dims(
        tf.math.sign(tf.linalg.det(uncorrected)), axis=-1)

    # Sign vector (1, ..., 1, sign(det)). Shape = [..., point_dim].
    leading_ones = data_utils.tile_last_dims(
        tf.ones_like(det_signs), last_dim_multiples=[point_dim - 1])
    sign_flips = tf.concat([leading_ones, det_signs], axis=-1)

    # Apply the sign to the last singular value and to the last column of the
    # right singular vectors, so a negative determinant (reflection) is
    # flipped before the final rotation is formed.
    singular_values = singular_values * sign_flips
    right_vectors = right_vectors * tf.expand_dims(sign_flips, axis=-2)

    # Shape = [..., point_dim, point_dim].
    rotations = tf.linalg.matmul(
        right_vectors, left_vectors, transpose_b=True)

    # Shape = [..., 1, 1].
    singular_value_sums = tf.math.reduce_sum(
        singular_values, axis=-1, keepdims=True)
    scales = (
        tf.expand_dims(singular_value_sums, axis=-1) * target_scales /
        source_scales)

    # Shape = [..., 1, point_dim].
    rotated_source_offsets = tf.linalg.matmul(source_offsets, rotations)
    translations = target_offsets - scales * rotated_source_offsets

    return rotations, scales, translations
def naive_normalize_points(points, point_masks):
    """Naively normalizes points by shifting and scaling.

    Points are shifted by the mean of the valid points and divided by the
    maximum per-coordinate span (clamped below at 1e-12). Invalid points are
    set to zero in the output.

    Args:
        points: A tensor for points. Shape = [..., num_points, point_dim].
        point_masks: A tensor for point validities; cast to boolean
            internally. Shape = [..., num_points].

    Returns:
        points: A tensor for normalized points. Shape = [..., num_points,
            point_dim].
    """
    valid_masks = tf.cast(point_masks, dtype=tf.bool)
    point_dim = tf.shape(points)[-1]
    num_points = tf.shape(points)[-2]

    # Center of the valid points.
    # NOTE(review): `tf.boolean_mask` collapses every dimension covered by
    # the mask, so for batched inputs this mean appears to be taken over the
    # valid points of the whole batch rather than per sample -- confirm.
    centers = tf.math.reduce_mean(
        tf.boolean_mask(points, valid_masks), axis=-2, keepdims=True)

    # Pairwise absolute differences,
    # pairwise_diffs[..., i, j, :] = |points[..., i, :] - points[..., j, :]|.
    first_points = data_utils.tile_last_dims(
        tf.expand_dims(points, axis=-2), last_dim_multiples=[num_points, 1])
    second_points = data_utils.tile_last_dims(
        tf.expand_dims(points, axis=-3),
        last_dim_multiples=[num_points, 1, 1])
    pairwise_diffs = tf.math.abs(first_points - second_points)

    # Validity in the pairwise layout; entry [i, j, :] is the mask of point i.
    # NOTE(review): only the first point of each pair is masked, so pairs of
    # a valid point with an invalid one still count -- confirm intent.
    pairwise_masks = data_utils.tile_last_dims(
        tf.expand_dims(tf.expand_dims(valid_masks, axis=-1), axis=-1),
        last_dim_multiples=[num_points, point_dim])
    pairwise_diffs = tf.where(
        pairwise_masks, pairwise_diffs, tf.zeros_like(pairwise_diffs))

    # Maximum over both point axes and the coordinate axis. Shape after the
    # squeeze = [..., 1, 1].
    max_spans = tf.squeeze(
        tf.math.reduce_max(pairwise_diffs, axis=[-3, -2, -1], keepdims=True),
        axis=[-2])

    # Shift and scale; the clamp avoids dividing by a zero span.
    normalized_points = (points - centers) / tf.math.maximum(1e-12, max_spans)

    # Zero out invalid points in the result.
    coordinate_masks = data_utils.tile_last_dims(
        tf.expand_dims(valid_masks, axis=-1), last_dim_multiples=[point_dim])
    return tf.where(
        coordinate_masks, normalized_points, tf.zeros_like(normalized_points))
def override_points(points, from_indices_list, to_indices):
    """Overrides points with other points.

    Points at `to_indices` will be overridden with centers of points from
    `from_indices_list`.

    For example:

        from_indices_list = [[0, 1], [2]]
        to_indices = [3, 4]
        updated_points = override_points(
            points, from_indices_list, to_indices)

    Will result in:

        updated_points[..., 3, :] ==
            ((points[..., 0, :] + points[..., 1, :]) / 2 +
             points[..., 2, :]) / 2
        updated_points[..., 4, :] ==
            ((points[..., 0, :] + points[..., 1, :]) / 2 +
             points[..., 2, :]) / 2

    Args:
        points: A tensor for points to override. Shape = [..., num_points,
            point_dim].
        from_indices_list: A list of integer lists for point indices to
            compute overriding points.
        to_indices: A list of integers for point indices to be overridden.

    Returns:
        A tensor for updated points.
    """
    # One `get_points` result per index group, stacked along the point axis.
    group_points = tf.concat(
        [get_points(points, indices) for indices in from_indices_list],
        axis=-2)

    # Average the groups into a single point, then repeat it once for every
    # destination index.
    replacement = tf.math.reduce_mean(group_points, axis=-2, keepdims=True)
    tiled_replacement = data_utils.tile_last_dims(
        replacement, last_dim_multiples=[len(to_indices), 1])

    def replace_with_center(_):
        # The existing sub-tensor is ignored; it is replaced wholesale.
        return tiled_replacement

    return data_utils.update_sub_tensor(
        points,
        indices=to_indices,
        axis=-2,
        update_func=replace_with_center)
def process_decoded_image_sizes(decoded_tensors,
                                sequence_length,
                                common_module=common):
    """Processes decoded image sizes.

    Stacks the decoded image heights and widths into (height, width) pairs,
    inserts a new second-to-last axis and tiles it `sequence_length` times.

    Args:
        decoded_tensors: A dictionary for decoded tensors. Must contain the
            keys `common_module.TFSE_KEY_IMAGE_HEIGHT` and
            `common_module.TFSE_KEY_IMAGE_WIDTH`.
        sequence_length: An integer for input sequence length.
        common_module: A Python module that defines common constants.

    Returns:
        A dictionary with the single key `common_module.KEY_IMAGE_SIZES`
        mapped to the tiled image size tensor.
    """
    heights = decoded_tensors[common_module.TFSE_KEY_IMAGE_HEIGHT]
    widths = decoded_tensors[common_module.TFSE_KEY_IMAGE_WIDTH]

    # (height, width) pairs along a new last axis.
    sizes = tf.stack([heights, widths], axis=-1)
    # Add an axis before the last one and repeat the sizes along it.
    sizes = tf.expand_dims(sizes, axis=-2)
    sizes = data_utils.tile_last_dims(
        sizes, last_dim_multiples=[sequence_length, 1])

    return {common_module.KEY_IMAGE_SIZES: sizes}
def create_model_input(keypoints_2d,
                       keypoint_masks_2d,
                       keypoints_3d,
                       model_input_keypoint_type,
                       model_input_keypoint_mask_type=(
                           common.MODEL_INPUT_KEYPOINT_MASK_TYPE_NO_USE),
                       normalize_keypoints_2d=True,
                       keypoint_profile_2d=None,
                       uniform_keypoint_jittering_max_offset_2d=0.0,
                       gaussian_keypoint_jittering_offset_stddev_2d=0.0,
                       keypoint_dropout_probs=(0.0, 0.0),
                       structured_keypoint_mask_processor=None,
                       set_on_mask_for_non_anchors=False,
                       mix_mask_sub_batches=False,
                       rescale_features=False,
                       forced_mask_on_part_names=None,
                       forced_mask_off_part_names=None,
                       keypoint_profile_3d=None,
                       azimuth_range=(-math.pi, math.pi),
                       elevation_range=(-math.pi / 6.0, math.pi / 6.0),
                       roll_range=(-math.pi / 6.0, math.pi / 6.0),
                       normalized_camera_depth_range=(),
                       sequential_inputs=False,
                       seed=None):
    """Creates model input features from input data.

    Processing order: 2D keypoint preprocessing, random keypoint dropout,
    structured mask processing, 2D normalization, keypoint jittering, mask
    overrides (non-anchor, sub-batch mixing, forced on/off), keypoint masking,
    optional mask concatenation, optional feature rescaling, and flattening of
    the last two dimensions.

    Args:
        keypoints_2d: A tensor for input 2D keypoints. Shape = [...,
            num_keypoints, 2]. Use None if irrelevant.
        keypoint_masks_2d: A tensor for input 2D keypoint masks. Shape = [...,
            num_keypoints]. Use None if irrelevant.
        keypoints_3d: A tensor for input 3D keypoints. Shape = [...,
            num_keypoints, 3]. Use None if irrelevant.
        model_input_keypoint_type: An enum string for model input type. See
            `MODEL_INPUT_TYPE_*` for supported values.
        model_input_keypoint_mask_type: An enum string for model input
            keypoint mask type. See `MODEL_INPUT_KEYPOINT_MASK_TYPE_*` for
            supported values.
        normalize_keypoints_2d: A boolean for whether to normalize 2D
            keypoints at the end.
        keypoint_profile_2d: A KeypointProfile2D object for input 2D
            keypoints. Required for normalizing 2D keypoints, 3D-to-2D
            projection, or forcing masks on/off.
        uniform_keypoint_jittering_max_offset_2d: A float for maximum 2D
            keypoint jittering offset. Random jittering offset within
            [-uniform_keypoint_jittering_max_offset_2d,
            uniform_keypoint_jittering_max_offset_2d] is to be added to each
            keypoint 2D. Note that the jittering happens after the 2D
            normalization. Ignored if non-positive.
        gaussian_keypoint_jittering_offset_stddev_2d: A float for standard
            deviation of Gaussian 2D keypoint jittering offset. Random
            jittering offset sampled from N(0,
            gaussian_keypoint_jittering_offset_stddev_2d) is to be added to
            each keypoint. Note that the jittering happens after the 2D
            normalization. Ignored if non-positive.
        keypoint_dropout_probs: A tuple of floats for the keypoint random
            dropout probabilities in the format (probability_to_apply,
            probability_to_drop). We perform stratified dropout as first
            select instances with `probability_to_apply` and then drop their
            keypoints with `probability_to_drop`. When `sequential_inputs` is
            True, there might be a third element indicating the probability
            of using sequence-level dropout. Only used when keypoint scores
            are relevant.
        structured_keypoint_mask_processor: A Python function for generating
            keypoint masks with structured dropout. It is called with the
            keyword arguments `keypoint_masks`, `keypoint_profile` and `seed`.
            Ignored if None.
        set_on_mask_for_non_anchors: A boolean for whether to always use on
            (1) masks for non-anchor samples. We assume the second from the
            left tensor dimension is for anchor/non-anchor, and the non-anchor
            samples start at the second element along that dimension.
        mix_mask_sub_batches: A boolean for whether to apply sub-batch mixing
            to processed masks and all-one masks.
        rescale_features: A boolean for whether to rescale features by the
            ratio between total number of mask elements and kept mask
            elements.
        forced_mask_on_part_names: A list of standard names of parts of which
            the masks are forced on (by setting value to 1.0). See
            `KeypointProfile.get_standard_part_index` for standard part names.
        forced_mask_off_part_names: A list of standard names of parts of which
            the masks are forced off (by setting value to 0.0). See
            `KeypointProfile.get_standard_part_index` for standard part names.
        keypoint_profile_3d: A KeypointProfile3D object for input 3D
            keypoints. Only used when 3D-to-2D projection is involved.
        azimuth_range: A tuple for minimum and maximum azimuth angles to
            randomly rotate 3D keypoints with. For non-sequential inputs, a
            2-tuple for (minimum angle, maximum angle) is expected. For
            sequence inputs, uses 2-tuple to independently sample starting and
            ending camera angles, or uses 4-tuple for (minimum starting angle,
            maximum starting angle, minimum angle increment, maximum angle
            increment) to first sample starting angles and add random delta
            angles to them as ending angles.
        elevation_range: A tuple for minimum and maximum elevation angles to
            randomly rotate 3D keypoints with. For non-sequential inputs, a
            2-tuple for (minimum angle, maximum angle) is expected. For
            sequence inputs, uses 2-tuple to independently sample starting and
            ending camera angles, or uses 4-tuple for (minimum starting angle,
            maximum starting angle, minimum angle increment, maximum angle
            increment) to first sample starting angles and add random delta
            angles to them as ending angles.
        roll_range: A tuple for minimum and maximum roll angles to randomly
            rotate 3D keypoints with. For non-sequential inputs, a 2-tuple for
            (minimum angle, maximum angle) is expected. For sequence inputs,
            uses 2-tuple to independently sample starting and ending camera
            angles, or uses 4-tuple for (minimum starting angle, maximum
            starting angle, minimum angle increment, maximum angle increment)
            to first sample starting angles and add random delta angles to
            them as ending angles.
        normalized_camera_depth_range: A tuple for minimum and maximum
            normalized camera depth for random camera augmentation. If empty,
            uses constant depth as 1 over the 2D pose normalization scale
            unit.
        sequential_inputs: A boolean flag indicating whether the inputs are
            sequential. If True, the input keypoints are supposed to be in
            shape [..., sequence_length, num_keypoints, keypoint_dim].
        seed: An integer for random seed.

    Returns:
        features: A tensor for input features. Shape = [..., feature_dim].
        side_outputs: A dictionary for side outputs, which includes
            `offset_points_2d` (shape = [..., 1, 2]) and `scale_distances_2d`
            (shape = [..., 1, 1]) if `normalize_keypoints_2d` is True. The
            preprocessed 2D keypoints and keypoint masks are also stored under
            `common.KEY_PREPROCESSED_KEYPOINTS_2D` and
            `common.KEY_PREPROCESSED_KEYPOINT_MASKS_2D`.

    Raises:
        ValueError: If `model_input_keypoint_type` is not supported.
        ValueError: If `keypoint_dropout_probs` is not of length 2 or 3.
        ValueError: If `keypoint_profile_2d` is not specified when normalizing
            2D keypoints.
        ValueError: If keypoint profile name is not 'LEGACY_2DCOCO13',
            '2DSTD13', or 'INTERNAL_2DSTD13' when applying structured keypoint
            dropout.
        ValueError: If number of instances is not 1 or 2.
        ValueError: If `keypoint_profile_2d` is not specified when forcing
            keypoint masks on.

        NOTE(review): only the dropout-tuple and normalization errors are
        raised directly in this function; the others are presumably raised by
        the helpers it calls -- confirm against those helpers.
    """
    # Produce the 2D keypoints and masks to work on; the 3D keypoints, camera
    # ranges and profiles are only forwarded to this helper.
    keypoints_2d, keypoint_masks_2d = preprocess_keypoints_2d(
        keypoints_2d,
        keypoint_masks_2d,
        keypoints_3d,
        model_input_keypoint_type,
        keypoint_profile_2d=keypoint_profile_2d,
        keypoint_profile_3d=keypoint_profile_3d,
        azimuth_range=azimuth_range,
        elevation_range=elevation_range,
        roll_range=roll_range,
        normalized_camera_depth_range=normalized_camera_depth_range,
        sequential_inputs=sequential_inputs,
        seed=seed)

    side_outputs = {}

    if len(keypoint_dropout_probs) not in [2, 3]:
        raise ValueError('Invalid keypoint dropout probability tuple: `%s`.'
                         % str(keypoint_dropout_probs))

    # Random dropout is only applied when both the apply- and drop-
    # probabilities are positive.
    if keypoint_dropout_probs[0] > 0.0 and keypoint_dropout_probs[1] > 0.0:
        instance_keypoint_masks_2d = apply_stratified_instance_keypoint_dropout(
            keypoint_masks_2d,
            probability_to_apply=keypoint_dropout_probs[0],
            probability_to_drop=keypoint_dropout_probs[1],
            seed=seed)

        # For sequential inputs with a positive third probability, also draw
        # sequence-level dropout masks from the same pre-dropout masks and mix
        # them with the instance-level ones; the third probability is passed
        # as `keep_lhs_prob` with the sequence-level masks on the left.
        if (sequential_inputs and len(keypoint_dropout_probs) == 3 and
                keypoint_dropout_probs[2] > 0.0):
            sequence_keypoint_masks_2d = apply_stratified_sequence_keypoint_dropout(
                keypoint_masks_2d,
                probability_to_apply=keypoint_dropout_probs[0],
                probability_to_drop=keypoint_dropout_probs[1],
                seed=seed)
            # Index of the last mask dimension, used as the mixing axis.
            sequence_axis = sequence_keypoint_masks_2d.shape.ndims - 1
            keypoint_masks_2d = data_utils.mix_batch(
                [sequence_keypoint_masks_2d], [instance_keypoint_masks_2d],
                axis=sequence_axis,
                keep_lhs_prob=keypoint_dropout_probs[2],
                seed=seed)[0]
        else:
            keypoint_masks_2d = instance_keypoint_masks_2d

    if structured_keypoint_mask_processor is not None:
        keypoint_masks_2d = structured_keypoint_mask_processor(
            keypoint_masks=keypoint_masks_2d,
            keypoint_profile=keypoint_profile_2d,
            seed=seed)

    # Normalization runs after dropout, so it receives the dropped-out masks.
    if normalize_keypoints_2d:
        if keypoint_profile_2d is None:
            raise ValueError(
                'Failed to normalize 2D keypoints due to unspecified '
                'keypoint profile.')
        keypoints_2d, offset_points, scale_distances = (
            keypoint_profile_2d.normalize(keypoints_2d, keypoint_masks_2d))
        side_outputs.update({
            common.KEY_OFFSET_POINTS_2D: offset_points,
            common.KEY_SCALE_DISTANCES_2D: scale_distances
        })

    # Jittering is applied after the (optional) normalization above.
    if uniform_keypoint_jittering_max_offset_2d > 0.0:
        keypoints_2d = _add_uniform_keypoint_jittering(
            keypoints_2d,
            max_jittering_offset=uniform_keypoint_jittering_max_offset_2d,
            seed=seed)

    if gaussian_keypoint_jittering_offset_stddev_2d > 0.0:
        keypoints_2d = _add_gaussian_keypoint_jittering(
            keypoints_2d,
            jittering_offset_stddev=gaussian_keypoint_jittering_offset_stddev_2d,
            seed=seed)

    if set_on_mask_for_non_anchors:
        # Indices 1.. along axis 1 are treated as the non-anchor samples.
        # NOTE(review): this reads the static size of axis 1; `range` would
        # fail if that dimension is statically unknown (None) -- confirm that
        # callers always provide a static shape here.
        non_anchor_indices = list(
            range(1, keypoint_masks_2d.shape.as_list()[1]))
        if non_anchor_indices:
            keypoint_masks_2d = data_utils.update_sub_tensor(
                keypoint_masks_2d,
                indices=non_anchor_indices,
                axis=1,
                update_func=tf.ones_like)

    if mix_mask_sub_batches:
        # Mix all-one masks with the processed masks along axis 1.
        # NOTE(review): unlike the other random ops in this function, this
        # `mix_batch` call is not given `seed` -- confirm whether intentional.
        keypoint_masks_2d = data_utils.mix_batch(
            [tf.ones_like(keypoint_masks_2d)], [keypoint_masks_2d],
            axis=1)[0]

    # Forced-on overrides run before forced-off ones, so a part listed in both
    # ends up forced off.
    if forced_mask_on_part_names:
        keypoint_masks_2d = _override_keypoint_masks(
            keypoint_masks_2d,
            keypoint_profile=keypoint_profile_2d,
            part_names=forced_mask_on_part_names,
            overriding_func=tf.ones_like)

    if forced_mask_off_part_names:
        keypoint_masks_2d = _override_keypoint_masks(
            keypoint_masks_2d,
            keypoint_profile=keypoint_profile_2d,
            part_names=forced_mask_off_part_names,
            overriding_func=tf.zeros_like)

    if model_input_keypoint_mask_type in [
            common.MODEL_INPUT_KEYPOINT_MASK_TYPE_MASK_KEYPOINTS,
            common.MODEL_INPUT_KEYPOINT_MASK_TYPE_MASK_KEYPOINTS_AND_AS_INPUT
    ]:
        # Mask out invalid keypoints: keep coordinates only where the mask
        # equals exactly 1.0 and zero the rest.
        keypoints_2d = tf.where(
            data_utils.tile_last_dims(
                tf.expand_dims(tf.math.equal(keypoint_masks_2d, 1.0), axis=-1),
                last_dim_multiples=[tf.shape(keypoints_2d)[-1]]),
            keypoints_2d, tf.zeros_like(keypoints_2d))

    # Expose the final keypoints and masks before they are merged into the
    # flattened feature tensor.
    side_outputs.update({
        common.KEY_PREPROCESSED_KEYPOINTS_2D: keypoints_2d,
        common.KEY_PREPROCESSED_KEYPOINT_MASKS_2D: keypoint_masks_2d,
    })

    features = keypoints_2d
    if model_input_keypoint_mask_type in [
            common.MODEL_INPUT_KEYPOINT_MASK_TYPE_AS_INPUT,
            common.MODEL_INPUT_KEYPOINT_MASK_TYPE_MASK_KEYPOINTS_AND_AS_INPUT
    ]:
        # Append each keypoint's mask as an extra last-axis channel.
        features = tf.concat(
            [keypoints_2d, tf.expand_dims(keypoint_masks_2d, axis=-1)],
            axis=-1)

    if rescale_features:
        # Scale up features to compensate for any keypoint masking: static
        # keypoint count divided by the mask sum (clamped at 1e-12 to avoid
        # division by zero).
        # NOTE(review): when masks were concatenated above, the mask channel
        # is rescaled together with the coordinates -- confirm intent.
        feature_rescales = keypoint_masks_2d.shape.as_list()[-1] / (
            tf.math.maximum(
                1e-12,
                tf.math.reduce_sum(keypoint_masks_2d, axis=-1, keepdims=True)))
        features *= tf.expand_dims(feature_rescales, axis=-1)

    # Merge the keypoint and channel axes into one feature axis.
    features = data_utils.flatten_last_dims(features, num_last_dims=2)
    return features, side_outputs
def test_tile_last_dims(self):
    """Checks that `tile_last_dims` tiles the last two dimensions by 2."""
    # Shape = [2, 1, 2, 1].
    inputs = tf.constant([[[[1], [2]]], [[[3], [4]]]])
    # Shape = [2, 1, 4, 2].
    expected_outputs = [
        [[[1, 1], [2, 2], [1, 1], [2, 2]]],
        [[[3, 3], [4, 4], [3, 3], [4, 4]]],
    ]

    outputs = data_utils.tile_last_dims(inputs, last_dim_multiples=[2, 2])

    self.assertAllEqual(outputs, expected_outputs)