Ejemplo n.º 1
0
    def call(self, inputs, training=False):
        """Runs the input through every block and returns the embedder output.

        Each block in `self.blocks` is applied in order, and the output of each
        block is recorded under that block's name.

        Args:
          inputs: An input tensor.
          training: A boolean indicating whether the call is for training or
            not. It is forwarded to every block.

        Returns:
          output: The activation recorded under the 'embedder' key. When
            `self.embedder_output_shape` has more than one dimension, it is
            first passed through `data_utils.recursively_expand_dims` and
            `data_utils.reshape_by_last_dims` with
            `last_dim_shape=self.embedder_output_shape`.
          activations: A dictionary mapping each block name to the output of
            that block. Its 'embedder' entry holds the same tensor as `output`.
        """
        activations = {}
        hidden = inputs
        for block in self.blocks:
            hidden = block(hidden, training=training)
            activations[block.name] = hidden

        embedding = activations['embedder']
        num_extra_dims = len(self.embedder_output_shape) - 1
        if num_extra_dims < 1:
            # Single-dimensional embedder shape: nothing to reshape.
            return embedding, activations

        # Append one trailing axis per extra dimension, then reshape the
        # trailing dimensions to the configured embedder output shape.
        embedding = data_utils.reshape_by_last_dims(
            data_utils.recursively_expand_dims(
                embedding, axes=[-1] * num_extra_dims),
            last_dim_shape=self.embedder_output_shape)
        activations['embedder'] = embedding
        return embedding, activations
def apply_stratified_sequence_keypoint_dropout(keypoint_masks,
                                               probability_to_apply,
                                               probability_to_drop,
                                               seed=None):
    """Applies stratified keypoint dropout on each sequence.

    Dropout is stratified: sequences are first selected with
    `probability_to_apply`, and within a selected sequence each keypoint is
    dropped with `probability_to_drop`. The keypoint draw has a size-1
    sequence axis, so a dropped keypoint is zeroed across the whole sequence.

    Args:
      keypoint_masks: A tensor for input keypoint masks. Shape = [...,
        sequence_length, num_keypoints].
      probability_to_apply: A float for the probability to perform dropout on
        a sequence.
      probability_to_drop: A float for the probability to perform dropout on a
        keypoint.
      seed: An integer for random seed. The same value is passed to both
        random draws.

    Returns:
      A tensor for output keypoint masks, equal to `keypoint_masks` with the
      dropped entries replaced by zeros.

    Raises:
      ValueError: If any dropout probability is non-positive.
    """
    if probability_to_apply <= 0.0 or probability_to_drop <= 0.0:
        probabilities = (probability_to_apply, probability_to_drop)
        raise ValueError('Invalid dropout probabilities: (%f, %f)' %
                         probabilities)

    mask_shape = tf.shape(keypoint_masks)
    # Leading dimensions shared by all sequences. Shape = [...].
    batch_shape = mask_shape[:-2]

    # One draw per sequence decides whether dropout applies to it.
    sequence_rolls = tf.random.uniform(
        batch_shape, minval=0.0, maxval=1.0, seed=seed)
    # Shape = [..., 1, 1].
    sequence_selected = data_utils.recursively_expand_dims(
        sequence_rolls < probability_to_apply, [-1, -1])

    # One draw per keypoint per sequence. Shape = [..., 1, num_keypoints].
    keypoint_roll_shape = tf.concat(
        [batch_shape, [1], mask_shape[-1:]], axis=-1)
    keypoint_rolls = tf.random.uniform(
        keypoint_roll_shape, minval=0.0, maxval=1.0, seed=seed)
    keypoint_selected = keypoint_rolls < probability_to_drop

    # A mask entry is dropped only if both its sequence and its keypoint were
    # selected.
    drop_masks = tf.math.logical_and(sequence_selected, keypoint_selected)
    return tf.where(drop_masks, tf.zeros_like(keypoint_masks), keypoint_masks)
Ejemplo n.º 3
0
def multi_head_logits(input_features, output_sizes, name, **kwargs):
    """Builds one linear logit head per requested output.

    Args:
      input_features: A tensor for input features. Shape =
        [..., sequence_length, feature_dim].
      output_sizes: A dictionary for output sizes in the format {output_name:
        output_size}, where `output_size` can be an integer or a list.
      name: A string for the name scope. Each head is named
        `name + '/OutputLogits/' + output_name`.
      **kwargs: A dictionary for additional arguments. This function reads
        `weight_max_norm` (default 0.0), `weight_initializer` and
        `bias_initializer` (both default to `tf.initializers.he_normal()`);
        any other entries are ignored here.

    Returns:
      outputs: A dictionary for the output logits, keyed by output name. For a
        multi-dimensional `output_size`, the head is created with the product
        of the sizes and then reshaped through `data_utils` helpers with
        `last_dim_shape=output_size`.
    """
    logits = {}
    for head_name, head_size in output_sizes.items():
        # Normalize a scalar size to a one-element shape list.
        head_shape = [head_size] if isinstance(head_size, int) else head_size
        head_logits = linear(
            input_features,
            output_size=np.prod(head_shape),
            weight_max_norm=kwargs.get('weight_max_norm', 0.0),
            weight_initializer=kwargs.get('weight_initializer',
                                          tf.initializers.he_normal()),
            bias_initializer=kwargs.get('bias_initializer',
                                        tf.initializers.he_normal()),
            name=name + '/OutputLogits/' + head_name)

        num_extra_dims = len(head_shape) - 1
        if num_extra_dims > 0:
            # Unflatten the head back to its multi-dimensional shape.
            head_logits = data_utils.reshape_by_last_dims(
                data_utils.recursively_expand_dims(
                    head_logits, axes=[-1] * num_extra_dims),
                last_dim_shape=head_shape)
        logits[head_name] = head_logits
    return logits
Ejemplo n.º 4
0
 def test_recursively_expand_dims(self):
     """Checks `recursively_expand_dims` with axes [-1, 1] on a [2, 3] input."""
     # Shape = [2, 3].
     inputs = tf.constant([[1, 2, 3], [4, 5, 6]])
     # Expected result has shape = [2, 1, 3, 1].
     expected = [[[[1], [2], [3]]], [[[4], [5], [6]]]]
     outputs = data_utils.recursively_expand_dims(inputs, axes=[-1, 1])
     self.assertAllEqual(outputs, expected)
Ejemplo n.º 5
0
def simple_model(input_features,
                 output_sizes,
                 is_training,
                 name='SimpleModel',
                 num_bottleneck_nodes=0,
                 **kwargs):
    """Implements `simple base` model with outputs.

    Note that the code differs from the original architecture by disabling
    dropout and maximum weight norms by default.

    Args:
      input_features: A tensor for input features. Shape = [..., feature_dim].
      output_sizes: A dictionary for output sizes in the format {output_name:
        output_size}, where `output_size` can be an integer or a list.
      is_training: A boolean for whether it is in training mode.
      name: A string for the name scope.
      num_bottleneck_nodes: An integer for size of the bottleneck layer to be
        added before the output layer(s). No bottleneck layer will be added if
        non-positive.
      **kwargs: A dictionary of additional arguments passed to `simple_base`.
        `weight_max_norm`, `weight_initializer` and `bias_initializer` are
        also read here for the bottleneck and output layers.

    Returns:
      outputs: A dictionary for output tensors in the format {output_name:
        output_tensors}. Output tensor shape = [..., output_size].
      activations: A dictionary of additional activation tensors for
        pre-output model activations. Keys include 'base_activations' and
        optionally 'bottleneck_activations'.
    """

    def fc_options():
        # Built per call so every layer receives its own default initializer
        # objects, exactly as if the arguments were spelled out inline.
        return {
            'weight_max_norm': kwargs.get('weight_max_norm', 0.0),
            'weight_initializer': kwargs.get('weight_initializer',
                                             tf.initializers.he_normal()),
            'bias_initializer': kwargs.get('bias_initializer',
                                           tf.initializers.he_normal()),
        }

    features = simple_base(
        input_features, is_training=is_training, name=name, **kwargs)
    activations = {'base_activations': features}

    if num_bottleneck_nodes > 0:
        features = linear(
            features,
            output_size=num_bottleneck_nodes,
            name=name + '/BottleneckLogits',
            **fc_options())
        activations['bottleneck_activations'] = features

    outputs = {}
    for head_name, head_size in output_sizes.items():
        # Normalize a scalar size to a one-element shape list.
        head_shape = [head_size] if isinstance(head_size, int) else head_size
        head_logits = linear(
            features,
            output_size=np.prod(head_shape),
            name=name + '/OutputLogits/' + head_name,
            **fc_options())

        num_extra_dims = len(head_shape) - 1
        if num_extra_dims > 0:
            # Unflatten the head back to its multi-dimensional shape.
            head_logits = data_utils.reshape_by_last_dims(
                data_utils.recursively_expand_dims(
                    head_logits, axes=[-1] * num_extra_dims),
                last_dim_shape=head_shape)
        outputs[head_name] = head_logits
    return outputs, activations
Ejemplo n.º 6
0
def randomly_rotate_and_project_3d_to_2d(keypoints_3d,
                                         azimuth_range,
                                         elevation_range,
                                         roll_range,
                                         normalized_camera_depth_range,
                                         sequential_inputs=False,
                                         seed=None):
    """Randomly rotates and projects 3D keypoints to 2D.

    Note that the default camera z will be added to the keypoint depths before
    projection, which implicitly assumes the input 3D keypoints are centered
    at camera origin. This function, however, does not normalize the input 3D
    keypoints.

    Args:
      keypoints_3d: A tensor for 3D keypoints. Shape = [..., num_keypoints, 3].
      azimuth_range: A 2-tuple for minimum and maximum azimuth angles to
        randomly rotate 3D keypoints with. For sequential inputs, also supports
        4-tuple for minimum/maximum angles as well as minimum/maximum angle
        deltas between starting and ending angles.
      elevation_range: A 2-tuple for minimum and maximum elevation angles to
        randomly rotate 3D keypoints with. For sequential inputs, also supports
        4-tuple for minimum/maximum angles as well as minimum/maximum angle
        deltas between starting and ending angles.
      roll_range: A 2-tuple for minimum and maximum roll angles to randomly
        rotate 3D keypoints with. For sequential inputs, also supports 4-tuple
        for minimum/maximum angles as well as minimum/maximum angle deltas
        between starting and ending angles.
      normalized_camera_depth_range: A tuple for minimum and maximum normalized
        camera depth for random camera augmentation.
      sequential_inputs: A boolean flag indicating whether the inputs are
        sequential. If True, the input keypoints are supposed to be in shape
        [..., sequence_length, num_keypoints, 3].
      seed: An integer for random seed. The same value is used for the
        rotation and for the camera depth draw.

    Returns:
      keypoints_2d: A tensor for projected 2D keypoints from randomly rotated
        3D keypoints.
    """
    rotated = randomly_rotate_3d(keypoints_3d,
                                 azimuth_range=azimuth_range,
                                 elevation_range=elevation_range,
                                 roll_range=roll_range,
                                 sequential_inputs=sequential_inputs,
                                 seed=seed)

    # Transform to default camera coordinate.
    to_camera = tf.linalg.LinearOperatorFullMatrix(
        tf.constant([
            [0.0, 0.0, -1.0],
            [-1.0, 0.0, 0.0],
            [0.0, 1.0, 0.0],
        ]))
    in_camera = to_camera.matvec(rotated)

    # Sample the camera depth. For sequential inputs one depth is drawn per
    # sequence (three trailing axes are stripped); otherwise one per instance.
    # TODO(liuti): Support varying depth for a sequence.
    min_depth, max_depth = (normalized_camera_depth_range[0],
                            normalized_camera_depth_range[1])
    num_trailing_axes = 3 if sequential_inputs else 2
    depths = tf.random.uniform(
        tf.shape(in_camera)[:-num_trailing_axes],
        minval=min_depth,
        maxval=max_depth,
        seed=seed)
    if sequential_inputs:
        depths = data_utils.recursively_expand_dims(depths, axes=[-1, -1])
    else:
        depths = tf.expand_dims(depths, axis=-1)

    # Move to default depth: shift only the z coordinate.
    no_shift = tf.zeros_like(depths)
    in_camera = in_camera + tf.stack([no_shift, no_shift, depths], axis=-1)

    # Project to 2D, clamping the divisor to avoid division by zero.
    xy = in_camera[..., :-1]
    z = in_camera[..., -1:]
    return xy / tf.math.maximum(1e-12, z)