def call(self, inputs, training=False):
    """Runs the inputs through every block in order and returns the embedding.

    Args:
      inputs: An input tensor.
      training: A boolean indicating whether the call is for training or not.

    Returns:
      output: The activation stored under the key 'embedder'. When
        `self.embedder_output_shape` has more than one entry, this activation
        is first passed through `data_utils.recursively_expand_dims` and
        `data_utils.reshape_by_last_dims` with that shape.
      activations: A dictionary mapping each block name to that block's
        output. Its 'embedder' entry is the same tensor as `output`.
    """
    activations = {}
    features = inputs
    for layer_block in self.blocks:
        features = layer_block(features, training=training)
        activations[layer_block.name] = features

    embeddings = activations['embedder']
    num_embedding_dims = len(self.embedder_output_shape)
    if num_embedding_dims <= 1:
        # Single-dimensional embedding shape: nothing to reshape.
        return embeddings, activations

    # Add one trailing axis per extra embedding dimension, then reshape the
    # trailing dimensions to the configured embedding shape.
    embeddings = data_utils.recursively_expand_dims(
        embeddings, axes=[-1] * (num_embedding_dims - 1))
    embeddings = data_utils.reshape_by_last_dims(
        embeddings, last_dim_shape=self.embedder_output_shape)
    activations['embedder'] = embeddings
    return embeddings, activations
def apply_stratified_sequence_keypoint_dropout(keypoint_masks,
                                               probability_to_apply,
                                               probability_to_drop,
                                               seed=None):
    """Applies stratified keypoint dropout on each sequence.

    Dropout is stratified in two stages: a sequence is first selected with
    `probability_to_apply`, and within a selected sequence each keypoint is
    dropped with `probability_to_drop`. The keypoint decision is drawn once
    per sequence (shape [..., 1, num_keypoints]) and broadcast along the
    sequence axis.

    Args:
      keypoint_masks: A tensor for input keypoint masks. Shape = [...,
        sequence_length, num_keypoints].
      probability_to_apply: A float for the probability to perform dropout on
        a sequence.
      probability_to_drop: A float for the probability to perform dropout on a
        keypoint.
      seed: An integer for random seed. The same value is passed to both
        random draws.

    Returns:
      A tensor for output keypoint masks, with dropped entries set to zero.

    Raises:
      ValueError: If any dropout probability is non-positive.
    """
    probabilities = (probability_to_apply, probability_to_drop)
    if any(probability <= 0.0 for probability in probabilities):
        raise ValueError('Invalid dropout probabilities: (%f, %f)' %
                         (probability_to_apply, probability_to_drop))

    # One uniform draw per sequence. Shape = [...].
    sequence_draws = tf.random.uniform(
        tf.shape(keypoint_masks)[:-2], minval=0.0, maxval=1.0, seed=seed)
    # Shape = [..., 1, 1].
    sequence_selected = data_utils.recursively_expand_dims(
        sequence_draws < probability_to_apply, [-1, -1])

    # One uniform draw per (sequence, keypoint).
    # Shape = [..., 1, num_keypoints].
    keypoint_draw_shape = tf.concat(
        [tf.shape(keypoint_masks)[:-2], [1], [tf.shape(keypoint_masks)[-1]]],
        axis=-1)
    keypoint_draws = tf.random.uniform(
        keypoint_draw_shape, minval=0.0, maxval=1.0, seed=seed)
    keypoint_selected = keypoint_draws < probability_to_drop

    # A mask entry is dropped only when both its sequence and its keypoint
    # were selected.
    dropped = tf.math.logical_and(sequence_selected, keypoint_selected)
    kept = tf.math.logical_not(dropped)
    return tf.where(kept, keypoint_masks, tf.zeros_like(keypoint_masks))
def multi_head_logits(input_features, output_sizes, name, **kwargs):
    """Builds one linear logit head per requested output.

    Args:
      input_features: A tensor for input features. Shape = [...,
        sequence_length, feature_dim].
      output_sizes: A dictionary for output sizes in the format {output_name:
        output_size}, where `output_size` can be an integer or a list.
      name: A string for the name scope. Each head is named
        `name + '/OutputLogits/' + output_name`.
      **kwargs: A dictionary for additional arguments. This function reads
        `weight_max_norm` (default 0.0), `weight_initializer` and
        `bias_initializer` (both default to a new
        `tf.initializers.he_normal()`); any other entries are ignored here.

    Returns:
      outputs: A dictionary for the output logits, keyed by output name. Heads
        whose size has more than one entry are reshaped to that size along
        their trailing dimensions.
    """
    outputs = {}
    for output_name, output_size in output_sizes.items():
        head_shape = ([output_size]
                      if isinstance(output_size, int) else output_size)

        # Default initializers are constructed inside the loop on purpose, so
        # every head receives its own initializer instance.
        logits = linear(
            input_features,
            output_size=np.prod(head_shape),
            weight_max_norm=kwargs.get('weight_max_norm', 0.0),
            weight_initializer=kwargs.get('weight_initializer',
                                          tf.initializers.he_normal()),
            bias_initializer=kwargs.get('bias_initializer',
                                        tf.initializers.he_normal()),
            name=name + '/OutputLogits/' + output_name)

        num_extra_dims = len(head_shape) - 1
        if num_extra_dims > 0:
            # The linear layer emits a flat last dimension of size
            # prod(head_shape); expand and reshape it to `head_shape`.
            logits = data_utils.recursively_expand_dims(
                logits, axes=[-1] * num_extra_dims)
            logits = data_utils.reshape_by_last_dims(
                logits, last_dim_shape=head_shape)

        outputs[output_name] = logits
    return outputs
def test_recursively_expand_dims(self):
    """Checks that axes [-1, 1] turn a [2, 3] tensor into [2, 1, 3, 1]."""
    # Shape = [2, 3].
    inputs = tf.constant([[1, 2, 3], [4, 5, 6]])
    # Shape = [2, 1, 3, 1].
    expected = [[[[1], [2], [3]]], [[[4], [5], [6]]]]

    actual = data_utils.recursively_expand_dims(inputs, axes=[-1, 1])

    self.assertAllEqual(actual, expected)
def simple_model(input_features,
                 output_sizes,
                 is_training,
                 name='SimpleModel',
                 num_bottleneck_nodes=0,
                 **kwargs):
    """Implements the `simple base` model followed by output heads.

    Note that the code differs from the original architecture by disabling
    dropout and maximum weight norms by default.

    Args:
      input_features: A tensor for input features. Shape = [..., feature_dim].
      output_sizes: A dictionary for output sizes in the format {output_name:
        output_size}, where `output_size` can be an integer or a list.
      is_training: A boolean for whether it is in training mode.
      name: A string for the name scope.
      num_bottleneck_nodes: An integer for size of the bottleneck layer to be
        added before the output layer(s). No bottleneck layer will be added if
        non-positive.
      **kwargs: A dictionary of additional arguments passed to `simple_base`.
        `weight_max_norm`, `weight_initializer` and `bias_initializer` are
        also read here for the bottleneck and output layers.

    Returns:
      outputs: A dictionary for output tensors in the format {output_name:
        output_tensors}. Output tensor shape = [..., output_size].
      activations: A dictionary of additional activation tensors for
        pre-output model activations. Keys include 'base_activations' and
        optionally 'bottleneck_activations'.
    """

    def _project(features, num_units, scope):
        # Shared configuration for every linear layer added after the base
        # network. Default initializers are created per call so layers do not
        # share an initializer instance.
        return linear(
            features,
            output_size=num_units,
            weight_max_norm=kwargs.get('weight_max_norm', 0.0),
            weight_initializer=kwargs.get('weight_initializer',
                                          tf.initializers.he_normal()),
            bias_initializer=kwargs.get('bias_initializer',
                                        tf.initializers.he_normal()),
            name=scope)

    hidden = simple_base(
        input_features, is_training=is_training, name=name, **kwargs)
    activations = {'base_activations': hidden}

    if num_bottleneck_nodes > 0:
        hidden = _project(hidden, num_bottleneck_nodes,
                          name + '/BottleneckLogits')
        activations['bottleneck_activations'] = hidden

    outputs = {}
    for output_name, output_size in output_sizes.items():
        head_shape = ([output_size]
                      if isinstance(output_size, int) else output_size)
        logits = _project(hidden, np.prod(head_shape),
                          name + '/OutputLogits/' + output_name)

        num_extra_dims = len(head_shape) - 1
        if num_extra_dims > 0:
            # Expand and reshape the flat last dimension to `head_shape`.
            logits = data_utils.recursively_expand_dims(
                logits, axes=[-1] * num_extra_dims)
            logits = data_utils.reshape_by_last_dims(
                logits, last_dim_shape=head_shape)

        outputs[output_name] = logits

    return outputs, activations
def randomly_rotate_and_project_3d_to_2d(keypoints_3d,
                                         azimuth_range,
                                         elevation_range,
                                         roll_range,
                                         normalized_camera_depth_range,
                                         sequential_inputs=False,
                                         seed=None):
    """Randomly rotates 3D keypoints and projects them to 2D.

    A randomly sampled camera depth is added to the keypoint depths before
    projection, which implicitly assumes the input 3D keypoints are centered
    at the camera origin. This function does not normalize the input 3D
    keypoints itself.

    Args:
      keypoints_3d: A tensor for 3D keypoints. Shape = [..., num_keypoints,
        3].
      azimuth_range: A 2-tuple for minimum and maximum azimuth angles to
        randomly rotate 3D keypoints with. For sequential inputs, also
        supports 4-tuple for minimum/maximum angles as well as minimum/maximum
        angle deltas between starting and ending angles.
      elevation_range: A 2-tuple for minimum and maximum elevation angles to
        randomly rotate 3D keypoints with. For sequential inputs, also
        supports 4-tuple for minimum/maximum angles as well as minimum/maximum
        angle deltas between starting and ending angles.
      roll_range: A 2-tuple for minimum and maximum roll angles to randomly
        rotate 3D keypoints with. For sequential inputs, also supports 4-tuple
        for minimum/maximum angles as well as minimum/maximum angle deltas
        between starting and ending angles.
      normalized_camera_depth_range: A tuple for minimum and maximum
        normalized camera depth for random camera augmentation.
      sequential_inputs: A boolean flag indicating whether the inputs are
        sequential. If True, the input keypoints are supposed to be in shape
        [..., sequence_length, num_keypoints, 3].
      seed: An integer for random seed. The same value is used for the
        rotation and for the depth sampling.

    Returns:
      keypoints_2d: A tensor for projected 2D keypoints from randomly rotated
        3D keypoints.
    """
    rotated = randomly_rotate_3d(
        keypoints_3d,
        azimuth_range=azimuth_range,
        elevation_range=elevation_range,
        roll_range=roll_range,
        sequential_inputs=sequential_inputs,
        seed=seed)

    # Transform to default camera coordinate.
    rotation_to_camera = tf.linalg.LinearOperatorFullMatrix(
        tf.constant([
            [0.0, 0.0, -1.0],
            [-1.0, 0.0, 0.0],
            [0.0, 1.0, 0.0],
        ]))
    points = rotation_to_camera.matvec(rotated)

    # Move to default depth. One depth is sampled per leading (batch) index:
    # the trailing [num_keypoints, 3] axes are dropped, plus the sequence axis
    # for sequential inputs.
    # Currently we only support sequence-level const depth.
    # TODO(liuti): Support varying depth for a sequence.
    num_trailing_dims = 3 if sequential_inputs else 2
    depths = tf.random.uniform(
        tf.shape(points)[:-num_trailing_dims],
        minval=normalized_camera_depth_range[0],
        maxval=normalized_camera_depth_range[1],
        seed=seed)
    if sequential_inputs:
        # Shape = [..., 1, 1] to broadcast over sequence and keypoints.
        depths = data_utils.recursively_expand_dims(depths, axes=[-1, -1])
    else:
        # Shape = [..., 1] to broadcast over keypoints.
        depths = tf.expand_dims(depths, axis=-1)

    # Camera offset is (0, 0, depth) for every keypoint.
    offset_x = tf.zeros_like(depths)
    offset_y = tf.zeros_like(depths)
    camera_offsets = tf.stack([offset_x, offset_y, depths], axis=-1)
    points = points + camera_offsets

    # Project to 2D: divide x/y by depth, clamped away from zero.
    xy = points[Ellipsis, :-1]
    clamped_depth = tf.math.maximum(1e-12, points[Ellipsis, -1:])
    return xy / clamped_depth