def fit_gaussian(embeddings, damping=1e-7, full_covariance=False):
  """Fits a unimodal Gaussian distribution to `embeddings`.

  Args:
    embeddings: A [batch_size, embedding_dim] tf.Tensor of embeddings.
    damping: The scale of the covariance damping coefficient.
    full_covariance: Whether to use a full or diagonal covariance.

  Returns:
    Parameter estimates (means and log variances) for a Gaussian model.
  """
  if full_covariance:
    num, dim = tf.split(tf.shape(input=embeddings), num_or_size_splits=2)
    num, dim = tf.squeeze(num), tf.squeeze(dim)
    sample_mean = tf.reduce_mean(input_tensor=embeddings, axis=0)
    centered_embeddings = embeddings - sample_mean
    sample_covariance = tf.einsum('ij,ik->kj', centered_embeddings,
                                  centered_embeddings)  # Outer product.
    sample_covariance += damping * tf.eye(dim)  # Positive definiteness.
    sample_covariance /= tf.cast(num, dtype=tf.float32)  # Scale by N.
    return sample_mean, sample_covariance
  else:
    # `tf.nn.moments` requires reduction axes; moments are taken over the
    # batch dimension to produce per-dimension estimates.
    sample_mean, sample_variances = tf.nn.moments(x=embeddings, axes=[0])
    log_variances = tf.math.log(
        sample_variances + damping * tf.ones_like(sample_variances))
    return sample_mean, log_variances

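# Usage sketch (illustrative, not part of the original module): fitting a
# diagonal Gaussian to a toy batch of embeddings. Assumes TensorFlow 2.x
# eager execution and `import tensorflow as tf` at the top of the module;
# the shapes and values are made up for the demo.
def _example_fit_gaussian():
  embeddings = tf.random.normal([64, 16])  # [batch_size, embedding_dim].
  mean, log_variances = fit_gaussian(embeddings, damping=1e-7)
  # Both estimates are [embedding_dim] vectors for the diagonal model.
  return mean, log_variances
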
def compute_prototypes(embeddings, onehot_labels):
  """Compute class prototypes over the last dimension of embeddings.

  Args:
    embeddings: Tensor of examples of shape [num_examples] + embedding_shape.
    onehot_labels: Tensor of one-hot encoded labels of shape [num_examples,
      num_classes].

  Returns:
    prototypes: Tensor of class prototypes of shape [num_classes,
      embedding_size].
  """
  # Sums each class' embeddings. [num_classes] + embedding_shape.
  embedding_indices = 'klm'[:len(embeddings.shape) - 1]
  class_sums = tf.einsum('ij,i{0}->j{0}'.format(embedding_indices),
                         onehot_labels, embeddings)

  # The prototype of each class is the averaged embedding of its examples.
  class_num_images = tf.reduce_sum(input_tensor=onehot_labels, axis=0)  # [way].
  prototypes = tf.math.divide_no_nan(
      class_sums,
      tf.reshape(class_num_images, [-1] + [1] * (len(embeddings.shape) - 1)))

  return prototypes

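# Usage sketch (illustrative, not from the original source): averaging each
# class' embeddings into a prototype, as in prototypical networks. The
# 10-example, 3-way setup is an assumption for the demo.
def _example_compute_prototypes():
  embeddings = tf.random.normal([10, 8])  # 10 examples, 8-dim embeddings.
  labels = tf.constant([0, 1, 2, 0, 1, 2, 0, 1, 2, 0])
  onehot_labels = tf.one_hot(labels, depth=3)  # [10, 3].
  prototypes = compute_prototypes(embeddings, onehot_labels)
  return prototypes  # [3, 8]: one averaged embedding per class.
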
def _attend(self, query, key, value, key_class_id):
  """Transformer attention function."""
  with tf.name_scope('attend'):
    q_shape = tf.shape(query)
    v_shape = tf.shape(value)
    n_q = q_shape[0]
    h_q = q_shape[1]
    w_q = q_shape[2]
    d = q_shape[3]
    n_v = v_shape[0]
    h_v = v_shape[1]
    w_v = v_shape[2]
    c = v_shape[3]

    q = tf.reshape(query, [-1, d])  # [n_q*h_q*w_q, d]
    k = tf.reshape(key, [-1, d])

    # [n_v*h_v*w_v, d] x [n_q*h_q*w_q, d]^T --> [n_v*h_v*w_v, n_q*h_q*w_q]
    logits = tf.matmul(k, q, transpose_b=True)
    d_scale = tf.rsqrt(tf.cast(d, logits.dtype))

    # logits: [n_v, h_v*w_v, n_q*h_q*w_q]
    logits = tf.reshape(d_scale * logits, [n_v, h_v * w_v, -1])

    # attn: [n_v, h_v*w_v, n_q*h_q*w_q]
    attn = self.get_support_set_softmax(logits, key_class_id)

    # Aggregate values:
    v = tf.reshape(value, [n_v, h_v * w_v, c])
    # [n_v, h_v*w_v, n_q*h_q*w_q] x [n_v, h_v*w_v, c] --> [n_v, n_q*h_q*w_q, c]
    v_agg = tf.einsum('ijk,ijl->ikl', attn, v)
    v_agg = tf.reshape(v_agg, [n_v, n_q, h_q, w_q, c])
    v_agg.set_shape([None, None, None, None, value.shape[-1]])

    return v_agg  # [n_v, n_q, h_q, w_q, c]

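# Shape walkthrough (illustrative sketch, not from the original source): the
# core of `_attend` as a standalone function. A plain softmax over key
# positions stands in for the class-conditional `get_support_set_softmax`,
# and all shapes below are assumptions for the demo.
def _example_attend_shapes():
  n_q, h_q, w_q, d = 2, 3, 3, 8
  n_v, h_v, w_v, c = 5, 3, 3, 8
  query = tf.random.normal([n_q, h_q, w_q, d])
  key = tf.random.normal([n_v, h_v, w_v, d])
  value = tf.random.normal([n_v, h_v, w_v, c])

  q = tf.reshape(query, [-1, d])  # [n_q*h_q*w_q, d].
  k = tf.reshape(key, [-1, d])    # [n_v*h_v*w_v, d].
  logits = tf.matmul(k, q, transpose_b=True) * tf.rsqrt(tf.cast(d, tf.float32))
  logits = tf.reshape(logits, [n_v, h_v * w_v, -1])

  # Stand-in for get_support_set_softmax: normalize over key positions.
  attn = tf.nn.softmax(logits, axis=1)

  v = tf.reshape(value, [n_v, h_v * w_v, c])
  v_agg = tf.einsum('ijk,ijl->ikl', attn, v)  # [n_v, n_q*h_q*w_q, c].
  return tf.reshape(v_agg, [n_v, n_q, h_q, w_q, c])
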
def compute_motion_labels(scene,
                          frame0,
                          frame1,
                          frame_start_index,
                          points_key,
                          box_margin=0.1):
  """Computes a motion label for each point.

  Args:
    scene: dict of tensors containing the scene.
    frame0: dict of tensors containing points and objects.
    frame1: dict of tensors containing points and objects.
    frame_start_index: starting frame index.
    points_key: A string corresponding to the tensor of point positions in
      inputs.
    box_margin: A margin value to enlarge boxes, so that surrounding points
      are included.

  Returns:
    A motion tensor of shape [N, 3].
  """
  point_positions = frame0[points_key]
  frame0_object_names = frame0['objects/name']
  frame1_object_names = frame1['objects/name']
  bool_matrix = tf.math.equal(
      tf.expand_dims(frame0_object_names, axis=1),
      tf.expand_dims(frame1_object_names, axis=0))
  match_indices = tf.where(bool_matrix)

  # Object box level.
  box_dimension = tf.gather(
      frame0['objects/shape/dimension'], match_indices[:, 0], axis=0)
  boxes_length = box_dimension[:, 0:1]
  boxes_width = box_dimension[:, 1:2]
  boxes_height = box_dimension[:, 2:3]
  boxes_rotation_matrix = tf.gather(
      frame0['objects/pose/R'], match_indices[:, 0], axis=0)
  boxes_center = tf.gather(
      frame0['objects/pose/t'], match_indices[:, 0], axis=0)
  frame1_box_rotation_matrix = tf.gather(
      frame1['objects/pose/R'], match_indices[:, 1], axis=0)
  frame1_box_center = tf.gather(
      frame1['objects/pose/t'], match_indices[:, 1], axis=0)

  # Frame level.
  frame0_rotation = scene['frames/pose/R'][frame_start_index]
  frame1_rotation = scene['frames/pose/R'][frame_start_index + 1]
  frame0_translation = scene['frames/pose/t'][frame_start_index]
  frame1_translation = scene['frames/pose/t'][frame_start_index + 1]

  frame1_box_center_global = tf.tensordot(
      frame1_box_center, frame1_rotation, axes=(1, 1)) + frame1_translation
  frame1_box_center_in_frame0 = tf.tensordot(
      frame1_box_center_global - frame0_translation,
      frame0_rotation,
      axes=(1, 0))

  # Only find box indices for boxes that are matched between the two frames.
  points_box_index = box_utils.map_points_to_boxes(
      points=point_positions,
      boxes_length=boxes_length,
      boxes_height=boxes_height,
      boxes_width=boxes_width,
      boxes_rotation_matrix=boxes_rotation_matrix,
      boxes_center=boxes_center,
      box_margin=box_margin)

  # TODO(huangrui): Boxes of disappeared objects get zero motion.
  # Consider setting them to NaN or an ignore_label instead.
  # The steps below:
  #   1. Gather points inside surviving matched boxes only, and replicate each
  #      box's rotation/translation to the same length.
  #   2. Map points into the box frame, then apply the new per-point
  #      rotation/translation.
  #   3. Subtract the old location from the new location to get the motion
  #      vector.
  #   4. Scatter the motion vectors into a larger tensor, with zeros for
  #      points outside of matched boxes.
  # Boxes must be limited to the matched ones; otherwise `points_box_index`
  # would contain unused boxes.

  # Indices, into the full point array, of points that are inside a box.
  points_inside_box_index = tf.where(points_box_index + 1)[:, 0]
  box_index = tf.gather(points_box_index, points_inside_box_index)
  points_inside_box = tf.gather(point_positions, points_inside_box_index)
  box_rotation_per_point = tf.gather(boxes_rotation_matrix, box_index)
  box_center_per_point = tf.gather(boxes_center, box_index)

  # Tensors of shape [N, 3, 3] and [N, 3]. Note the points are transformed
  # with the inverse (transposed) rotation via the swapped einsum axes.
  points_in_box_frame = tf.einsum('ikj,ik->ij', box_rotation_per_point,
                                  points_inside_box - box_center_per_point)

  # Transform the box rotation from the frame1 coordinate system to the
  # frame0 coordinate system. Note the transpose is implemented by changing
  # the summation axis.
  frame1_box_rotation_matrix_global = tf.transpose(
      tf.tensordot(frame1_rotation, frame1_box_rotation_matrix, axes=(1, 1)),
      perm=(1, 0, 2))
  frame1_box_rotation_matrix_in_frame0 = tf.transpose(
      tf.tensordot(
          frame0_rotation, frame1_box_rotation_matrix_global, axes=(0, 1)),
      perm=(1, 0, 2))

  # This yields each point's position after following its frame1 box motion.
  frame1_box_rotation_in_frame0_per_point = tf.gather(
      frame1_box_rotation_matrix_in_frame0, box_index)
  frame1_box_center_in_frame0_per_point = tf.gather(
      frame1_box_center_in_frame0, box_index)

  points_in_box_frame1 = tf.einsum(
      'ijk,ik->ij', frame1_box_rotation_in_frame0_per_point,
      points_in_box_frame) + frame1_box_center_in_frame0_per_point
  motion_vector = points_in_box_frame1 - points_inside_box

  scattered_vector = tf.scatter_nd(
      indices=tf.expand_dims(points_inside_box_index, axis=1),
      updates=motion_vector,
      shape=tf.shape(point_positions, out_type=tf.dtypes.int64))

  return scattered_vector

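# Sanity-check sketch (illustrative, not in the original): for a rotation R
# and box center t, mapping a point into the box frame with the transposed
# rotation ('ikj' axes) and back out with the forward rotation ('ijk' axes)
# should recover the point, mirroring the two einsum calls above. The
# rotation, center, and point values are made up for the demo.
def _example_box_frame_round_trip():
  theta = tf.constant(0.3)
  rotation = tf.stack([
      tf.stack([tf.cos(theta), -tf.sin(theta), 0.0]),
      tf.stack([tf.sin(theta), tf.cos(theta), 0.0]),
      tf.constant([0.0, 0.0, 1.0]),
  ])  # [3, 3] rotation about the z axis.
  rotation = rotation[tf.newaxis]            # [1, 3, 3], per-point batch.
  center = tf.constant([[1.0, 2.0, 0.5]])    # [1, 3].
  points = tf.constant([[4.0, -1.0, 2.0]])   # [1, 3].

  # Into the box frame: transposed rotation via swapped summation axes.
  local = tf.einsum('ikj,ik->ij', rotation, points - center)
  # Back out with the forward rotation.
  recovered = tf.einsum('ijk,ik->ij', rotation, local) + center
  return points, recovered  # Equal up to float precision.
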
def fit_gaussian_mixture(embeddings,
                         responsibilities,
                         damping=1e-7,
                         full_covariance=False):
  """Fits a Gaussian mixture model to `embeddings`.

  Args:
    embeddings: A [batch_size, embedding_dim] tf.Tensor of embeddings.
    responsibilities: The per-component responsibilities.
    damping: The scale of the covariance damping coefficient.
    full_covariance: Whether to use a full or diagonal covariance.

  Returns:
    Parameter estimates for a Gaussian mixture model.
  """
  num, dim = tf.split(tf.shape(input=embeddings), num_or_size_splits=2)
  num, dim = tf.squeeze(num), tf.squeeze(dim)
  num_classes = responsibilities.shape[1]

  mixing_proportion = tf.einsum('jk->k', responsibilities)
  mixing_proportion /= tf.cast(num, dtype=tf.float32)
  mixing_logits = tf.math.log(mixing_proportion)

  sample_mean = tf.einsum('ij,ik->jk', responsibilities, embeddings)
  sample_mean /= tf.reduce_sum(
      input_tensor=responsibilities, axis=0)[:, tf.newaxis]
  centered_embeddings = (
      embeddings[:, tf.newaxis, :] - sample_mean[tf.newaxis, :, :])

  if full_covariance:
    sample_covariance = tf.einsum('ijk,ijl->ijkl', centered_embeddings,
                                  centered_embeddings)  # Outer product.
    sample_covariance += damping * tf.eye(dim)  # Positive definiteness.
    weighted_covariance = tf.einsum('ij,ijkl->jkl', responsibilities,
                                    sample_covariance)
    weighted_covariance /= tf.reduce_sum(
        input_tensor=responsibilities, axis=0)[:, tf.newaxis, tf.newaxis]

    return (
        _split_and_squeeze(sample_mean, num_splits=num_classes),
        _split_and_squeeze(weighted_covariance, num_splits=num_classes),
        [mixing_logits],
    )
  else:
    avg_x_squared = (
        tf.matmul(responsibilities, embeddings**2, transpose_a=True) /
        tf.reduce_sum(input_tensor=responsibilities, axis=0)[:, tf.newaxis])
    avg_means_squared = sample_mean**2
    avg_x_means = (
        sample_mean * tf.matmul(responsibilities, embeddings, transpose_a=True)
        / tf.reduce_sum(input_tensor=responsibilities, axis=0)[:, tf.newaxis])
    sample_variances = (
        avg_x_squared - 2 * avg_x_means + avg_means_squared +
        damping * tf.ones(dim))
    log_variances = tf.math.log(sample_variances)
    return (
        _split_and_squeeze(sample_mean, num_splits=num_classes),
        _split_and_squeeze(log_variances, num_splits=num_classes),
        [mixing_logits],
    )

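# Usage sketch (illustrative, not from the original source): one M-step of EM
# with uniform responsibilities over two components. Assumes TensorFlow 2.x
# and that the module-level `_split_and_squeeze` helper used above is
# available; the shapes below are made up for the demo.
def _example_fit_gaussian_mixture():
  embeddings = tf.random.normal([64, 16])   # [batch_size, embedding_dim].
  responsibilities = tf.fill([64, 2], 0.5)  # Soft assignments; rows sum to 1.
  means, log_variances, mixing_logits = fit_gaussian_mixture(
      embeddings, responsibilities, damping=1e-7, full_covariance=False)
  # `means` and `log_variances` hold per-component parameter estimates.
  return means, log_variances, mixing_logits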