Example #1
def fit_gaussian(embeddings, damping=1e-7, full_covariance=False):
  """Fits a unimodal Gaussian distribution to `embeddings`.

  Args:
    embeddings: A [batch_size, embedding_dim] tf.Tensor of embeddings.
    damping: The scale of the covariance damping coefficient.
    full_covariance: Whether to use a full or diagonal covariance.

  Returns:
    Parameter estimates: the sample mean and covariance if `full_covariance`,
    else the sample mean and log variances.
  """
  if full_covariance:
    num, dim = tf.split(tf.shape(input=embeddings), num_or_size_splits=2)
    num, dim = tf.squeeze(num), tf.squeeze(dim)
    sample_mean = tf.reduce_mean(input_tensor=embeddings, axis=0)
    centered_embeddings = embeddings - sample_mean
    sample_covariance = tf.einsum('ij,ik->kj', centered_embeddings,
                                  centered_embeddings)  # Outer product.
    sample_covariance += damping * tf.eye(dim)  # Positive definiteness.
    sample_covariance /= tf.cast(num, dtype=tf.float32)  # Scale by N.
    return sample_mean, sample_covariance
  else:
    sample_mean, sample_variances = tf.nn.moments(x=embeddings, axes=[0])
    log_variances = tf.math.log(sample_variances +
                                damping * tf.ones_like(sample_variances))
    return sample_mean, log_variances
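
A minimal usage sketch (not part of the original source), assuming TensorFlow 2.x imported as `tf`; shapes are illustrative only:

# Hypothetical usage of fit_gaussian on random embeddings.
example_embeddings = tf.random.normal([32, 8])  # [batch_size, embedding_dim]
mean, log_variances = fit_gaussian(example_embeddings)  # diagonal covariance
mean, covariance = fit_gaussian(example_embeddings, full_covariance=True)
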
def compute_prototypes(embeddings, onehot_labels):
  """Computes class prototypes by averaging each class's embeddings.

  Args:
    embeddings: Tensor of examples of shape [num_examples] + embedding_shape.
    onehot_labels: Tensor of one-hot encoded labels of shape [num_examples,
      num_classes].

  Returns:
    prototypes: Tensor of class prototypes of shape [num_classes] +
      embedding_shape.
  """
  # Sums each class's embeddings. [num_classes] + embedding_shape.
  embedding_indices = 'klm'[:len(embeddings.shape) - 1]
  class_sums = tf.einsum('ij,i{0}->j{0}'.format(embedding_indices),
                         onehot_labels, embeddings)

  # The prototype of each class is the averaged embedding of its examples.
  class_num_images = tf.reduce_sum(input_tensor=onehot_labels,
                                   axis=0)  # [way].
  prototypes = tf.math.divide_no_nan(
      class_sums,
      tf.reshape(class_num_images, [-1] + [1] * (len(embeddings.shape) - 1)))

  return prototypes
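
A usage sketch with hypothetical data (not from the source), showing a 3-way task with rank-2 embeddings:

# Hypothetical usage of compute_prototypes.
labels = tf.constant([0, 0, 1, 2, 2])
onehot_labels = tf.one_hot(labels, depth=3)  # [5, 3]
embeddings = tf.random.normal([5, 8])        # [5, embedding_dim]
prototypes = compute_prototypes(embeddings, onehot_labels)  # [3, 8]

Classes with no examples yield an all-zero prototype, since tf.math.divide_no_nan returns 0 where the per-class count is 0.
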
  def _attend(self, query, key, value, key_class_id):
    """Transformer attention function."""
    with tf.name_scope('attend'):
      q_shape = tf.shape(query)
      v_shape = tf.shape(value)

      n_q = q_shape[0]
      h_q = q_shape[1]
      w_q = q_shape[2]
      d = q_shape[3]

      n_v = v_shape[0]
      h_v = v_shape[1]
      w_v = v_shape[2]
      c = v_shape[3]

      q = tf.reshape(query, [-1, d])  # [n_q*Hq*Wq, d]
      k = tf.reshape(key, [-1, d])

      # [n_v*Hv*Wv, d] x [n_q*Hq*Wq, d]  --> [n_v*Hv*Wv, n_q*Hq*Wq]
      logits = tf.matmul(k, q, transpose_b=True)
      d_scale = tf.rsqrt(tf.cast(d, logits.dtype))

      # logits: [n_v, Hv*Wv, n_q*Hq*Wq]
      logits = tf.reshape(d_scale * logits, [n_v, h_v * w_v, -1])

      # attn: [n_v, Hv*Wv, n_q*Hq*Wq]
      attn = self.get_support_set_softmax(logits, key_class_id)

      # aggregate:
      v = tf.reshape(value, [n_v, h_v * w_v, c])

      # [n_v, Hv*Wv, n_q*Hq*Wq] x [n_v, Hv*Wv, c]  --> [n_v, n_q*Hq*Wq, c]
      v_agg = tf.einsum('ijk,ijl->ikl', attn, v)
      v_agg = tf.reshape(v_agg, [n_v, n_q, h_q, w_q, c])
      v_agg.set_shape([None, None, None, None, value.shape[-1]])

      return v_agg  # [N_c, n_q, Hq, Wq, c]
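
A standalone sketch of the same shape bookkeeping (not the original class method; a plain tf.nn.softmax over key locations stands in for `get_support_set_softmax`, whose class-aware normalization is defined elsewhere in the original class):

def attend_sketch(query, key, value):
  """Scaled dot-product attention between spatial feature maps (sketch)."""
  n_q, h_q, w_q, d = tf.unstack(tf.shape(query))
  n_v, h_v, w_v, c = tf.unstack(tf.shape(value))
  q = tf.reshape(query, [-1, d])              # [n_q*h_q*w_q, d]
  k = tf.reshape(key, [-1, d])                # [n_v*h_v*w_v, d]
  logits = tf.matmul(k, q, transpose_b=True)  # [n_v*h_v*w_v, n_q*h_q*w_q]
  logits *= tf.rsqrt(tf.cast(d, logits.dtype))
  logits = tf.reshape(logits, [n_v, h_v * w_v, -1])
  attn = tf.nn.softmax(logits, axis=1)        # normalize over key locations
  v = tf.reshape(value, [n_v, h_v * w_v, c])
  v_agg = tf.einsum('ijk,ijl->ikl', attn, v)  # [n_v, n_q*h_q*w_q, c]
  return tf.reshape(v_agg, [n_v, n_q, h_q, w_q, c])
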
def compute_motion_labels(scene,
                          frame0,
                          frame1,
                          frame_start_index,
                          points_key,
                          box_margin=0.1):
  """Compute motion label for each point.

  Args:
    scene: dict of tensor containing scene.
    frame0: dict of tensor containing points and objects.
    frame1: dict of tensor containing points and objects.
    frame_start_index: starting frame index.
    points_key:  A string corresponding to the tensor of point positions in
      inputs.
    box_margin: A margin value to enlarge box, so that surrounding points are
      included.

  Returns:
    A motion tensor of [N, 3] shape.

  """
  point_positions = frame0[points_key]
  frame0_object_names = frame0['objects/name']
  frame1_object_names = frame1['objects/name']
  bool_matrix = tf.math.equal(
      tf.expand_dims(frame0_object_names, axis=1),
      tf.expand_dims(frame1_object_names, axis=0))
  match_indices = tf.where(bool_matrix)

  # object box level
  box_dimension = tf.gather(
      frame0['objects/shape/dimension'], match_indices[:, 0], axis=0)
  boxes_length = box_dimension[:, 0:1]
  boxes_width = box_dimension[:, 1:2]
  boxes_height = box_dimension[:, 2:3]
  boxes_rotation_matrix = tf.gather(
      frame0['objects/pose/R'], match_indices[:, 0], axis=0)
  boxes_center = tf.gather(
      frame0['objects/pose/t'], match_indices[:, 0], axis=0)
  frame1_box_rotation_matrix = tf.gather(
      frame1['objects/pose/R'], match_indices[:, 1], axis=0)
  frame1_box_center = tf.gather(
      frame1['objects/pose/t'], match_indices[:, 1], axis=0)

  # frame level
  frame0_rotation = scene['frames/pose/R'][frame_start_index]
  frame1_rotation = scene['frames/pose/R'][frame_start_index + 1]
  frame0_translation = scene['frames/pose/t'][frame_start_index]
  frame1_translation = scene['frames/pose/t'][frame_start_index + 1]

  frame1_box_center_global = tf.tensordot(
      frame1_box_center, frame1_rotation, axes=(1, 1)) + frame1_translation
  frame1_box_center_in_frame0 = tf.tensordot(
      frame1_box_center_global - frame0_translation,
      frame0_rotation,
      axes=(1, 0))

  # only find index on boxes that are matched between two frames
  points_box_index = box_utils.map_points_to_boxes(
      points=point_positions,
      boxes_length=boxes_length,
      boxes_height=boxes_height,
      boxes_width=boxes_width,
      boxes_rotation_matrix=boxes_rotation_matrix,
      boxes_center=boxes_center,
      box_margin=box_margin)

  # TODO(huangrui): disappeared object boxes currently get zero motion.
  # Consider setting them to NaN or an ignore_label instead.

  # 1. gather points in surviving matched box only,
  #    and replicate rotation/t to same length;
  # 2. get points in box frame, apply new rotation/t per point;
  # 3. new location minus old location -> motion vector;
  # 4. scatter it into a full-size motion_vector, with 0 for
  #    points outside of matched boxes.

  # Boxes must be limited to those matched between the two frames;
  # otherwise points_box_index would reference unmatched boxes.

  # Indices (into the full point array) of points inside a matched box.
  # Unmatched points get index -1, so adding 1 makes tf.where drop them.
  points_inside_box_index = tf.where(points_box_index + 1)[:, 0]
  box_index = tf.gather(points_box_index, points_inside_box_index)
  points_inside_box = tf.gather(point_positions, points_inside_box_index)
  box_rotation_per_point = tf.gather(boxes_rotation_matrix, box_index)
  box_center_per_point = tf.gather(boxes_center, box_index)
  # Tensors of shape [N, 3, 3] and [N, 3]. The 'ikj' index order applies the
  # transposed (inverse) rotation, mapping points into the box frame.
  points_in_box_frame = tf.einsum('ikj,ik->ij', box_rotation_per_point,
                                  points_inside_box - box_center_per_point)

  # Transform the box rotation from frame1 coordinates to frame0 coordinates.
  # Note: the transpose is implemented by changing the summation axis.
  frame1_box_rotation_matrix_global = tf.transpose(
      tf.tensordot(frame1_rotation, frame1_box_rotation_matrix, axes=(1, 1)),
      perm=(1, 0, 2))
  frame1_box_rotation_matrix_in_frame0 = tf.transpose(
      tf.tensordot(
          frame0_rotation, frame1_box_rotation_matrix_global, axes=(0, 1)),
      perm=(1, 0, 2))

  # Per-point pose of the matched box in frame1 (in frame0 coordinates), used
  # to compute each point's position after following its box's motion.
  frame1_box_rotation_in_frame0_per_point = tf.gather(
      frame1_box_rotation_matrix_in_frame0, box_index)
  frame1_box_center_in_frame0_per_point = tf.gather(frame1_box_center_in_frame0,
                                                    box_index)

  points_in_box_frame1 = tf.einsum(
      'ijk,ik->ij', frame1_box_rotation_in_frame0_per_point,
      points_in_box_frame) + frame1_box_center_in_frame0_per_point
  motion_vector = points_in_box_frame1 - points_inside_box

  scattered_vector = tf.scatter_nd(
      indices=tf.expand_dims(points_inside_box_index, axis=1),
      updates=motion_vector,
      shape=tf.shape(point_positions, out_type=tf.dtypes.int64))

  return scattered_vector
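
A worked example with hypothetical numbers (not from the source) of the core rigid-motion step above: a point is mapped into the box frame with the inverse rotation, re-posed with the frame1 box pose, and the difference is its motion vector.

import numpy as np

R0 = np.eye(3)                  # box rotation in frame0
t0 = np.array([1.0, 0.0, 0.0])  # box center in frame0
R1 = np.eye(3)                  # frame1 box rotation, in frame0 coordinates
t1 = np.array([1.5, 0.0, 0.0])  # frame1 box center, in frame0 coordinates
p = np.array([1.2, 0.3, 0.0])   # a point inside the box in frame0

p_box = R0.T @ (p - t0)         # into the box frame (inverse rotation)
p_new = R1 @ p_box + t1         # re-pose with the frame1 box pose
motion = p_new - p              # -> array([0.5, 0. , 0. ])
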
Example #5
def fit_gaussian_mixture(embeddings,
                         responsibilities,
                         damping=1e-7,
                         full_covariance=False):
  """Fits a unimodal Gaussian distribution `embeddings`.

  Args:
    embeddings: A [batch_size, embedding_dim] tf.Tensor of embeddings.
    responsibilities: The per-component responsibilities.
    damping: The scale of the covariance damping coefficient.
    full_covariance: Whether to use a full or diagonal covariance.

  Returns:
    Parameter estimates for a Gaussian mixture model.
  """

  num, dim = tf.split(tf.shape(input=embeddings), num_or_size_splits=2)
  num, dim = tf.squeeze(num), tf.squeeze(dim)
  num_classes = responsibilities.shape[1]

  mixing_proportion = tf.einsum('jk->k', responsibilities)
  mixing_proportion /= tf.cast(num, dtype=tf.float32)
  mixing_logits = tf.math.log(mixing_proportion)

  sample_mean = tf.einsum('ij,ik->jk', responsibilities, embeddings)
  sample_mean /= tf.reduce_sum(
      input_tensor=responsibilities, axis=0)[:, tf.newaxis]
  centered_embeddings = (
      embeddings[:, tf.newaxis, :] - sample_mean[tf.newaxis, :, :])

  if full_covariance:
    sample_covariance = tf.einsum('ijk,ijl->ijkl', centered_embeddings,
                                  centered_embeddings)  # Outer product.
    sample_covariance += damping * tf.eye(dim)  # Positive definiteness.
    weighted_covariance = tf.einsum('ij,ijkl->jkl', responsibilities,
                                    sample_covariance)
    weighted_covariance /= tf.reduce_sum(
        input_tensor=responsibilities, axis=0)[:, tf.newaxis, tf.newaxis]

    return (
        _split_and_squeeze(sample_mean, num_splits=num_classes),
        _split_and_squeeze(weighted_covariance, num_splits=num_classes),
        [mixing_logits],
    )
  else:
    avg_x_squared = (
        tf.matmul(responsibilities, embeddings**2, transpose_a=True) /
        tf.reduce_sum(input_tensor=responsibilities, axis=0)[:, tf.newaxis])
    avg_means_squared = sample_mean**2
    avg_x_means = (
        sample_mean *
        tf.matmul(responsibilities, embeddings, transpose_a=True) /
        tf.reduce_sum(input_tensor=responsibilities, axis=0)[:, tf.newaxis])
    sample_variances = (
        avg_x_squared - 2 * avg_x_means + avg_means_squared +
        damping * tf.ones(dim))
    log_variances = tf.math.log(sample_variances)
    return (
        _split_and_squeeze(sample_mean, num_splits=num_classes),
        _split_and_squeeze(log_variances, num_splits=num_classes),
        [mixing_logits],
    )
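
A usage sketch with hypothetical values (`_split_and_squeeze` is a private helper from the same module, assumed to split the first axis into per-component tensors):

# Hypothetical usage: one M-step with uniform responsibilities.
example_embeddings = tf.random.normal([32, 8])  # [batch_size, embedding_dim]
responsibilities = tf.fill([32, 4], 0.25)       # soft assignments, 4 components
means, log_variances, mixing_logits = fit_gaussian_mixture(
    example_embeddings, responsibilities)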