    def compute_logits(self, support_embeddings, query_embeddings,
                       onehot_support_labels):
        """Computes the relation score of each query example to each prototype."""
        # [n_test, 21, 21, n_features].
        query_embed_shape = query_embeddings.shape.as_list()
        n_feature = query_embed_shape[3]
        out_shape = query_embed_shape[1:3]
        n_test = tf.shape(query_embeddings)[0]

        # [n_test, num_classes, 21, 21, n_feature].
        # It is okay for one of the elements in the list to be a tensor.
        prototypes = compute_prototypes(support_embeddings,
                                        onehot_support_labels)

        prototype_extended = tf.tile(tf.expand_dims(prototypes, 0),
                                     [n_test, 1, 1, 1, 1])
        # [n_test, num_classes, 21, 21, n_feature].
        query_f_extended = tf.tile(
            tf.expand_dims(query_embeddings, 1),
            [1, tf.shape(onehot_support_labels)[-1], 1, 1, 1])
        relation_pairs = tf.concat((prototype_extended, query_f_extended), 4)
        # relation_pairs.shape.as_list()[-3:] == [-1] + out_shape + [n_feature*2]
        relation_pairs = tf.reshape(relation_pairs,
                                    [-1] + out_shape + [n_feature * 2])
        relationnet_dict = functional_backbones.relation_module(
            relation_pairs, 'relationnet')
        way = tf.shape(onehot_support_labels)[-1]
        relations = tf.reshape(relationnet_dict['output'], [-1, way])
        return relations
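For reference, a minimal self-contained sketch of the same prototype/query pairing (assuming TF2 eager execution; the sizes are made up and the real relation module is omitted):

import tensorflow as tf

# Hypothetical sizes: 3 query examples, 5 classes, 21x21 spatial maps, 64 features.
n_test, way, h, w, f = 3, 5, 21, 21, 64
prototypes = tf.random.normal([way, h, w, f])
query_embeddings = tf.random.normal([n_test, h, w, f])

prototype_extended = tf.tile(tf.expand_dims(prototypes, 0), [n_test, 1, 1, 1, 1])
query_extended = tf.tile(tf.expand_dims(query_embeddings, 1), [1, way, 1, 1, 1])
relation_pairs = tf.concat((prototype_extended, query_extended), 4)
relation_pairs = tf.reshape(relation_pairs, [-1, h, w, f * 2])
print(relation_pairs.shape)  # (15, 21, 21, 128): one pair per (query, class).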
Example #2
  def compute_class_distances(self, support_embeddings, onehot_support_labels,
                              query_embeddings):
    """Return the relation score of each query example to each prototype."""
    # `query_embeddings` is [num_examples, 21, 21, num_features].
    out_shape = query_embeddings.shape.as_list()[1:]
    num_features = out_shape[-1]
    num_query_examples = tf.shape(input=query_embeddings)[0]

    # [num_classes, 19, 19, num_features].
    prototypes = compute_prototypes(support_embeddings, onehot_support_labels)

    # [num_query_examples, num_classes, 19, 19, num_features].
    prototype_extended = tf.tile(
        tf.expand_dims(prototypes, 0),
        [num_query_examples] + [1] * (1 + len(out_shape)))

    # [num_query_examples, num_classes, 19, 19, num_features].
    way = onehot_support_labels.shape.as_list()[-1]
    query_extended = tf.tile(
        tf.expand_dims(query_embeddings, 1), [1, way] + [1] * len(out_shape))
    relation_pairs = tf.concat((prototype_extended, query_extended),
                               len(out_shape) + 1)

    # relation_pairs.shape.as_list()[-3:] == [-1] + out_shape + [num_features*2]
    relation_pairs = tf.reshape(relation_pairs,
                                [-1] + out_shape[:-1] + [num_features * 2])

    return tf.reshape(self.relation_module_fn(relation_pairs), [-1, way])
Example #3
  def _build_target_distribution(self):
    self._reshape_networks()
    batch_size = tf.shape(self._replay.rewards)[0]
    # size of rewards: batch_size x 1
    rewards = self._replay.rewards[:, None]
    # size of tiled_support: batch_size x num_atoms
    tiled_support = tf.tile(self.support, [batch_size])
    tiled_support = tf.reshape(tiled_support, [batch_size, self.num_atoms])
    # size of target_support: batch_size x num_atoms

    is_terminal_multiplier = 1. - tf.cast(self._replay.terminals, tf.float32)
    # Incorporate terminal state to discount factor.
    # size of gamma_with_terminal: batch_size x 1
    gamma_with_terminal = self.cumulative_gamma * is_terminal_multiplier
    gamma_with_terminal = gamma_with_terminal[:, None]

    target_support = rewards + gamma_with_terminal * tiled_support
    # size of next_probabilities: batch_size  x num_actions x num_atoms
    next_probabilities = tf.contrib.layers.softmax(
        self._replay_next_logits)

    # size of next_qt: batch_size x num_actions
    next_qt = tf.reduce_sum(self.support * next_probabilities, 2)
    # size of next_qt_argmax: batch_size x 1
    next_qt_argmax = tf.argmax(
        next_qt + self._replay.next_legal_actions, axis=1)[:, None]
    batch_indices = tf.range(tf.to_int64(batch_size))[:, None]
    # size of next_qt_argmax: batch_size x 2
    next_qt_argmax = tf.concat([batch_indices, next_qt_argmax], axis=1)
    # size of next_probabilities: batch_size x num_atoms
    next_probabilities = tf.gather_nd(next_probabilities, next_qt_argmax)
    return project_distribution(target_support, next_probabilities,
                                self.support)
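A hedged toy illustration of how the target support combines rewards, discount, and terminals (standalone, with made-up values rather than the agent's replay tensors):

import tensorflow as tf

# Batch of 2 transitions, 3 atoms, cumulative_gamma assumed to be 0.9.
support = tf.constant([-1., 0., 1.])
rewards = tf.constant([[0.5], [1.0]])
terminals = tf.constant([0., 1.])
gamma_with_terminal = (0.9 * (1. - terminals))[:, None]
tiled_support = tf.reshape(tf.tile(support, [2]), [2, 3])
print((rewards + gamma_with_terminal * tiled_support).numpy())
# [[-0.4  0.5  1.4]   <- non-terminal: reward + gamma * support
#  [ 1.   1.   1. ]]  <- terminal: collapses onto the reward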
Example #4
def embedding_regularization_loss(inputs,
                                  outputs,
                                  lambda_coef=0.0001,
                                  regularization_type='unit_length',
                                  is_intermediate=False):
  """Classification loss with an iou threshold.

  Args:
    inputs: A dictionary that contains
      num_valid_voxels - A tf.int32 tensor of size [batch_size].
      instance_ids - A tf.int32 tensor of size [batch_size, n].
    outputs: A dictionary that contains
      embeddings - A tf.float32 tensor of size [batch_size, n, f].
    lambda_coef: Regularization loss coefficient.
    regularization_type: Regularization loss type. Supported values are 'msq'
      and 'unit_length'. 'msq' stands for 'mean square' which penalizes the
      embedding vectors if they have a length far from zero. 'unit_length'
      penalizes the embedding vectors if they have a length far from one.
    is_intermediate: True if applied to intermediate predictions;
      otherwise, False.

  Returns:
    A tf.float32 scalar loss tensor.
  """
  instance_ids_key = standard_fields.InputDataFields.object_instance_id_voxels
  num_voxels_key = standard_fields.InputDataFields.num_valid_voxels
  if is_intermediate:
    embedding_key = (
        standard_fields.DetectionResultFields
        .intermediate_instance_embedding_voxels)
  else:
    embedding_key = (
        standard_fields.DetectionResultFields.instance_embedding_voxels)
  if instance_ids_key not in inputs:
    raise ValueError('instance_ids is missing in inputs.')
  if embedding_key not in outputs:
    raise ValueError('embedding is missing in outputs.')
  if num_voxels_key not in inputs:
    raise ValueError('num_voxels is missing in inputs.')
  batch_size = inputs[num_voxels_key].get_shape().as_list()[0]
  if batch_size is None:
    raise ValueError('batch_size is not defined at graph construction time.')
  num_valid_voxels = inputs[num_voxels_key]
  num_voxels = tf.shape(inputs[instance_ids_key])[1]
  valid_mask = tf.less(
      tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [batch_size, 1]),
      tf.expand_dims(num_valid_voxels, axis=1))
  valid_mask = tf.reshape(valid_mask, [-1])
  embedding_dims = outputs[embedding_key].get_shape().as_list()[-1]
  if embedding_dims is None:
    raise ValueError(
        'Embedding dimension is unknown at graph construction time.')
  embedding = tf.reshape(outputs[embedding_key], [-1, embedding_dims])
  embedding = tf.boolean_mask(embedding, valid_mask)
  return metric_learning_losses.regularization_loss(
      embedding=embedding,
      lambda_coef=lambda_coef,
      regularization_type=regularization_type)
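For intuition only, a minimal sketch of what the two regularization_type modes plausibly compute, following the docstring's description (this is an assumption, not metric_learning_losses.regularization_loss itself):

import tensorflow as tf

def regularization_loss_sketch(embedding, lambda_coef, regularization_type):
  # Illustration based on the docstring, not the library implementation.
  squared_norms = tf.reduce_sum(tf.square(embedding), axis=1)
  if regularization_type == 'msq':
    # Penalize embedding lengths that are far from zero.
    per_example = squared_norms
  elif regularization_type == 'unit_length':
    # Penalize embedding lengths that are far from one.
    per_example = tf.square(tf.sqrt(squared_norms + 1e-12) - 1.0)
  else:
    raise ValueError('Unknown regularization type %s.' % regularization_type)
  return lambda_coef * tf.reduce_mean(per_example)

loss = regularization_loss_sketch(tf.random.normal([100, 8]), 0.0001, 'unit_length')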
def compute_target_optimal_q(reward, gamma, next_actions, next_q_values,
                             next_states, terminals):
    """Builds an op used as a target for the Q-value.

  This algorithm corresponds to the method "OT" in
    Ie et al. https://arxiv.org/abs/1905.12767.

  Args:
    reward: [batch_size] tensor, the immediate reward.
    gamma: float, discount factor with the usual RL meaning.
    next_actions: [batch_size, slate_size] tensor, the next slate.
    next_q_values: [batch_size, num_of_documents] tensor, the q values of the
      documents in the next step.
    next_states: [batch_size, 1 + num_of_documents] tensor, the features for the
      user and the documents in the next step.
    terminals: [batch_size] tensor, indicating if this is a terminal step.

  Returns:
    [batch_size] tensor, the target q values.
  """
    scores, score_no_click = _get_unnormalized_scores(next_states)

    # Obtain all possible slates given current docs in the candidate set.
    slate_size = next_actions.get_shape().as_list()[1]
    num_candidates = next_q_values.get_shape().as_list()[1]
    mesh_args = [list(range(num_candidates))] * slate_size
    slates = tf.stack(tf.meshgrid(*mesh_args), axis=-1)
    slates = tf.reshape(slates, shape=(-1, slate_size))
    # Filter slates that include duplicates to ensure each document is picked
    # at most once.
    unique_mask = tf.map_fn(
        lambda x: tf.equal(tf.size(input=x), tf.size(input=tf.unique(x)[0])),
        slates,
        dtype=tf.bool)
    # [num_of_slates, slate_size]
    slates = tf.boolean_mask(tensor=slates, mask=unique_mask)

    # [batch_size, num_of_slates, slate_size]
    next_q_values_slate = tf.gather(next_q_values, slates, axis=1)
    # [batch_size, num_of_slates, slate_size]
    scores_slate = tf.gather(scores, slates, axis=1)
    # [batch_size, num_of_slates]
    batch_size = next_states.get_shape().as_list()[0]
    score_no_click_slate = tf.reshape(
        tf.tile(score_no_click,
                tf.shape(input=slates)[:1]), [batch_size, -1])

    # [batch_size, num_of_slates]
    next_q_target_slate = tf.reduce_sum(
        input_tensor=next_q_values_slate * scores_slate,
        axis=2) / (tf.reduce_sum(input_tensor=scores_slate, axis=2) +
                   score_no_click_slate)

    next_q_target_max = tf.reduce_max(input_tensor=next_q_target_slate, axis=1)

    return reward + gamma * next_q_target_max * (
        1. - tf.cast(terminals, tf.float32))
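A quick toy check of the slate enumeration used above (eager execution assumed): with 3 candidate documents and slates of size 2, the meshgrid produces all 9 ordered pairs and the unique mask removes those with a repeated document.

import tensorflow as tf

slate_size, num_candidates = 2, 3
mesh_args = [list(range(num_candidates))] * slate_size
slates = tf.stack(tf.meshgrid(*mesh_args), axis=-1)
slates = tf.reshape(slates, shape=(-1, slate_size))
unique_mask = tf.map_fn(
    lambda x: tf.equal(tf.size(input=x), tf.size(input=tf.unique(x)[0])),
    slates,
    dtype=tf.bool)
# 6 ordered slates remain, each with two distinct documents.
print(tf.boolean_mask(tensor=slates, mask=unique_mask).numpy())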
 def fn():
     """Loss function for when number of input and output boxes is positive."""
     if is_balanced:
         weights = loss_utils.get_balanced_loss_weights_multiclass(
             labels=input_boxes_instance_id)
     else:
         weights = tf.ones([tf.shape(input_boxes_instance_id)[0], 1],
                           dtype=tf.float32)
     normalized_box_size = 5.0
     predicted_boxes_length = output_boxes_length
     predicted_boxes_height = output_boxes_height
     predicted_boxes_width = output_boxes_width
     predicted_boxes_center = output_boxes_center
     predicted_boxes_rotation_matrix = output_boxes_rotation_matrix
     gt_boxes_length = input_boxes_length
     gt_boxes_height = input_boxes_height
     gt_boxes_width = input_boxes_width
     gt_boxes_center = input_boxes_center
     gt_boxes_rotation_matrix = input_boxes_rotation_matrix
     if loss_type in ['normalized_huber', 'normalized_euclidean']:
         predicted_boxes_length /= (gt_boxes_length / normalized_box_size)
         predicted_boxes_height /= (gt_boxes_height / normalized_box_size)
         predicted_boxes_width /= (gt_boxes_width / normalized_box_size)
         gt_boxes_length = tf.ones_like(
             gt_boxes_length, dtype=tf.float32) * normalized_box_size
         gt_boxes_height = tf.ones_like(
             gt_boxes_height, dtype=tf.float32) * normalized_box_size
         gt_boxes_width = tf.ones_like(
             gt_boxes_width, dtype=tf.float32) * normalized_box_size
     gt_box_corners = box_utils.get_box_corners_3d(
         boxes_length=gt_boxes_length,
         boxes_height=gt_boxes_height,
         boxes_width=gt_boxes_width,
         boxes_rotation_matrix=gt_boxes_rotation_matrix,
         boxes_center=gt_boxes_center)
     predicted_box_corners = box_utils.get_box_corners_3d(
         boxes_length=predicted_boxes_length,
         boxes_height=predicted_boxes_height,
         boxes_width=predicted_boxes_width,
         boxes_rotation_matrix=predicted_boxes_rotation_matrix,
         boxes_center=predicted_boxes_center)
     corner_weights = tf.tile(weights, [1, 8])
     if loss_type in ['huber', 'normalized_huber']:
         loss_fn = tf.keras.losses.Huber(
             delta=delta, reduction=tf.keras.losses.Reduction.NONE)
     elif loss_type in [
             'normalized_absolute_difference', 'absolute_difference'
     ]:
         loss_fn = tf.keras.losses.MeanAbsoluteError(
             reduction=tf.keras.losses.Reduction.NONE)
     else:
         raise ValueError(('Unknown loss type %s.' % loss_type))
     box_corner_losses = loss_fn(y_true=tf.reshape(gt_box_corners, [-1, 3]),
                                 y_pred=tf.reshape(predicted_box_corners,
                                                   [-1, 3]))
     return tf.reduce_mean(box_corner_losses *
                           tf.reshape(corner_weights, [-1]))
Example #7
 def _build_bisimulation_target(self):
   """Build the bisimulation target."""
   batch_size = tf.shape(self.rewards_ph)[0]
   r1 = tf.tile([self.rewards_ph], [batch_size, 1])
   r2 = tf.transpose(r1)
   reward_differences = tf.abs(r1 - r2)
   reward_differences = tf.reshape(reward_differences, (batch_size**2, 1))
   next_state_distances = self.bisim_horizon_ph * self.s2_target_distances
   return reward_differences + self.gamma * next_state_distances
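A standalone sketch of the pairwise reward-difference pattern above, using a dummy batch of 3 rewards instead of the class placeholders:

import tensorflow as tf

rewards = tf.constant([0., 1., 2.])
batch_size = tf.shape(rewards)[0]
r1 = tf.tile([rewards], [batch_size, 1])
r2 = tf.transpose(r1)
# |r_i - r_j| for every ordered pair (i, j), flattened to batch_size**2 rows.
reward_differences = tf.reshape(tf.abs(r1 - r2), (batch_size**2, 1))
print(reward_differences.numpy().ravel())  # [0. 1. 2. 1. 0. 1. 2. 1. 0.]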
def classification_loss_using_mask_iou_func_unbatched(
        embeddings, instance_ids, sampled_embeddings, sampled_instance_ids,
        sampled_class_labels, sampled_logits, similarity_strategy,
        is_balanced):
    """Classification loss using mask iou.

  Args:
    embeddings: A tf.float32 tensor of size [n, f].
    instance_ids: A tf.int32 tensor of size [n].
    sampled_embeddings: A tf.float32 tensor of size [num_samples, f].
    sampled_instance_ids: A tf.int32 tensor of size [num_samples].
    sampled_class_labels: A tf.int32 tensor of size [num_samples, 1].
    sampled_logits: A tf.float32 tensor of size [num_samples, num_classes].
    similarity_strategy: Defines the method for computing similarity between
                         embedding vectors. Possible values are 'dotproduct' and
                         'distance'.
    is_balanced: If True, the per-voxel losses are re-weighted to have equal
      total weight for foreground vs. background voxels.

  Returns:
    A tf.float32 loss scalar tensor.
  """
    predicted_soft_masks = metric_learning_utils.embedding_centers_to_soft_masks(
        embedding=embeddings,
        centers=sampled_embeddings,
        similarity_strategy=similarity_strategy)
    predicted_masks = tf.cast(tf.greater(predicted_soft_masks, 0.5),
                              dtype=tf.float32)
    gt_masks = tf.cast(tf.equal(tf.expand_dims(sampled_instance_ids, axis=1),
                                tf.expand_dims(instance_ids, axis=0)),
                       dtype=tf.float32)
    pairwise_iou = instance_segmentation_utils.points_mask_pairwise_iou(
        masks1=predicted_masks, masks2=gt_masks)
    num_classes = sampled_logits.get_shape().as_list()[1]
    sampled_class_labels_one_hot = tf.one_hot(indices=tf.reshape(
        sampled_class_labels, [-1]),
                                              depth=num_classes)
    sampled_class_labels_one_hot_fg = sampled_class_labels_one_hot[:, 1:]
    iou_coefs = tf.tile(tf.reshape(pairwise_iou, [-1, 1]),
                        [1, num_classes - 1])
    sampled_class_labels_one_hot_fg *= iou_coefs
    sampled_class_labels_one_hot_bg = tf.maximum(
        1.0 - tf.math.reduce_sum(
            sampled_class_labels_one_hot_fg, axis=1, keepdims=True), 0.0)
    sampled_class_labels_one_hot = tf.concat(
        [sampled_class_labels_one_hot_bg, sampled_class_labels_one_hot_fg],
        axis=1)
    params = {}
    if is_balanced:
        weights = loss_utils.get_balanced_loss_weights_multiclass(
            labels=tf.expand_dims(sampled_instance_ids, axis=1))
        params['weights'] = weights
    return classification_loss_fn(logits=sampled_logits,
                                  labels=sampled_class_labels_one_hot,
                                  **params)
Example #9
 def _expand_to_population(self, data):
     """Expand the input tensor to a population of replications
     Args:
         data (tf.Tensor): input data with shape [batch_size, ...]
     Returns:
         data_population (tf.Tensor) with shape
                                 [batch_size * self._population_size, ...].
         For example data tensor [[a, b], [c, d]] and a population_size of 2,
         we have the following data_population tensor as output
                                 [[a, b], [a, b], [c, d], [c, d]]
     """
     data_population = tf.tile(tf.expand_dims(
         data, 1), [1, self._population_size] + [1] * len(data.shape[1:]))
     data_population = tf.reshape(data_population,
                                  [-1] + data.shape[1:].as_list())
     return data_population
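A usage sketch matching the docstring, assuming eager execution and a population size of 2:

import tensorflow as tf

population_size = 2
data = tf.constant([['a', 'b'], ['c', 'd']])
data_population = tf.tile(
    tf.expand_dims(data, 1),
    [1, population_size] + [1] * len(data.shape[1:]))
data_population = tf.reshape(data_population, [-1] + data.shape[1:].as_list())
print(data_population.numpy())
# [[b'a' b'b']
#  [b'a' b'b']
#  [b'c' b'd']
#  [b'c' b'd']]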
Example #10
def identity_knn_graph_unbatched(points, k):
    """Returns each points as its own neighbor k times.

  Args:
    points: A tf.float32 tensor of [N, D] where D is the point dimensions.
    k: Number of neighbors for each point.

  Returns:
    distances: A tf.float32 tensor of [N, k]. Distances is all zeros since
      each point is returned as its own neighbor.
    indices: A tf.int32 tensor of [N, k]. Each row will contain values that
      are identical to the index of that row.
  """
    num_points = tf.shape(points)[0]
    indices = tf.expand_dims(tf.range(num_points), axis=1)
    indices = tf.tile(indices, [1, k])
    distances = tf.zeros([num_points, k], dtype=tf.float32)
    return distances, indices
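A toy usage example, assuming eager execution and that the function above is in scope:

import tensorflow as tf

points = tf.random.normal([4, 3])  # 4 points in 3-D.
distances, indices = identity_knn_graph_unbatched(points, k=2)
print(indices.numpy())    # [[0 0] [1 1] [2 2] [3 3]]
print(distances.numpy())  # all zeros, shape [4, 2]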
Example #11
    def _concat_states(self, states, transpose=False):
        """Concatenate all pairs of states in a batch.

    Args:
      states: Tensor, batch of states from which we will concatenate
        batch_size^2 pairs of states.
      transpose: bool, whether to concatenate states in transpose order.

    Returns:
      A batch_size^2 Tensor containing the concatenation of all elements in
        `states`.
    """
        # tiled_states will have shape
        # [batch_size, batch_size, representation_dimension] and will be of the
        # following form (where \phi_1 is the representation of the state of the
        # first batch_element):
        # [ \phi_1 \phi_2 ... \phi_batch_size ]
        # [ \phi_1 \phi_2 ... \phi_batch_size ]
        # ...
        # [ \phi_1 \phi_2 ... \phi_batch_size ]
        batch_size = tf.shape(states)[0]
        tiled_states = tf.tile([states], [batch_size, 1, 1])
        # transpose_tiled_states will have shape
        # [batch_size, batch_size, representation_dimension] and will be of the
        # following form (where \phi_1 is the representation of the state of the
        # first batch_element):
        # [ \phi_1 \phi_1 ... \phi_1 ]
        # [ \phi_2 \phi_2 ... \phi_2 ]
        # ...
        # [ \phi_batch_size \phi_batch_size ... \phi_batch_size ]
        transpose_tiled_states = tf.keras.backend.repeat(states, batch_size)
        # concat_states will be a
        # [batch_size, batch_size, representation_dimension*2] matrix containing the
        # concatenation of all pairs of states in the batch.
        if transpose:
            concat_states = tf.concat([transpose_tiled_states, tiled_states],
                                      2)
        else:
            concat_states = tf.concat([tiled_states, transpose_tiled_states],
                                      2)
        # We return a reshaped matrix which results in a new batch of size
        # batch_size ** 2. The resulting matrix will have shape
        # [batch_size**2, representation_dimension * 2] (here 2 * 2 = 4).
        return tf.reshape(concat_states, (batch_size**2, 4))
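A toy version of the pairwise concatenation above, with 2-D state representations and a batch of 2 so that the final width-4 reshape matches (eager execution assumed):

import tensorflow as tf

states = tf.constant([[1., 2.], [3., 4.]])
batch_size = tf.shape(states)[0]
tiled_states = tf.tile([states], [batch_size, 1, 1])
transpose_tiled_states = tf.keras.backend.repeat(states, batch_size)
concat_states = tf.concat([tiled_states, transpose_tiled_states], 2)
# Every row is the concatenation of one pair of states.
print(tf.reshape(concat_states, (batch_size**2, 4)).numpy())
# [[1. 2. 1. 2.]
#  [3. 4. 1. 2.]
#  [1. 2. 3. 4.]
#  [3. 4. 3. 4.]]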
Example #12
def flip_normals_towards_viewpoint(points, normals, viewpoint):
    """Flips the normals to face towards the view point.

  Args:
    points: A tf.float32 tensor of size [N, 3].
    normals: A tf.float32 tensor of size [N, 3].
    viewpoint: A tf.float32 tensor of size [3].

  Returns:
    flipped_normals: A tf.float32 tensor of size [N, 3].
  """
    # (viewpoint - point)
    view_vector = tf.expand_dims(viewpoint, axis=0) - points
    # Dot product between the (viewpoint - point) and the plane normal
    cos_theta = tf.expand_dims(tf.reduce_sum(view_vector * normals, axis=1),
                               axis=1)
    # Revert normals where cos is negative.
    normals *= tf.sign(tf.tile(cos_theta, [1, 3]))
    return normals
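A toy usage example, assuming eager execution and that the function above is in scope. The first normal faces away from the viewpoint and gets flipped; the second already faces it and is kept.

import tensorflow as tf

points = tf.constant([[0., 0., 0.], [1., 0., 0.]])
normals = tf.constant([[0., 0., -1.], [0., 0., 1.]])
viewpoint = tf.constant([0., 0., 5.])
print(flip_normals_towards_viewpoint(points, normals, viewpoint).numpy())
# [[0. 0. 1.]
#  [0. 0. 1.]]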
Example #13
    def _build_op(self):

        x = self.x[:, :, 1:]
        c_x, c_y = self._split()

        r = NeuralEncoder(output_shape=None,
                          scope="encoder")(c_x, c_y, keep_prob=self.keep_prob)

        r = tf.tile(r[:, None, :],
                    (1, self.y_features.get_shape().as_list()[1], 1))
        h = tf.layers.dense(tf.concat([r, self.y_features], axis=2),
                            units=128,
                            activation=tf.nn.tanh)
        h = tf.layers.dense(h, units=1, activation=None)
        r = tf.reduce_mean(h, axis=1)

        self.d = NeuralDecoder(scope="decoder")(r, x, keep_prob=self.keep_prob)

        self.h = self.d.loc
Example #14
def identity_knn_graph(points, num_valid_points, k):  # pylint: disable=unused-argument
    """Returns each points as its own neighbor k times.

  Args:
    points: A tf.float32 tensor of size [num_batches, N, D] where D is the point
      dimensions.
    num_valid_points: A tf.int32 tensor of size [num_batches] containing the
      number of valid points in each batch example.
    k: Number of neighbors for each point.

  Returns:
    distances: A tf.float32 tensor of [num_batches, N, k]. Distances is all
      zeros since each point is returned as its own neighbor.
    indices: A tf.int32 tensor of [num_batches, N, k]. Each row will contain
      values that are identical to the index of that row.
  """
    num_batches = points.get_shape()[0]
    num_points = tf.shape(points)[1]
    indices = tf.expand_dims(tf.range(num_points), axis=1)
    indices = tf.tile(tf.expand_dims(indices, axis=0), [num_batches, 1, k])
    distances = tf.zeros([num_batches, num_points, k], dtype=tf.float32)
    return distances, indices
def classification_loss_using_mask_iou(inputs,
                                       outputs,
                                       num_samples,
                                       max_instance_id=None,
                                       similarity_strategy='distance',
                                       is_balanced=True,
                                       is_intermediate=False):
    """Classification loss with an iou threshold.

  Args:
    inputs: A dictionary that contains
      num_valid_voxels - A tf.int32 tensor of size [batch_size].
      instance_ids - A tf.int32 tensor of size [batch_size, n].
      class_labels - A tf.int32 tensor of size [batch_size, n]. It is assumed
        that the background voxels are assigned to class 0.
    outputs: A dictionary that contains
      embeddings - A tf.float32 tensor of size [batch_size, n, f].
      logits - A tf.float32 tensor of size [batch_size, n, num_classes]. It is
        assumed that background is class 0.
    num_samples: An int determining the number of samples.
    max_instance_id: If set, instance ids larger than that value will be
      ignored. If not set, it will be computed from instance_ids tensor.
    similarity_strategy: Defines the method for computing similarity between
                         embedding vectors. Possible values are 'dotproduct' and
                         'distance'.
    is_balanced: If True, the per-voxel losses are re-weighted to have equal
      total weight for foreground vs. background voxels.
    is_intermediate: True if applied to intermediate predictions;
      otherwise, False.

  Returns:
    A tf.float32 scalar loss tensor.
  """
    instance_ids_key = standard_fields.InputDataFields.object_instance_id_voxels
    class_labels_key = standard_fields.InputDataFields.object_class_voxels
    num_voxels_key = standard_fields.InputDataFields.num_valid_voxels
    if is_intermediate:
        embedding_key = (standard_fields.DetectionResultFields.
                         intermediate_instance_embedding_voxels)
        logits_key = (standard_fields.DetectionResultFields.
                      intermediate_object_semantic_voxels)
    else:
        embedding_key = (
            standard_fields.DetectionResultFields.instance_embedding_voxels)
        logits_key = standard_fields.DetectionResultFields.object_semantic_voxels
    if instance_ids_key not in inputs:
        raise ValueError('instance_ids is missing in inputs.')
    if class_labels_key not in inputs:
        raise ValueError('class_labels is missing in inputs.')
    if num_voxels_key not in inputs:
        raise ValueError('num_voxels is missing in inputs.')
    if embedding_key not in outputs:
        raise ValueError('embedding is missing in outputs.')
    if logits_key not in outputs:
        raise ValueError('logits is missing in outputs.')
    batch_size = inputs[num_voxels_key].get_shape().as_list()[0]
    if batch_size is None:
        raise ValueError(
            'batch_size is not defined at graph construction time.')
    num_valid_voxels = inputs[num_voxels_key]
    num_voxels = tf.shape(inputs[instance_ids_key])[1]
    valid_mask = tf.less(
        tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [batch_size, 1]),
        tf.expand_dims(num_valid_voxels, axis=1))
    return classification_loss_using_mask_iou_func(
        embeddings=outputs[embedding_key],
        logits=outputs[logits_key],
        instance_ids=tf.reshape(inputs[instance_ids_key], [batch_size, -1]),
        class_labels=inputs[class_labels_key],
        num_samples=num_samples,
        valid_mask=valid_mask,
        max_instance_id=max_instance_id,
        similarity_strategy=similarity_strategy,
        is_balanced=is_balanced)
def prepare_lidar_images_and_correspondences(
    inputs,
    resized_image_height,
    resized_image_width,
    camera_names=('front', 'front_left', 'front_right', 'side_left',
                  'side_right'),
    lidar_names=('top', 'front', 'side_left', 'side_right', 'rear')):
  """Integrates and returns the lidars, cameras and their correspondences.

  Args:
    inputs: A dictionary containing the images and point / pixel
      correspondences.
    resized_image_height: Target height of the images.
    resized_image_width: Target width of the images.
    camera_names: List of cameras to include images from.
    lidar_names: List of lidars to include point clouds from.

  Returns:
    A dictionary containing:
      'points_position': A tf.float32 tensor of size [num_points, 3].
      'points_intensity': A tf.float32 tensor of size [num_points, 1].
      'points_elongation': A tf.float32 tensor of size [num_points, 1].
      'points_normal': A tf.float32 tensor of size [num_points, 3].
      'view_images': A dict mapping 'rgb_view' to a tf.float32 tensor of size
        [num_images, resized_image_height, resized_image_width, 3].
      'view_indices_2d': A dict mapping 'rgb_view' to a tf.int32 tensor of size
        [num_images, num_points, 2].

  Raises:
    ValueError: If camera_names or lidar_names are empty lists.
  """
  if not camera_names:
    raise ValueError('camera_names should contain at least one name.')
  if not lidar_names:
    raise ValueError('lidar_names should contain at least one name.')

  (points_position, points_intensity, points_elongation, points_normal,
   points_in_image_frame_yx, points_in_image_frame_id) = _prepare_lidar_points(
       inputs=inputs, lidar_names=lidar_names)

  images = []
  points_in_image_frame = []

  for camera_name in camera_names:
    image_key = ('cameras/%s/image' % camera_name)
    image_height = tf.shape(inputs[image_key])[0]
    image_width = tf.shape(inputs[image_key])[1]
    height_ratio = tf.cast(
        resized_image_height, dtype=tf.float32) / tf.cast(
            image_height, dtype=tf.float32)
    width_ratio = tf.cast(
        resized_image_width, dtype=tf.float32) / tf.cast(
            image_width, dtype=tf.float32)
    if tf.executing_eagerly():
      resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    else:
      resize_method = tf.image.ResizeMethod.BILINEAR
      if inputs[image_key].dtype in [
          tf.int8, tf.uint8, tf.int16, tf.uint16, tf.int32, tf.int64
      ]:
        resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    images.append(
        tf.image.resize(
            images=inputs[image_key],
            size=[resized_image_height, resized_image_width],
            method=resize_method,
            antialias=True))
    camera_id = tf.cast(inputs[('cameras/%s/id' % camera_name)], dtype=tf.int32)
    valid_points = tf.equal(points_in_image_frame_id, camera_id)
    valid_points = tf.tile(valid_points, [1, 2])
    point_coords = tf.cast(
        tf.cast(points_in_image_frame_yx, dtype=tf.float32) *
        tf.stack([height_ratio, width_ratio]),
        dtype=tf.int32)
    points_in_image_frame_camera = tf.where(
        valid_points, point_coords, -tf.ones_like(valid_points, dtype=tf.int32))
    points_in_image_frame.append(points_in_image_frame_camera)
  num_images = len(images)
  images = tf.stack(images, axis=0)
  images.set_shape([num_images, resized_image_height, resized_image_width, 3])
  points_in_image_frame = tf.stack(points_in_image_frame, axis=0)
  return {
      'points_position': points_position,
      'points_intensity': points_intensity,
      'points_elongation': points_elongation,
      'points_normal': points_normal,
      'view_images': {'rgb_view': images},
      'view_indices_2d': {'rgb_view': points_in_image_frame}
  }
Example #17
def npair_loss(inputs,
               outputs,
               num_samples,
               max_instance_id=None,
               similarity_strategy='distance',
               loss_strategy='softmax',
               is_intermediate=False):
  """N-pair metric learning loss for learning feature embeddings.

  Args:
    inputs: A dictionary that contains
      instance_ids - A tf.int32 tensor of size [batch_size, n].
      valid_mask - A tf.bool tensor of size [batch_size, n] that is True when an
        element is valid and False if it needs to be ignored. By default the
        value is None which means it is not applied.
    outputs: A dictionary that contains
      embeddings - A tf.float32 tensor of size [batch_size, n, f].
    num_samples: An int determining the number of samples.
    max_instance_id: If set, instance ids larger than that value will be
      ignored. If not set, it will be computed from instance_ids tensor.
    similarity_strategy: Defines the method for computing similarity between
                         embedding vectors. Possible values are 'dotproduct' and
                         'distance'.
    loss_strategy: Defines the type of loss including 'softmax' or 'sigmoid'.
    is_intermediate: True if applied to intermediate predictions;
      otherwise, False.

  Returns:
    A tf.float32 scalar loss tensor.
  """
  instance_ids_key = standard_fields.InputDataFields.object_instance_id_voxels
  num_voxels_key = standard_fields.InputDataFields.num_valid_voxels
  if is_intermediate:
    embedding_key = (
        standard_fields.DetectionResultFields
        .intermediate_instance_embedding_voxels)
  else:
    embedding_key = (
        standard_fields.DetectionResultFields.instance_embedding_voxels)
  if instance_ids_key not in inputs:
    raise ValueError('object_instance_id_voxels is missing in inputs.')
  if num_voxels_key not in inputs:
    raise ValueError('num_voxels is missing in inputs.')
  if embedding_key not in outputs:
    raise ValueError('embedding key is missing in outputs.')
  batch_size = inputs[num_voxels_key].get_shape().as_list()[0]
  if batch_size is None:
    raise ValueError('batch_size is not defined at graph construction time.')
  num_valid_voxels = inputs[num_voxels_key]
  num_voxels = tf.shape(inputs[instance_ids_key])[1]
  valid_mask = tf.less(
      tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [batch_size, 1]),
      tf.expand_dims(num_valid_voxels, axis=1))
  return npair_loss_func(
      embeddings=outputs[embedding_key],
      instance_ids=tf.reshape(inputs[instance_ids_key], [batch_size, -1]),
      num_samples=num_samples,
      valid_mask=valid_mask,
      max_instance_id=max_instance_id,
      similarity_strategy=similarity_strategy,
      loss_strategy=loss_strategy)
Example #18
def project_distribution(supports, weights, target_support,
                         validate_args=False):
  """Projects a batch of (support, weights) onto target_support.

  Based on equation (7) in (Bellemare et al., 2017):
    https://arxiv.org/abs/1707.06887
  In the rest of the comments we will refer to this equation simply as Eq7.

  This code is not easy to digest, so we will use a running example to clarify
  what is going on, with the following sample inputs:
    * supports =       [[0, 2, 4, 6, 8],
                        [1, 3, 4, 5, 6]]
    * weights =        [[0.1, 0.6, 0.1, 0.1, 0.1],
                        [0.1, 0.2, 0.5, 0.1, 0.1]]
    * target_support = [4, 5, 6, 7, 8]
  In the code below, comments preceded with 'Ex:' will be referencing the above
  values.

  Args:
    supports: Tensor of shape (batch_size, num_dims) defining supports for the
      distribution.
    weights: Tensor of shape (batch_size, num_dims) defining weights on the
      original support points. Although for the CategoricalDQN agent these
      weights are probabilities, it is not required that they are.
    target_support: Tensor of shape (num_dims) defining support of the projected
      distribution. The values must be monotonically increasing. Vmin and Vmax
      will be inferred from the first and last elements of this tensor,
      respectively. The values in this tensor must be equally spaced.
    validate_args: Whether we will verify the contents of the
      target_support parameter.

  Returns:
    A Tensor of shape (batch_size, num_dims) with the projection of a batch of
    (support, weights) onto target_support.

  Raises:
    ValueError: If target_support has no dimensions, or if shapes of supports,
      weights, and target_support are incompatible.
  """
  target_support_deltas = target_support[1:] - target_support[:-1]
  # delta_z = `\Delta z` in Eq7.
  delta_z = target_support_deltas[0]
  validate_deps = []
  supports.shape.assert_is_compatible_with(weights.shape)
  supports[0].shape.assert_is_compatible_with(target_support.shape)
  target_support.shape.assert_has_rank(1)
  if validate_args:
    # Assert that supports and weights have the same shapes.
    validate_deps.append(
        tf.Assert(
            tf.reduce_all(tf.equal(tf.shape(supports), tf.shape(weights))),
            [supports, weights]))
    # Assert that elements of supports and target_support have the same shape.
    validate_deps.append(
        tf.Assert(
            tf.reduce_all(
                tf.equal(tf.shape(supports)[1], tf.shape(target_support))),
            [supports, target_support]))
    # Assert that target_support has a single dimension.
    validate_deps.append(
        tf.Assert(
            tf.equal(tf.size(tf.shape(target_support)), 1), [target_support]))
    # Assert that the target_support is monotonically increasing.
    validate_deps.append(
        tf.Assert(tf.reduce_all(target_support_deltas > 0), [target_support]))
    # Assert that the values in target_support are equally spaced.
    validate_deps.append(
        tf.Assert(
            tf.reduce_all(tf.equal(target_support_deltas, delta_z)),
            [target_support]))

  with tf.control_dependencies(validate_deps):
    # Ex: `v_min, v_max = 4, 8`.
    v_min, v_max = target_support[0], target_support[-1]
    # Ex: `batch_size = 2`.
    batch_size = tf.shape(supports)[0]
    # `N` in Eq7.
    # Ex: `num_dims = 5`.
    num_dims = tf.shape(target_support)[0]
    # clipped_support = `[\hat{T}_{z_j}]^{V_max}_{V_min}` in Eq7.
    # Ex: `clipped_support = [[[ 4.  4.  4.  6.  8.]]
    #                         [[ 4.  4.  4.  5.  6.]]]`.
    clipped_support = tf.clip_by_value(supports, v_min, v_max)[:, None, :]
    # Ex: `tiled_support = [[[[ 4.  4.  4.  6.  8.]
    #                         [ 4.  4.  4.  6.  8.]
    #                         [ 4.  4.  4.  6.  8.]
    #                         [ 4.  4.  4.  6.  8.]
    #                         [ 4.  4.  4.  6.  8.]]
    #                        [[ 4.  4.  4.  5.  6.]
    #                         [ 4.  4.  4.  5.  6.]
    #                         [ 4.  4.  4.  5.  6.]
    #                         [ 4.  4.  4.  5.  6.]
    #                         [ 4.  4.  4.  5.  6.]]]]`.
    tiled_support = tf.tile([clipped_support], [1, 1, num_dims, 1])
    # Ex: `reshaped_target_support = [[[ 4.]
    #                                  [ 5.]
    #                                  [ 6.]
    #                                  [ 7.]
    #                                  [ 8.]]
    #                                 [[ 4.]
    #                                  [ 5.]
    #                                  [ 6.]
    #                                  [ 7.]
    #                                  [ 8.]]]`.
    reshaped_target_support = tf.tile(target_support[:, None], [batch_size, 1])
    reshaped_target_support = tf.reshape(reshaped_target_support,
                                         [batch_size, num_dims, 1])
    # numerator = `|clipped_support - z_i|` in Eq7.
    # Ex: `numerator = [[[[ 0.  0.  0.  2.  4.]
    #                     [ 1.  1.  1.  1.  3.]
    #                     [ 2.  2.  2.  0.  2.]
    #                     [ 3.  3.  3.  1.  1.]
    #                     [ 4.  4.  4.  2.  0.]]
    #                    [[ 0.  0.  0.  1.  2.]
    #                     [ 1.  1.  1.  0.  1.]
    #                     [ 2.  2.  2.  1.  0.]
    #                     [ 3.  3.  3.  2.  1.]
    #                     [ 4.  4.  4.  3.  2.]]]]`.
    numerator = tf.abs(tiled_support - reshaped_target_support)
    quotient = 1 - (numerator / delta_z)
    # clipped_quotient = `[1 - numerator / (\Delta z)]_0^1` in Eq7.
    # Ex: `clipped_quotient = [[[[ 1.  1.  1.  0.  0.]
    #                            [ 0.  0.  0.  0.  0.]
    #                            [ 0.  0.  0.  1.  0.]
    #                            [ 0.  0.  0.  0.  0.]
    #                            [ 0.  0.  0.  0.  1.]]
    #                           [[ 1.  1.  1.  0.  0.]
    #                            [ 0.  0.  0.  1.  0.]
    #                            [ 0.  0.  0.  0.  1.]
    #                            [ 0.  0.  0.  0.  0.]
    #                            [ 0.  0.  0.  0.  0.]]]]`.
    clipped_quotient = tf.clip_by_value(quotient, 0, 1)
    # Ex: `weights = [[ 0.1  0.6  0.1  0.1  0.1]
    #                 [ 0.1  0.2  0.5  0.1  0.1]]`.
    weights = weights[:, None, :]
    # inner_prod = `\sum_{j=0}^{N-1} clipped_quotient * p_j(x', \pi(x'))`
    # in Eq7.
    # Ex: `inner_prod = [[[[ 0.1  0.6  0.1  0.  0. ]
    #                      [ 0.   0.   0.   0.  0. ]
    #                      [ 0.   0.   0.   0.1 0. ]
    #                      [ 0.   0.   0.   0.  0. ]
    #                      [ 0.   0.   0.   0.  0.1]]
    #                     [[ 0.1  0.2  0.5  0.  0. ]
    #                      [ 0.   0.   0.   0.1 0. ]
    #                      [ 0.   0.   0.   0.  0.1]
    #                      [ 0.   0.   0.   0.  0. ]
    #                      [ 0.   0.   0.   0.  0. ]]]]`.
    inner_prod = clipped_quotient * weights
    # Ex: `projection = [[ 0.8 0.0 0.1 0.0 0.1]
    #                    [ 0.8 0.1 0.1 0.0 0.0]]`.
    projection = tf.reduce_sum(inner_prod, 3)
    projection = tf.reshape(projection, [batch_size, num_dims])
    return projection
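Running the docstring's example values through the function (assuming eager execution and that the snippet above is in scope) reproduces the projection shown in the 'Ex:' comments:

import tensorflow as tf

supports = tf.constant([[0., 2., 4., 6., 8.],
                        [1., 3., 4., 5., 6.]])
weights = tf.constant([[0.1, 0.6, 0.1, 0.1, 0.1],
                       [0.1, 0.2, 0.5, 0.1, 0.1]])
target_support = tf.constant([4., 5., 6., 7., 8.])
print(project_distribution(supports, weights, target_support).numpy())
# ~[[0.8 0.  0.1 0.  0.1]
#   [0.8 0.1 0.1 0.  0. ]]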
Example #19
  def _build_train_op(self, optimizer):
    """Build the TensorFlow graph used to learn the bisimulation metric.

    Args:
      optimizer: a tf.train optimizer.
    Returns:
      A TensorFlow op to minimize the bisimulation loss.
    """
    self.online_network = tf.make_template('Online',
                                           self._network_template)
    self.target_network = tf.make_template('Target',
                                           self._network_template)
    self.s1_ph = tf.placeholder(tf.float64, (self.batch_size, 2),
                                name='s1_ph')
    self.s2_ph = tf.placeholder(tf.float64, (self.batch_size, 2),
                                name='s2_ph')
    self.s1_online_distances = self.online_network(
        self._concat_states(self.s1_ph))
    self.s1_target_distances = self.target_network(
        self._concat_states(self.s1_ph))
    self.s2_target_distances = self.target_network(
        self._concat_states(self.s2_ph))
    self.action_ph = tf.placeholder(tf.int32, (self.batch_size,))
    self.rewards_ph = tf.placeholder(tf.float64, (self.batch_size,))
    # We use an expanding horizon for computing the distances.
    self.bisim_horizon_ph = tf.placeholder(tf.float64, ())
    # bisimulation_target_1 = rew_diff + gamma * next_distance.
    bisimulation_target_1 = tf.stop_gradient(self._build_bisimulation_target())
    # bisimulation_target_2 = curr_distance.
    bisimulation_target_2 = tf.stop_gradient(self.s1_target_distances)
    # We slowly taper in the maximum according to the bisim horizon.
    bisimulation_target = tf.maximum(
        bisimulation_target_1, bisimulation_target_2 * self.bisim_horizon_ph)
    # We zero-out diagonal entries, since those are estimating the distance
    # between a state and itself, which we know to be 0.
    diagonal_mask = 1.0 - tf.diag(tf.ones(self.batch_size, dtype=tf.float64))
    diagonal_mask = tf.reshape(diagonal_mask, (self.batch_size**2, 1))
    bisimulation_target *= diagonal_mask
    bisimulation_estimate = self.s1_online_distances
    # We start with a mask that includes everything.
    loss_mask = tf.ones(tf.shape(bisimulation_estimate))
    # We have to enforce that distances are only computed between states that
    # were generated by the same action.
    indicators = self.action_ph
    indicators = tf.cast(indicators, tf.float64)
    # indicators will initially have shape [batch_size], we first tile it:
    square_ids = tf.tile([indicators], [self.batch_size, 1])
    # We subtract square_ids from its transpose:
    square_ids = square_ids - tf.transpose(square_ids)
    # At this point all zero-entries are the ones with equal IDs.
    # Now we would like to convert the zeros in this matrix to 1s, and make
    # everything else a 0. We can do this with the following operation:
    loss_mask = 1 - tf.abs(tf.sign(square_ids))
    # Now reshape to match the shapes of the estimate and target.
    loss_mask = tf.reshape(loss_mask, (self.batch_size**2, 1))
    larger_targets = bisimulation_target - bisimulation_estimate
    larger_targets_count = tf.reduce_sum(
        tf.cast(larger_targets > 0., tf.float64))
    tf.summary.scalar('Learning/LargerTargets', larger_targets_count)
    tf.summary.scalar('Learning/NumUpdates', tf.count_nonzero(loss_mask))
    tf.summary.scalar('Learning/BisimHorizon', self.bisim_horizon_ph)
    bisimulation_loss = tf.losses.mean_squared_error(
        bisimulation_target,
        bisimulation_estimate,
        weights=loss_mask)
    tf.summary.scalar('Learning/loss', bisimulation_loss)
    # Plot average distance between sampled representations.
    average_distance = tf.reduce_mean(bisimulation_estimate)
    tf.summary.scalar('Approx/AverageDistance', average_distance)
    return optimizer.minimize(bisimulation_loss)
Example #20
 def __call__(self, shape=None, dtype=None, partition_info=None):
     out = tf.matrix_solve_ls(self.K + self.reg * tf.eye(self.K.get_shape()[-1].value), self.d)
     out = tf.tile(out, (1, 1, shape[1]))
     return tf.reshape(out, shape)