Example no. 1
def pick_labeled_image(mesh_inputs, view_image_inputs, view_indices_2d_inputs,
                       view_name):
    """Pick the image with most number of labeled points projecting to it."""
    if view_name not in view_image_inputs:
        return
    if view_name not in view_indices_2d_inputs:
        return
    if standard_fields.InputDataFields.point_loss_weights not in mesh_inputs:
        raise ValueError(
            'The key `point_loss_weights` is missing from mesh_inputs.')
    height = tf.shape(view_image_inputs[view_name])[1]
    width = tf.shape(view_image_inputs[view_name])[2]
    valid_points_y = tf.logical_and(
        tf.greater_equal(view_indices_2d_inputs[view_name][:, :, 0], 0),
        tf.less(view_indices_2d_inputs[view_name][:, :, 0], height))
    valid_points_x = tf.logical_and(
        tf.greater_equal(view_indices_2d_inputs[view_name][:, :, 1], 0),
        tf.less(view_indices_2d_inputs[view_name][:, :, 1], width))
    valid_points = tf.logical_and(valid_points_y, valid_points_x)
    image_total_weights = tf.reduce_sum(
        tf.cast(valid_points, dtype=tf.float32) * tf.squeeze(
            mesh_inputs[standard_fields.InputDataFields.point_loss_weights],
            axis=1),
        axis=1)
    image_total_weights = tf.cond(
        tf.equal(tf.reduce_sum(image_total_weights), 0),
        lambda: tf.reduce_sum(tf.cast(valid_points, dtype=tf.float32), axis=1),
        lambda: image_total_weights)
    best_image = tf.math.argmax(image_total_weights)
    view_image_inputs[view_name] = view_image_inputs[view_name][
        best_image:best_image + 1, :, :, :]
    view_indices_2d_inputs[view_name] = view_indices_2d_inputs[view_name][
        best_image:best_image + 1, :, :]
def _positions_center_origin(height, width):
  """Returns image coordinates where the origin at the image center."""
  h = tf.range(0.0, height, 1)
  w = tf.range(0.0, width, 1)
  center_h = tf.cast(height, tf.float32) / 2.0 - 0.5
  center_w = tf.cast(width, tf.float32) / 2.0 - 0.5
  return tf.stack(tf.meshgrid(h - center_h, w - center_w, indexing='ij'), -1)
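
As a quick sanity check of the convention above, here is a minimal usage sketch (assuming TensorFlow 2.x eager execution); it is illustrative only and not part of the original pipeline.

import tensorflow as tf

# For a 4x6 image the returned grid has shape [4, 6, 2] and holds
# (row, col) offsets measured from the image center.
coords = _positions_center_origin(4, 6)
print(coords.shape)          # (4, 6, 2)
print(coords[0, 0].numpy())  # [-1.5, -2.5]: top-left pixel relative to the center
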
Example no. 3
  def update_state(self, inputs, outputs):
    """Function that updates the metric state at each example.

    Args:
      inputs: A dictionary containing input tensors.
      outputs: A dictionary containing output tensors.

    Returns:
      Update op.
    """
    detections_score = tf.reshape(
        outputs[standard_fields.DetectionResultFields.objects_score], [-1])
    detections_class = tf.reshape(
        outputs[standard_fields.DetectionResultFields.objects_class], [-1])
    num_detections = tf.shape(detections_score)[0]
    detections_instance_mask = tf.reshape(
        outputs[
            standard_fields.DetectionResultFields.instance_segments_voxel_mask],
        [num_detections, -1])
    gt_class = tf.reshape(inputs[standard_fields.InputDataFields.objects_class],
                          [-1])
    num_gt = tf.shape(gt_class)[0]
    gt_voxel_instance_ids = tf.reshape(
        inputs[standard_fields.InputDataFields.object_instance_id_voxels], [-1])
    gt_instance_masks = tf.transpose(
        tf.one_hot(gt_voxel_instance_ids - 1, depth=num_gt, dtype=tf.float32))
    for c in self.class_range:
      gt_mask_c = tf.equal(gt_class, c)
      num_gt_c = tf.math.reduce_sum(tf.cast(gt_mask_c, dtype=tf.int32))
      gt_instance_masks_c = tf.boolean_mask(gt_instance_masks, gt_mask_c)
      detections_mask_c = tf.equal(detections_class, c)
      num_detections_c = tf.math.reduce_sum(
          tf.cast(detections_mask_c, dtype=tf.int32))
      if num_detections_c == 0:
        continue
      det_scores_c = tf.boolean_mask(detections_score, detections_mask_c)
      det_instance_mask_c = tf.boolean_mask(detections_instance_mask,
                                            detections_mask_c)
      det_scores_c, sorted_indices = tf.math.top_k(
          det_scores_c, k=num_detections_c)
      det_instance_mask_c = tf.gather(det_instance_mask_c, sorted_indices)
      tp_c = tf.zeros([num_detections_c], dtype=tf.int32)
      if num_gt_c > 0:
        ious_c = instance_segmentation_utils.points_mask_iou(
            masks1=gt_instance_masks_c, masks2=det_instance_mask_c)
        max_overlap_gt_ids = tf.cast(
            tf.math.argmax(ious_c, axis=0), dtype=tf.int32)
        is_gt_box_detected = tf.zeros([num_gt_c], dtype=tf.int32)
        for i in tf.range(num_detections_c):
          gt_id = max_overlap_gt_ids[i]
          if (ious_c[gt_id, i] > self.iou_threshold and
              is_gt_box_detected[gt_id] == 0):
            tp_c = tf.maximum(
                tf.one_hot(i, num_detections_c, dtype=tf.int32), tp_c)
            is_gt_box_detected = tf.maximum(
                tf.one_hot(gt_id, num_gt_c, dtype=tf.int32), is_gt_box_detected)
      self.tp[c] = tf.concat([self.tp[c], tp_c], axis=0)
      self.scores[c] = tf.concat([self.scores[c], det_scores_c], axis=0)
      self.num_gt[c] += num_gt_c
    return tf.no_op()
Example no. 4
def change_intensity_range(intensities,
                           threshold=2.5,
                           normalization_factor1=2500.0,
                           normalization_factor2=12.0):
    """Changes the range of intensity values.

  Args:
    intensities: A tensor containing intensity values. It is assumed it has a
      range of 0 to around 65000.
    threshold: A parameter used for re-ranging intensity values.
    normalization_factor1: A parameter used for re-ranging intensity values.
    normalization_factor2: A parameter used for re-ranging intensity values.

  Returns:
    Tensor with re-ranged intensity values.
  """
    intensities = tf.cast(intensities, dtype=tf.float32)
    intensities_large_mask = tf.cast(tf.greater(intensities, threshold),
                                     dtype=tf.float32)
    intensities_small = intensities * (1.0 - intensities_large_mask)
    intensities_large = ((threshold +
                          (intensities - threshold) / normalization_factor2) *
                         intensities_large_mask)
    return (
        (intensities_small + intensities_large) / normalization_factor1) - 1.0
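
The call below is a small illustrative sketch (assuming TensorFlow eager execution) of how the default parameters squash raw intensities into roughly the [-1, 1] range; the sample values are made up.

import tensorflow as tf

raw = tf.constant([0.0, 1.0, 100.0, 65000.0])
rescaled = change_intensity_range(raw)
# Values below `threshold` are only divided by `normalization_factor1` (and
# shifted by -1); larger values are first compressed by `normalization_factor2`.
print(rescaled.numpy())  # approximately [-1.0, -0.9996, -0.996, 1.17]
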
Example no. 5
def _get_random_scaled_resolution(orig_height, orig_width, min_scale,
                                  max_scale, max_strech, probability_strech):
    """Computes a new random resolution."""
    # Choose a random scale factor and compute new resolution.
    scale = 2**tf.random.uniform([],
                                 minval=min_scale,
                                 maxval=max_scale,
                                 dtype=tf.float32)
    scale_height = scale
    scale_width = scale

    # Possibly change the scale values individually to perform a stretch.
    def true_fn(scale_height, scale_width):
        scale_height *= 2**tf.random.uniform([], -max_strech, max_strech)
        scale_width *= 2**tf.random.uniform([], -max_strech, max_strech)
        return tf.stack((scale_height, scale_width), axis=0)

    def false_fn(scale_height, scale_width):
        return tf.stack((scale_height, scale_width), axis=0)

    perform_strech = tf.random.uniform([]) < probability_strech
    scales = tf.cond(perform_strech,
                     lambda: true_fn(scale_height, scale_width),
                     lambda: false_fn(scale_height, scale_width))
    scale_height = scales[0]
    scale_width = scales[1]

    # Compute scaled image resolution.
    new_height = tf.cast(
        tf.math.ceil(tf.cast(orig_height, tf.float32) * scale_height),
        tf.int32)
    new_width = tf.cast(
        tf.math.ceil(tf.cast(orig_width, tf.float32) * scale_width), tf.int32)
    return new_height, new_width, scale
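
A hedged usage sketch (TensorFlow eager assumed, values made up): min_scale and max_scale are exponents of 2, so the call below samples an overall scale factor between 0.5x and 2x, with independent height/width stretching applied 80% of the time.

import tensorflow as tf

new_h, new_w, scale = _get_random_scaled_resolution(
    orig_height=384, orig_width=512,
    min_scale=-1.0, max_scale=1.0,      # scale in [2**-1, 2**1] = [0.5, 2.0]
    max_strech=0.2, probability_strech=0.8)
print(int(new_h), int(new_w), float(scale))
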
def random_rotation(images,
                    flow=None,
                    mask=None,
                    max_rotation=10,
                    not_empty_crop=True):
    """Performs a random rotation with the specified maximum rotation."""

    angle_radian = tf.random.uniform(
        [], minval=-max_rotation, maxval=max_rotation,
        dtype=tf.float32) * pi / 180.0
    images = rotate(images, angle_radian, is_flow=False, mask=None)

    if not_empty_crop:
        orig_height = tf.shape(images)[-3]
        orig_width = tf.shape(images)[-2]
        # introduce abbreviations for shorter notation
        cos = tf.math.cos(angle_radian % pi)
        sin = tf.math.sin(angle_radian % pi)
        h = tf.cast(orig_height, tf.float32)
        w = tf.cast(orig_width, tf.float32)

        # compute required scale factor
        scale = tf.cond(
            tf.math.less(angle_radian % pi, pi / 2.0), lambda: tf.math.maximum(
                (w / h) * sin + cos, (h / w) * sin + cos),
            lambda: tf.math.maximum((w / h) * sin - cos, (h / w) * sin - cos))
        new_height = tf.math.floor(h / scale)
        new_width = tf.math.floor(w / scale)

        # crop image again to original size
        offset_height = tf.cast((h - new_height) / 2, tf.int32)
        offset_width = tf.cast((w - new_width) / 2, tf.int32)
        images = tf.image.crop_to_bounding_box(
            images,
            offset_height=offset_height,
            offset_width=offset_width,
            target_height=tf.cast(new_height, tf.int32),
            target_width=tf.cast(new_width, tf.int32))

    if flow is not None:
        flow, mask = rotate(flow, angle_radian, is_flow=True, mask=mask)

        if not_empty_crop:
            # crop flow and mask again to original size
            flow = tf.image.crop_to_bounding_box(
                flow,
                offset_height=offset_height,
                offset_width=offset_width,
                target_height=tf.cast(new_height, tf.int32),
                target_width=tf.cast(new_width, tf.int32))
            mask = tf.image.crop_to_bounding_box(
                mask,
                offset_height=offset_height,
                offset_width=offset_width,
                target_height=tf.cast(new_height, tf.int32),
                target_width=tf.cast(new_width, tf.int32))
    return images, flow, mask
Example no. 7
def pointcloud_to_voxel_grid(points,
                             features,
                             grid_cell_size,
                             start_location,
                             end_location,
                             segment_func=tf.math.unsorted_segment_mean):
  """Converts a pointcloud into a voxel grid.

  Args:
    points: A tf.float32 tensor of size [N, 3].
    features: A tf.float32 tensor of size [N, F].
    grid_cell_size: A tf.float32 tensor of size [3].
    start_location: A tf.float32 tensor of size [3].
    end_location: A tf.float32 tensor of size [3].
    segment_func: A tensorflow function that operates on segments. Expect one
      of tf.math.unsorted_segment_{min/max/mean/prod/sum}. Defaults to
      tf.math.unsorted_segment_mean

  Returns:
    voxel_features: A tf.float32 tensor of
      size [grid_x_len, grid_y_len, grid_z_len, F].
    segment_ids: A tf.int32 tensor of IDs for each point indicating
      which (flattened) voxel cell its data was mapped to.
    point_indices: A tf.int32 tensor of size [num_points, 3] containing the
      location of each point in the 3d voxel grid.
  """
  grid_cell_size = tf.convert_to_tensor(grid_cell_size, dtype=tf.float32)
  start_location = tf.convert_to_tensor(start_location, dtype=tf.float32)
  end_location = tf.convert_to_tensor(end_location, dtype=tf.float32)
  point_indices = tf.cast(
      (points - tf.expand_dims(start_location, axis=0)) /
      tf.expand_dims(grid_cell_size, axis=0),
      dtype=tf.int32)
  grid_size = tf.cast(
      tf.math.ceil((end_location - start_location) / grid_cell_size),
      dtype=tf.int32)
  # Note: all points outside the grid are added to the edges
  # Cap index at grid_size - 1 (so a 10x10x10 grid's max cell is (9,9,9))
  point_indices = tf.minimum(point_indices, tf.expand_dims(grid_size - 1,
                                                           axis=0))
  # Don't allow any points below index (0, 0, 0)
  point_indices = tf.maximum(point_indices, 0)
  segment_ids = tf.reduce_sum(
      point_indices * tf.stack(
          [grid_size[1] * grid_size[2], grid_size[2], 1], axis=0),
      axis=1)
  voxel_features = segment_func(
      data=features,
      segment_ids=segment_ids,
      num_segments=(grid_size[0] * grid_size[1] * grid_size[2]))
  return (tf.reshape(voxel_features,
                     [grid_size[0],
                      grid_size[1],
                      grid_size[2],
                      features.get_shape().as_list()[1]]),
          segment_ids,
          point_indices)
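
The snippet below is a minimal, self-contained usage sketch (TensorFlow eager assumed): 100 random points with 4-dimensional features are averaged into a 10x10x10 voxel grid.

import tensorflow as tf

points = tf.random.uniform([100, 3], minval=0.0, maxval=10.0)
features = tf.random.uniform([100, 4])
voxel_features, segment_ids, point_indices = pointcloud_to_voxel_grid(
    points=points,
    features=features,
    grid_cell_size=(1.0, 1.0, 1.0),
    start_location=(0.0, 0.0, 0.0),
    end_location=(10.0, 10.0, 10.0))
print(voxel_features.shape)  # (10, 10, 10, 4)
print(point_indices.shape)   # (100, 3)
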
def classification_loss_using_mask_iou_func_unbatched(
        embeddings, instance_ids, sampled_embeddings, sampled_instance_ids,
        sampled_class_labels, sampled_logits, similarity_strategy,
        is_balanced):
    """Classification loss using mask iou.

  Args:
    embeddings: A tf.float32 tensor of size [n, f].
    instance_ids: A tf.int32 tensor of size [n].
    sampled_embeddings: A tf.float32 tensor of size [num_samples, f].
    sampled_instance_ids: A tf.int32 tensor of size [num_samples].
    sampled_class_labels: A tf.int32 tensor of size [num_samples, 1].
    sampled_logits: A tf.float32 tensor of size [num_samples, num_classes].
    similarity_strategy: Defines the method for computing similarity between
                         embedding vectors. Possible values are 'dotproduct' and
                         'distance'.
    is_balanced: If True, the per-voxel losses are re-weighted to have equal
      total weight for foreground vs. background voxels.

  Returns:
    A tf.float32 loss scalar tensor.
  """
    predicted_soft_masks = metric_learning_utils.embedding_centers_to_soft_masks(
        embedding=embeddings,
        centers=sampled_embeddings,
        similarity_strategy=similarity_strategy)
    predicted_masks = tf.cast(tf.greater(predicted_soft_masks, 0.5),
                              dtype=tf.float32)
    gt_masks = tf.cast(tf.equal(tf.expand_dims(sampled_instance_ids, axis=1),
                                tf.expand_dims(instance_ids, axis=0)),
                       dtype=tf.float32)
    pairwise_iou = instance_segmentation_utils.points_mask_pairwise_iou(
        masks1=predicted_masks, masks2=gt_masks)
    num_classes = sampled_logits.get_shape().as_list()[1]
    sampled_class_labels_one_hot = tf.one_hot(indices=tf.reshape(
        sampled_class_labels, [-1]),
                                              depth=num_classes)
    sampled_class_labels_one_hot_fg = sampled_class_labels_one_hot[:, 1:]
    iou_coefs = tf.tile(tf.reshape(pairwise_iou, [-1, 1]),
                        [1, num_classes - 1])
    sampled_class_labels_one_hot_fg *= iou_coefs
    sampled_class_labels_one_hot_bg = tf.maximum(
        1.0 - tf.math.reduce_sum(
            sampled_class_labels_one_hot_fg, axis=1, keepdims=True), 0.0)
    sampled_class_labels_one_hot = tf.concat(
        [sampled_class_labels_one_hot_bg, sampled_class_labels_one_hot_fg],
        axis=1)
    params = {}
    if is_balanced:
        weights = loss_utils.get_balanced_loss_weights_multiclass(
            labels=tf.expand_dims(sampled_instance_ids, axis=1))
        params['weights'] = weights
    return classification_loss_fn(logits=sampled_logits,
                                  labels=sampled_class_labels_one_hot,
                                  **params)
def _prepare_lidar_points(inputs, lidar_names):
    """Integrates and returns the lidar points in vehicle coordinate frame."""
    points_position = []
    points_intensity = []
    points_elongation = []
    points_normal = []
    points_in_image_frame_xy = []
    points_in_image_frame_id = []
    for lidar_name in lidar_names:
        lidar_location = tf.reshape(
            inputs[('lidars/%s/extrinsics/t') % lidar_name], [-1, 3])
        inside_no_label_zone = tf.reshape(
            inputs[('lidars/%s/pointcloud/inside_nlz' % lidar_name)], [-1])
        valid_points_mask = tf.math.logical_not(inside_no_label_zone)
        points_position_current_lidar = tf.boolean_mask(
            inputs[('lidars/%s/pointcloud/positions' % lidar_name)],
            valid_points_mask)
        points_position.append(points_position_current_lidar)
        points_intensity.append(
            tf.boolean_mask(
                inputs[('lidars/%s/pointcloud/intensity' % lidar_name)],
                valid_points_mask))
        points_elongation.append(
            tf.boolean_mask(
                inputs[('lidars/%s/pointcloud/elongation' % lidar_name)],
                valid_points_mask))
        points_to_lidar_vectors = lidar_location - points_position_current_lidar
        points_normal_direction = points_to_lidar_vectors / tf.expand_dims(
            tf.norm(points_to_lidar_vectors, axis=1), axis=1)
        points_normal.append(points_normal_direction)
        points_in_image_frame_xy.append(
            tf.boolean_mask(
                inputs['lidars/%s/camera_projections/positions' % lidar_name],
                valid_points_mask))
        points_in_image_frame_id.append(
            tf.boolean_mask(
                inputs['lidars/%s/camera_projections/ids' % lidar_name],
                valid_points_mask))
    points_position = tf.concat(points_position, axis=0)
    points_intensity = tf.concat(points_intensity, axis=0)
    points_elongation = tf.concat(points_elongation, axis=0)
    points_normal = tf.concat(points_normal, axis=0)
    points_in_image_frame_xy = tf.concat(points_in_image_frame_xy, axis=0)
    points_in_image_frame_id = tf.cast(tf.concat(points_in_image_frame_id,
                                                 axis=0),
                                       dtype=tf.int32)
    points_in_image_frame_yx = tf.cast(tf.reverse(points_in_image_frame_xy,
                                                  axis=[-1]),
                                       dtype=tf.int32)

    return (points_position, points_intensity, points_elongation,
            points_normal, points_in_image_frame_yx, points_in_image_frame_id)
Example no. 10
    def convert_to_simclr_episode(support_images=None,
                                  support_labels=None,
                                  support_class_ids=None,
                                  query_images=None,
                                  query_labels=None,
                                  query_class_ids=None):
        """Convert a single episode into a SimCLR Episode."""

        # If there were k query examples of class c, keep the first k support
        # examples of class c as 'simclr' queries.  We do this by assigning an
        # id for each image in the query set, implemented as label*10000+x+1, where
        # x is the number of images of the same label with a lower index within
        # the query set.  We do the same for the support set, which gives us a
        # mapping between query and support images which is injective (as long
        # as there's enough support-set images of each class).
        #
        # note: assumes max support label is 10000 - max_images_per_class
        query_idx_within_class = tf.cast(
            tf.equal(query_labels[tf.newaxis, :], query_labels[:, tf.newaxis]),
            tf.int32)
        query_idx_within_class = tf.linalg.diag_part(
            tf.cumsum(query_idx_within_class, axis=1))
        query_uid = query_labels * 10000 + query_idx_within_class
        support_idx_within_class = tf.cast(
            tf.equal(support_labels[tf.newaxis, :],
                     support_labels[:, tf.newaxis]), tf.int32)
        support_idx_within_class = tf.linalg.diag_part(
            tf.cumsum(support_idx_within_class, axis=1))
        support_uid = support_labels * 10000 + support_idx_within_class

        # compute which support-set images have matches in the query set, and
        # discard the rest to produce the new query set.
        support_keep = tf.reduce_any(tf.equal(support_uid[:, tf.newaxis],
                                              query_uid[tf.newaxis, :]),
                                     axis=1)
        query_images = tf.boolean_mask(support_images, support_keep)

        support_labels = tf.range(tf.shape(support_labels)[0],
                                  dtype=support_labels.dtype)
        query_labels = tf.boolean_mask(support_labels, support_keep)
        query_class_ids = tf.boolean_mask(support_class_ids, support_keep)

        # Finally, apply SimCLR augmentation to all images.
        # Note simclr only blurs one image.
        query_images = simclr_augment(query_images, blur=True)
        support_images = simclr_augment(support_images)

        return (support_images, support_labels, support_class_ids,
                query_images, query_labels, query_class_ids)
Example no. 11
def learning_rate_schedule_noam(train_steps,
                                warmup_steps=10000,
                                linear_decay_fraction=0.1,
                                multiplier=1.0):
    """Noam's favorite learning-rate schedule.

  (rsqrt(max(step_num, warmup_steps))
   * multiplier
   * min(1.0, (train_steps-step_num)/(train_steps*linear_decay_fraction)))

  Args:
    train_steps: a number
    warmup_steps: a number
    linear_decay_fraction: a number
    multiplier: a number
  Returns:
    a scalar tf.Tensor
  """
    train_steps = float(train_steps)
    step_num = tf.cast(tf.train.get_global_step(), tf.float32)
    learning_rate = tf.math.rsqrt(tf.maximum(step_num, warmup_steps))
    learning_rate *= multiplier
    if linear_decay_fraction > 0:
        learning_rate *= tf.minimum(1.0, (train_steps - step_num) /
                                    (train_steps * linear_decay_fraction))
    return learning_rate
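
For intuition, the same schedule can be traced in plain Python (a hedged sketch with made-up step values, not part of the original code): it is constant during warmup, follows an rsqrt decay afterwards, and ramps linearly to zero over the final `linear_decay_fraction` of training.

import math

def noam_lr(step, train_steps=100000, warmup_steps=10000,
            linear_decay_fraction=0.1, multiplier=1.0):
    # Mirrors the tensor computation above with ordinary floats.
    lr = multiplier / math.sqrt(max(step, warmup_steps))
    if linear_decay_fraction > 0:
        lr *= min(1.0, (train_steps - step) / (train_steps * linear_decay_fraction))
    return lr

print(noam_lr(100), noam_lr(20000), noam_lr(99000))  # 0.01, ~0.00707, ~0.00032
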
Example no. 12
    def _compute_prototype_loss(self,
                                embeddings,
                                labels,
                                labels_one_hot,
                                prototypes=None):
        """Computes the loss and accuracy on an episode."""
        labels_dense = labels
        if prototypes is None:
            # Compute protos.
            labels = tf.cast(labels_one_hot, tf.float32)
            # [num examples, 1, embedding size].
            embeddings_ = tf.expand_dims(embeddings, 1)
            # [num examples, num classes, 1].
            labels = tf.expand_dims(labels, 2)
            # Sums each class' embeddings. [num classes, embedding size].
            class_sums = tf.reduce_sum(labels * embeddings_, 0)
            # The prototype of each class is the averaged embedding of its examples.
            class_num_images = tf.reduce_sum(labels, 0)  # [way].
            prototypes = class_sums / class_num_images  # [way, embedding size].

        # Compute logits.
        embeddings = tf.nn.l2_normalize(embeddings, 1, epsilon=1e-3)
        prototypes = tf.nn.l2_normalize(prototypes, 1, epsilon=1e-3)
        logits = tf.matmul(embeddings, prototypes, transpose_b=True)

        loss = self.compute_loss(labels_one_hot, logits)
        acc = tf.reduce_mean(self.compute_accuracy(labels_dense, logits))
        return loss, acc, prototypes, logits
def dqn_template(state, num_actions, layer_size=512, num_layers=1):
    r"""Builds a DQN Network mapping states to Q-values.

  Args:
    state: A `tf.placeholder` for the RL state.
    num_actions: int, number of actions that the RL agent can take.
    layer_size: int, number of hidden units per layer.
    num_layers: int, Number of hidden layers.

  Returns:
    net: The output tensor of the DQN network:
      `\theta : \mathcal{X}\rightarrow\mathbb{R}^{|\mathcal{A}|}`
  """
    weights_initializer = slim.variance_scaling_initializer(factor=1.0 /
                                                            np.sqrt(3.0),
                                                            mode='FAN_IN',
                                                            uniform=True)

    net = tf.cast(state, tf.float32)
    net = tf.squeeze(net, axis=2)
    for _ in range(num_layers):
        net = slim.fully_connected(net, layer_size, activation_fn=tf.nn.relu)
    net = slim.fully_connected(net,
                               num_actions,
                               activation_fn=None,
                               weights_initializer=weights_initializer)
    return net
Example no. 14
  def _build_target_distribution(self):
    self._reshape_networks()
    batch_size = tf.shape(self._replay.rewards)[0]
    # size of rewards: batch_size x 1
    rewards = self._replay.rewards[:, None]
    # size of tiled_support: batch_size x num_atoms
    tiled_support = tf.tile(self.support, [batch_size])
    tiled_support = tf.reshape(tiled_support, [batch_size, self.num_atoms])
    # size of target_support: batch_size x num_atoms

    is_terminal_multiplier = 1. - tf.cast(self._replay.terminals, tf.float32)
    # Incorporate terminal state to discount factor.
    # size of gamma_with_terminal: batch_size x 1
    gamma_with_terminal = self.cumulative_gamma * is_terminal_multiplier
    gamma_with_terminal = gamma_with_terminal[:, None]

    target_support = rewards + gamma_with_terminal * tiled_support
    # size of next_probabilities: batch_size  x num_actions x num_atoms
    next_probabilities = tf.contrib.layers.softmax(
        self._replay_next_logits)

    # size of next_qt: 1 x num_actions
    next_qt = tf.reduce_sum(self.support * next_probabilities, 2)
    # size of next_qt_argmax: 1 x batch_size
    next_qt_argmax = tf.argmax(
        next_qt + self._replay.next_legal_actions, axis=1)[:, None]
    batch_indices = tf.range(tf.to_int64(batch_size))[:, None]
    # size of next_qt_argmax: batch_size x 2
    next_qt_argmax = tf.concat([batch_indices, next_qt_argmax], axis=1)
    # size of next_probabilities: batch_size x num_atoms
    next_probabilities = tf.gather_nd(next_probabilities, next_qt_argmax)
    return project_distribution(target_support, next_probabilities,
                                self.support)
Example no. 15
def rainbow_template(state,
                     num_actions,
                     num_atoms=51,
                     layer_size=512,
                     num_layers=2): # FIXME: Aron 3/14/19: changed from 1 to 2
  r"""Builds a Rainbow Network mapping states to value distributions.

  Args:
    state: A `tf.placeholder` for the RL state.
    num_actions: int, number of actions that the RL agent can take.
    num_atoms: int, number of atoms to approximate the distribution with.
    layer_size: int, number of hidden units per layer.
    num_layers: int, number of hidden layers.

  Returns:
    net: The output tensor of the Rainbow network:
      `\theta : \mathcal{X}\rightarrow\mathbb{R}^{|\mathcal{A}| \times N}`,
      where `N` is num_atoms.
  """
  weights_initializer = slim.variance_scaling_initializer(
      factor=1.0 / np.sqrt(3.0), mode='FAN_IN', uniform=True)

  net = tf.cast(state, tf.float32)
  net = tf.squeeze(net, axis=2)

  for _ in range(num_layers):
    net = slim.fully_connected(net, layer_size,
                               activation_fn=tf.nn.relu)
  net = slim.fully_connected(net, num_actions * num_atoms, activation_fn=None,
                             weights_initializer=weights_initializer)
  net = tf.reshape(net, [-1, num_actions, num_atoms])
  return net
Example no. 16
    def train_complete(self,
                       tape: tf.GradientTape,
                       training_info: TrainingInfo,
                       weight=1.0):
        """Complete one iteration of training.

        `train_complete` should calculate gradients and update parameters using
        those gradients.

        Args:
            tape (tf.GradientTape): the tape which is used for calculating
                gradients. All of the previous `train_interval` `train_step()`
                calls are made under the context of this tape.
            training_info (TrainingInfo): information collected for training.
                training_info.info is batched from the policy_step.info
                returned by each train_step() call.
            weight (float): weight for this batch. Loss will be multiplied by
                this weight before calculating gradients.
        Returns:
            a tuple of the following:
            loss_info (LossInfo): loss information
            grads_and_vars (list[tuple]): list of gradient and variable tuples
        """
        valid_masks = tf.cast(
            tf.not_equal(training_info.step_type, StepType.LAST), tf.float32)

        return super().train_complete(tape, training_info, valid_masks, weight)
Example no. 17
  def compute_class_distances(self, support_embeddings, onehot_support_labels,
                              query_embeddings):
    """Returns the weighted distance of each query to each support example.

    Args:
      support_embeddings: Tensor of examples of shape [num_examples,
        embedding_dim] or [num_examples, spatial_dim, spatial_dim, num_filters].
      onehot_support_labels: Tensor of targets of shape [num_examples,
        num_classes].
      query_embeddings: Tensor of examples of shape [num_examples,
        embedding_dim] or [num_examples, spatial_dim, spatial_dim, num_filters].

    Returns:
      Class log-probabilities computed as a weighted sum of one-hot encoded
      training labels. Weights for individual support-query pairs of examples
      are proportional to the similarity (one minus the distance) between the
      embeddings of the two examples.
    """
    # [num_query_images, num_support_images]
    similarities = 1 - self.distance_metric(query_embeddings,
                                            support_embeddings)
    attention = tf.nn.softmax(similarities)

    # [num_query_images, way]
    probs = tf.matmul(attention,
                      tf.cast(onehot_support_labels, dtype=tf.float32))
    return tf.math.log(probs)
Example no. 18
  def joint_log_likelihood(self, onehot_labels, log_probs):
    """Compute p(z, y)."""
    labels = tf.cast(
        tf.reduce_sum(input_tensor=onehot_labels, axis=0), dtype=tf.float32)
    class_log_probs = tf.math.log(labels / tf.reduce_sum(input_tensor=labels))
    return log_probs + tf.expand_dims(class_log_probs, 0)
Example no. 19
def _compute_prototypes(embeddings, labels):
    """Computes class prototypes over the last dimension of embeddings.

  Args:
    embeddings: Tensor of examples of shape [num_examples, embedding_size].
    labels: Tensor of one-hot encoded labels of shape [num_examples,
      num_classes].

  Returns:
    prototypes: Tensor of class prototypes of shape [num_classes,
    embedding_size].
  """
    labels = tf.cast(labels, tf.float32)

    # [num examples, 1, embedding size].
    embeddings = tf.expand_dims(embeddings, 1)

    # [num examples, num classes, 1].
    labels = tf.expand_dims(labels, 2)

    # Sums each class' embeddings. [num classes, embedding size].
    class_sums = tf.reduce_sum(labels * embeddings, 0)

    # The prototype of each class is the averaged embedding of its examples.
    class_num_images = tf.reduce_sum(labels, 0)  # [way].
    prototypes = class_sums / class_num_images

    return prototypes
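
The check below is a tiny hedged example (TensorFlow eager assumed, toy data): with two classes, the prototypes are simply the per-class means of the embeddings.

import tensorflow as tf

embeddings = tf.constant([[1.0, 0.0],
                          [3.0, 0.0],
                          [0.0, 2.0]])
onehot_labels = tf.constant([[1.0, 0.0],
                             [1.0, 0.0],
                             [0.0, 1.0]])
print(_compute_prototypes(embeddings, onehot_labels).numpy())
# [[2. 0.]   <- mean of the two class-0 embeddings
#  [0. 2.]]  <- the single class-1 embedding
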
Example no. 20
  def compute_logits(self, support_embeddings, query_embeddings,
                     onehot_support_labels):
    """Computes the class logits.

    Probabilities are computed as a weighted sum of one-hot encoded training
    labels. Weights for individual support/query pairs of examples are
    proportional to the (potentially semi-normalized) cosine similarity between
    the embeddings of the two examples.

    Args:
      support_embeddings: A Tensor of size [num_support_images, embedding dim].
      query_embeddings: A Tensor of size [num_query_images, embedding dim].
      onehot_support_labels: A Tensor of size [batch size, way].

    Returns:
      The query set logits as a [num_query_images, way] matrix.
    """
    # Undocumented in the paper, but *very important*: *only* the support set
    # embeddings are L2-normalized, which means that the distance is not exactly
    # a cosine distance. For comparison we also allow for the actual cosine
    # distance to be computed, which is controlled with the
    # `exact_cosine_distance` instance attribute.
    support_embeddings = tf.nn.l2_normalize(support_embeddings, 1, epsilon=1e-3)
    if self.exact_cosine_distance:
      query_embeddings = tf.nn.l2_normalize(query_embeddings, 1, epsilon=1e-3)
    # [num_query_images, num_support_images]
    similarities = tf.matmul(
        query_embeddings, support_embeddings, transpose_b=True)
    attention = tf.nn.softmax(similarities)

    # [num_query_images, way]
    probs = tf.matmul(attention, tf.cast(onehot_support_labels, tf.float32))
    return tf.log(probs)
Example no. 21
def fit_gaussian(embeddings, damping=1e-7, full_covariance=False):
  """Fits a unimodal Gaussian distribution to `embeddings`.

  Args:
    embeddings: A [batch_size, embedding_dim] tf.Tensor of embeddings.
    damping: The scale of the covariance damping coefficient.
    full_covariance: Whether to use a full or diagonal covariance.

  Returns:
    Parameter estimates (means and log variances) for a Gaussian model.
  """
  if full_covariance:
    num, dim = tf.split(tf.shape(input=embeddings), num_or_size_splits=2)
    num, dim = tf.squeeze(num), tf.squeeze(dim)
    sample_mean = tf.reduce_mean(input_tensor=embeddings, axis=0)
    centered_embeddings = embeddings - sample_mean
    sample_covariance = tf.einsum('ij,ik->kj', centered_embeddings,
                                  centered_embeddings)  # Outer product.
    sample_covariance += damping * tf.eye(dim)  # Positive definiteness.
    sample_covariance /= tf.cast(num, dtype=tf.float32)  # Scale by N.
    return sample_mean, sample_covariance
  else:
    sample_mean, sample_variances = tf.nn.moments(x=embeddings, axes=[0])
    log_variances = tf.math.log(sample_variances +
                                damping * tf.ones_like(sample_variances))
    return sample_mean, log_variances
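
A brief hedged usage sketch (TensorFlow eager assumed, random data): the diagonal branch returns per-dimension means and log-variances, while the full-covariance branch returns a dense covariance matrix.

import tensorflow as tf

embeddings = tf.random.normal([32, 8])
mean, log_variances = fit_gaussian(embeddings)
print(mean.shape, log_variances.shape)  # (8,) (8,)
mean, covariance = fit_gaussian(embeddings, full_covariance=True)
print(covariance.shape)                 # (8, 8)
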
Example no. 22
def compute_pointcloud_weights_based_on_voxel_density(points, grid_cell_size):
  """Computes pointcloud weights based on voxel density.

  Args:
    points: A tf.float32 tensor of size [num_points, 3].
    grid_cell_size: The size of the grid cells in x, y, z dimensions in the
      voxel grid. It should be either a tf.float32 tensor, a numpy array or a
      list of size [3].

  Returns:
    A tf.float32 tensor of size [num_points, 1] containing weights that are
      inversely proportional to the density of the points in voxels.
  """
  num_points = tf.shape(points)[0]
  features = tf.ones([num_points, 1], dtype=tf.float32)
  voxel_features, _, segment_ids, _ = (
      pointcloud_to_sparse_voxel_grid_unbatched(
          points=points,
          features=features,
          grid_cell_size=grid_cell_size,
          segment_func=tf.math.unsorted_segment_sum))
  num_voxels = tf.shape(voxel_features)[0]
  point_features = sparse_voxel_grid_to_pointcloud(
      voxel_features=tf.expand_dims(voxel_features, axis=0),
      segment_ids=tf.expand_dims(segment_ids, axis=0),
      num_valid_voxels=tf.expand_dims(num_voxels, axis=0),
      num_valid_points=tf.expand_dims(num_points, axis=0))
  inverse_point_densities = 1.0 / tf.squeeze(point_features, axis=0)
  total_inverse_density = tf.reduce_sum(inverse_point_densities)
  return (inverse_point_densities * tf.cast(num_points, dtype=tf.float32) /
          total_inverse_density)
Example no. 23
def sdtw_loss(y_hat, y, gamma=0.01):
    """Computes the batched soft-DTW loss and a function that computes its gradient."""
    y_hat = tf.cast(y_hat, tf.float64)
    y = tf.cast(y, tf.float64)
    # D is the pairwise distance matrix; R is the soft-DTW intermediate table
    # produced by `batch_sdtw_loss` through `tf.py_func`.
    D = batched_euclidean_distance(y, y_hat)
    R = tf.py_func(batch_sdtw_loss, inp=[D, gamma], Tout=tf.float64)

    m = D.get_shape()[1]
    loss = tf.reduce_mean(R[:, m, m])
    loss.set_shape(shape=())
    loss = tf.cast(loss, dtype=tf.float32)

    def grad(dy):
        # Gradient with respect to `y_hat`; the target `y` receives a zero gradient.
        _grad = tf.py_func(
            batch_sdtw_grad, inp=[y_hat, y, D, R, gamma], Tout=tf.float64)
        return tf.cast(_grad, dtype=tf.float32), tf.zeros_like(y, dtype=tf.float32)

    return loss, grad
Example no. 24
def _points_to_voxel_indices(points, grid_cell_size):
  """Converts points into corresponding voxel indices.

  Maps each point into a voxel grid with cell size given by grid_cell_size.
  For each voxel, it computes a x, y, z index. Also converts the x, y, z index
  to a single number index where there is a one-to-one mapping between
  each x, y, z index value and its corresponding single number index value.

  Args:
    points: A tf.float32 tensor of size [N, 3].
    grid_cell_size: The size of the grid cells in x, y, z dimensions in the
      voxel grid. It should be either a tf.float32 tensor, a numpy array or a
      list of size [3].

  Returns:
    voxel_xyz_indices: A tf.int32 tensor of size [N, 3] containing the x, y, z
      index of the voxel corresponding to each given point.
    voxel_single_number_indices: A tf.int32 tensor of size [N] containing the
      single number index of the voxel corresponding to each given point.
    voxel_start_location: A tf.float32 tensor of size [3] containing the start
      location of the voxels.
  """
  voxel_start_location = tf.reduce_min(points, axis=0)
  voxel_xyz_indices = tf.cast(
      tf.math.floordiv(points - voxel_start_location, grid_cell_size),
      dtype=tf.int32)
  voxel_xyz_indices, voxel_single_number_indices = compute_pooled_voxel_indices(
      voxel_xyz_indices=voxel_xyz_indices, pooling_size=(1, 1, 1))
  return voxel_xyz_indices, voxel_single_number_indices, voxel_start_location
Example no. 25
def _pad_or_clip_voxels(voxel_features, voxel_indices, num_valid_voxels,
                        segment_ids, voxels_pad_or_clip_size):
  """Pads or clips voxels."""
  if voxels_pad_or_clip_size:
    num_valid_voxels = tf.minimum(num_valid_voxels, voxels_pad_or_clip_size)
    num_channels = voxel_features.get_shape().as_list()[-1]
    if len(voxel_features.shape.as_list()) == 2:
      output_shape = [voxels_pad_or_clip_size, num_channels]
    elif len(voxel_features.shape.as_list()) == 3:
      num_samples_per_voxel = voxel_features.get_shape().as_list()[1]
      if num_samples_per_voxel is None:
        num_samples_per_voxel = tf.shape(voxel_features)[1]
      output_shape = [
          voxels_pad_or_clip_size, num_samples_per_voxel, num_channels
      ]
    else:
      raise ValueError('voxel_features should be either rank 2 or 3.')
    voxel_features = shape_utils.pad_or_clip_nd(
        tensor=voxel_features, output_shape=output_shape)
    voxel_indices = shape_utils.pad_or_clip_nd(
        tensor=voxel_indices, output_shape=[voxels_pad_or_clip_size, 3])
    valid_segment_ids_mask = tf.cast(
        tf.less(segment_ids, num_valid_voxels), dtype=tf.int32)
    segment_ids *= valid_segment_ids_mask
  return voxel_features, voxel_indices, num_valid_voxels, segment_ids
Example no. 26
def _filter_valid_objects(inputs):
  """Removes the objects that do not contain 3d info.

  Args:
    inputs: A dictionary containing input tensors.
  """
  if standard_fields.InputDataFields.objects_class not in inputs:
    return

  valid_objects_mask = tf.reshape(
      tf.greater(inputs[standard_fields.InputDataFields.objects_class], 0),
      [-1])
  if standard_fields.InputDataFields.objects_has_3d_info in inputs:
    objects_with_3d_info = tf.reshape(
        tf.cast(
            inputs[standard_fields.InputDataFields.objects_has_3d_info],
            dtype=tf.bool), [-1])
    valid_objects_mask = tf.logical_and(objects_with_3d_info,
                                        valid_objects_mask)
  if standard_fields.InputDataFields.objects_difficulty in inputs:
    valid_objects_mask = tf.logical_and(
        valid_objects_mask,
        tf.greater(
            tf.reshape(
                inputs[standard_fields.InputDataFields.objects_difficulty],
                [-1]), 0))
  for key in _OBJECT_KEYS:
    if key in inputs:
      inputs[key] = tf.boolean_mask(inputs[key], valid_objects_mask)
Example no. 27
    def __call__(self, example_string):
        """Processes a single example string.

    Extracts and processes the image, and ignores the label. We assume that the
    image has three channels.

    Args:
      example_string: str, an Example protocol buffer.

    Returns:
      image_rescaled: the image, resized to `image_size x image_size` and
      rescaled to [-1, 1]. Note that Gaussian data augmentation may cause values
      to go beyond this range.
    """
        image_decoded = read_example_and_parse_image(example_string)['image']
        image_resized = tf.image.resize_images(
            image_decoded, [self.image_size, self.image_size],
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=True)
        image_resized = tf.cast(image_resized, tf.float32)
        image = 2 * (image_resized / 255.0 - 0.5)  # Rescale to [-1, 1].

        if self.data_augmentation is not None:
            if self.data_augmentation.enable_gaussian_noise:
                image = image + tf.random_normal(tf.shape(
                    image)) * self.data_augmentation.gaussian_noise_std

            if self.data_augmentation.enable_jitter:
                j = self.data_augmentation.jitter_amount
                paddings = tf.constant([[j, j], [j, j], [0, 0]])
                image = tf.pad(image, paddings, 'REFLECT')
                image = tf.image.random_crop(
                    image, [self.image_size, self.image_size, 3])

        return image
def _center_crop(images, height, width):
  """Performs a center crop with the given heights and width."""
  # ensure height, width to be int
  height = tf.cast(height, tf.int32)
  width = tf.cast(width, tf.int32)
  # get current size
  images_shape = tf.shape(images)
  current_height = images_shape[-3]
  current_width = images_shape[-2]
  # compute required offset
  offset_height = tf.cast((current_height - height) / 2, tf.int32)
  offset_width = tf.cast((current_width - width) / 2, tf.int32)
  # perform the crop
  images = tf.image.crop_to_bounding_box(
      images, offset_height, offset_width, height, width)
  return images
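
A quick hedged check of the helper above (TensorFlow eager assumed): center-cropping a batch of 100x120 images down to 60x80 keeps the batch and channel dimensions untouched.

import tensorflow as tf

images = tf.zeros([2, 100, 120, 3])
cropped = _center_crop(images, 60, 80)
print(cropped.shape)  # (2, 60, 80, 3)
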
def compute_target_topk_q(reward, gamma, next_actions, next_q_values,
                          next_states, terminals):
    """Computes the optimal target Q value with the greedy algorithm.

  This algorithm corresponds to the method "TT" in
  Ie et al. https://arxiv.org/abs/1905.12767.

  Args:
    reward: [batch_size] tensor, the immediate reward.
    gamma: float, discount factor with the usual RL meaning.
    next_actions: [batch_size, slate_size] tensor, the next slate.
    next_q_values: [batch_size, num_of_documents] tensor, the q values of the
      documents in the next step.
    next_states: [batch_size, 1 + num_of_documents] tensor, the features for the
      user and the documents in the next step.
    terminals: [batch_size] tensor, indicating if this is a terminal step.

  Returns:
    [batch_size] tensor, the target q values.
  """
    slate_size = next_actions.get_shape().as_list()[1]
    scores, score_no_click = _get_unnormalized_scores(next_states)

    # Choose the documents with top affinity_scores * Q values to fill a slate and
    # treat it as if it is the optimal slate.
    unnormalized_next_q_target = next_q_values * scores
    _, topk_optimal_slate = tf.math.top_k(unnormalized_next_q_target,
                                          k=slate_size)

    # Get the expected Q-value of the slate containing top-K items.
    # [batch_size, slate_size]
    next_q_values_selected = tf.batch_gather(
        next_q_values, tf.cast(topk_optimal_slate, dtype=tf.int32))

    # Get normalized affinity scores on the slate.
    # [batch_size, slate_size]
    scores_selected = tf.batch_gather(
        scores, tf.cast(topk_optimal_slate, dtype=tf.int32))

    next_q_target_topk = tf.reduce_sum(
        input_tensor=next_q_values_selected * scores_selected,
        axis=1) / (tf.reduce_sum(input_tensor=scores_selected, axis=1) +
                   score_no_click)

    return reward + gamma * next_q_target_topk * (
        1. - tf.cast(terminals, tf.float32))
Example no. 30
    def true_fn(images, flow, mask):
        # choose a random scale factor and compute new resolution
        orig_height = tf.shape(images)[-3]
        orig_width = tf.shape(images)[-2]
        new_height, new_width, scale = _get_random_scaled_resolution(
            orig_height=orig_height,
            orig_width=orig_width,
            min_scale=min_scale,
            max_scale=max_scale,
            max_strech=0.0,
            probability_strech=0.0)

        # rescale only the second image
        image_1, image_2 = tf.unstack(images)
        image_2 = smurf_utils.resize(image_2,
                                     new_height,
                                     new_width,
                                     is_flow=False)
        # Crop either first or second image to have matching dimensions
        if scale < 1.0:
            image_1 = _center_crop(image_1, new_height, new_width)
        else:
            image_2 = _center_crop(image_2, orig_height, orig_width)
        images = tf.stack([image_1, image_2])

        if flow is not None:
            # get current locations (with the origin in the image center)
            positions = _positions_center_origin(orig_height, orig_width)

            # compute scale factor of the actual new image resolution
            scale_flow_h = tf.cast(new_height, tf.float32) / tf.cast(
                orig_height, tf.float32)
            scale_flow_w = tf.cast(new_width, tf.float32) / tf.cast(
                orig_width, tf.float32)
            scale_flow = tf.stack([scale_flow_h, scale_flow_w])

            # compute augmented flow (multiply by mask to zero invalid flow locations)
            flow = ((positions + flow) * scale_flow - positions) * mask

            if scale < 1.0:
                # in case we downsample the image we crop the reference image to keep
                # the same shape
                flow = _center_crop(flow, new_height, new_width)
                mask = _center_crop(mask, new_height, new_width)
        return images, flow, mask