Example #1
def batched_euclidean_distance(y_hat, y, squared=True):
    """Pairwise (squared) Euclidean distances between two batched point sets."""
    assert y_hat.get_shape().ndims == 3 and y.get_shape().ndims == 3
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, computed without materializing
    # the pairwise differences.
    a = tf.reduce_sum(tf.square(y), axis=2)[:, :, None]
    b = tf.reduce_sum(tf.square(y_hat), axis=2)[:, None, :]
    D = tf.matmul(y, y_hat, transpose_b=True)
    d = a + b - 2 * D
    # Clamp to avoid sqrt of small negative values from floating-point error.
    return d if squared else tf.sqrt(tf.maximum(d, 0.))
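A minimal sanity check of the helper above, assuming TF 2.x eager execution and hypothetical random inputs; one entry of the distance matrix is verified against the direct definition.

import tensorflow as tf

# Hypothetical toy inputs: a batch of 2 point sets, with 4 and 3 points in 3-D.
y = tf.random.normal([2, 4, 3])
y_hat = tf.random.normal([2, 3, 3])

d = batched_euclidean_distance(y_hat, y, squared=True)  # [2, 4, 3]

# Cross-check one entry against the direct definition.
direct = tf.reduce_sum(tf.square(y[0, 1] - y_hat[0, 2]))
print(d[0, 1, 2].numpy(), direct.numpy())  # equal up to float error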
Example #2
def pick_labeled_image(mesh_inputs, view_image_inputs, view_indices_2d_inputs,
                       view_name):
    """Pick the image with most number of labeled points projecting to it."""
    if view_name not in view_image_inputs:
        return
    if view_name not in view_indices_2d_inputs:
        return
    if standard_fields.InputDataFields.point_loss_weights not in mesh_inputs:
        raise ValueError(
            'The key `point_loss_weights` is missing from mesh_inputs.')
    height = tf.shape(view_image_inputs[view_name])[1]
    width = tf.shape(view_image_inputs[view_name])[2]
    valid_points_y = tf.logical_and(
        tf.greater_equal(view_indices_2d_inputs[view_name][:, :, 0], 0),
        tf.less(view_indices_2d_inputs[view_name][:, :, 0], height))
    valid_points_x = tf.logical_and(
        tf.greater_equal(view_indices_2d_inputs[view_name][:, :, 1], 0),
        tf.less(view_indices_2d_inputs[view_name][:, :, 1], width))
    valid_points = tf.logical_and(valid_points_y, valid_points_x)
    image_total_weights = tf.reduce_sum(
        tf.cast(valid_points, dtype=tf.float32) * tf.squeeze(
            mesh_inputs[standard_fields.InputDataFields.point_loss_weights],
            axis=1),
        axis=1)
    image_total_weights = tf.cond(
        tf.equal(tf.reduce_sum(image_total_weights), 0),
        lambda: tf.reduce_sum(tf.cast(valid_points, dtype=tf.float32), axis=1),
        lambda: image_total_weights)
    best_image = tf.math.argmax(image_total_weights)
    view_image_inputs[view_name] = view_image_inputs[view_name][
        best_image:best_image + 1, :, :, :]
    view_indices_2d_inputs[view_name] = view_indices_2d_inputs[view_name][
        best_image:best_image + 1, :, :]
Example #3
    def _compute_prototype_loss(self,
                                embeddings,
                                labels,
                                labels_one_hot,
                                prototypes=None):
        """Computes the loss and accuracy on an episode."""
        labels_dense = labels
        if prototypes is None:
            # Compute protos.
            labels = tf.cast(labels_one_hot, tf.float32)
            # [num examples, 1, embedding size].
            embeddings_ = tf.expand_dims(embeddings, 1)
            # [num examples, num classes, 1].
            labels = tf.expand_dims(labels, 2)
            # Sums each class' embeddings. [num classes, embedding size].
            class_sums = tf.reduce_sum(labels * embeddings_, 0)
            # The prototype of each class is the averaged embedding of its examples.
            class_num_images = tf.reduce_sum(labels, 0)  # [way].
            prototypes = class_sums / class_num_images  # [way, embedding size].

        # Compute logits.
        embeddings = tf.nn.l2_normalize(embeddings, 1, epsilon=1e-3)
        prototypes = tf.nn.l2_normalize(prototypes, 1, epsilon=1e-3)
        logits = tf.matmul(embeddings, prototypes, transpose_b=True)

        loss = self.compute_loss(labels_one_hot, logits)
        acc = tf.reduce_mean(self.compute_accuracy(labels_dense, logits))
        return loss, acc, prototypes, logits
Example #4
def _compute_prototypes(embeddings, labels):
    """Computes class prototypes over the last dimension of embeddings.

  Args:
    embeddings: Tensor of examples of shape [num_examples, embedding_size].
    labels: Tensor of one-hot encoded labels of shape [num_examples,
      num_classes].

  Returns:
    prototypes: Tensor of class prototypes of shape [num_classes,
    embedding_size].
  """
    labels = tf.cast(labels, tf.float32)

    # [num examples, 1, embedding size].
    embeddings = tf.expand_dims(embeddings, 1)

    # [num examples, num classes, 1].
    labels = tf.expand_dims(labels, 2)

    # Sums each class' embeddings. [num classes, embedding size].
    class_sums = tf.reduce_sum(labels * embeddings, 0)

    # The prototype of each class is the averaged embedding of its examples.
    class_num_images = tf.reduce_sum(labels, 0)  # [way].
    prototypes = class_sums / class_num_images

    return prototypes
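A quick sketch exercising `_compute_prototypes` on a hypothetical 3-way episode, assuming TF 2.x eager execution; each prototype should match the mean embedding of its class.

import tensorflow as tf

# Hypothetical 3-way episode: 6 examples with 4-D embeddings, 2 per class.
embeddings = tf.random.normal([6, 4])
labels = tf.one_hot([0, 0, 1, 1, 2, 2], depth=3)  # [6, 3]

prototypes = _compute_prototypes(embeddings, labels)  # [3, 4]

# Class 0's prototype is the mean of its two embeddings.
expected = tf.reduce_mean(embeddings[:2], axis=0)
print(tf.reduce_max(tf.abs(prototypes[0] - expected)).numpy())  # ~0.0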
Example #5
 def joint_log_likelihood(self, onehot_labels, log_probs):
     """Compute p(z, y)."""
     labels = tf.cast(tf.reduce_sum(input_tensor=onehot_labels, axis=0),
                      dtype=tf.float32)
     class_log_probs = tf.math.log(labels /
                                   tf.reduce_sum(input_tensor=labels))
     return log_probs + tf.expand_dims(class_log_probs, 0)
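The method never reads `self`, so its math can be checked standalone: the empirical class log-prior log p(y) comes from the label counts and is added to the per-class log-densities log p(z | y). A minimal sketch with hypothetical inputs, assuming eager execution:

import tensorflow as tf

# Hypothetical batch: 4 examples over 3 classes, with uniform log p(z | y).
onehot_labels = tf.one_hot([0, 0, 1, 2], depth=3)              # [4, 3]
log_probs = tf.math.log(tf.fill([4, 3], 1.0 / 3.0))

counts = tf.reduce_sum(onehot_labels, axis=0)                  # [2., 1., 1.]
class_log_probs = tf.math.log(counts / tf.reduce_sum(counts))  # log p(y)
joint_log_probs = log_probs + tf.expand_dims(class_log_probs, 0)
print(joint_log_probs.numpy())                                 # log p(z, y)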
Example #6
def compute_target_optimal_q(reward, gamma, next_actions, next_q_values,
                             next_states, terminals):
    """Builds an op used as a target for the Q-value.

  This algorithm corresponds to the method "OT" in
  Ie et al. https://arxiv.org/abs/1905.12767.

  Args:
    reward: [batch_size] tensor, the immediate reward.
    gamma: float, discount factor with the usual RL meaning.
    next_actions: [batch_size, slate_size] tensor, the next slate.
    next_q_values: [batch_size, num_of_documents] tensor, the q values of the
      documents in the next step.
    next_states: [batch_size, 1 + num_of_documents] tensor, the features for the
      user and the documents in the next step.
    terminals: [batch_size] tensor, indicating if this is a terminal step.

  Returns:
    [batch_size] tensor, the target q values.
  """
    scores, score_no_click = _get_unnormalized_scores(next_states)

    # Obtain all possible slates given current docs in the candidate set.
    slate_size = next_actions.get_shape().as_list()[1]
    num_candidates = next_q_values.get_shape().as_list()[1]
    mesh_args = [list(range(num_candidates))] * slate_size
    slates = tf.stack(tf.meshgrid(*mesh_args), axis=-1)
    slates = tf.reshape(slates, shape=(-1, slate_size))
    # Filter slates that include duplicates to ensure each document is picked
    # at most once.
    unique_mask = tf.map_fn(
        lambda x: tf.equal(tf.size(input=x), tf.size(input=tf.unique(x)[0])),
        slates,
        dtype=tf.bool)
    # [num_of_slates, slate_size]
    slates = tf.boolean_mask(tensor=slates, mask=unique_mask)

    # [batch_size, num_of_slates, slate_size]
    next_q_values_slate = tf.gather(next_q_values, slates, axis=1)
    # [batch_size, num_of_slates, slate_size]
    scores_slate = tf.gather(scores, slates, axis=1)
    # [batch_size, num_of_slates]
    batch_size = next_states.get_shape().as_list()[0]
    score_no_click_slate = tf.reshape(
        tf.tile(score_no_click,
                tf.shape(input=slates)[:1]), [batch_size, -1])

    # [batch_size, num_of_slates]
    next_q_target_slate = tf.reduce_sum(
        input_tensor=next_q_values_slate * scores_slate,
        axis=2) / (tf.reduce_sum(input_tensor=scores_slate, axis=2) +
                   score_no_click_slate)

    next_q_target_max = tf.reduce_max(input_tensor=next_q_target_slate, axis=1)

    return reward + gamma * next_q_target_max * (
        1. - tf.cast(terminals, tf.float32))
Example #7
def score_documents_tf(user_obs,
                       doc_obs,
                       no_click_mass=1.0,
                       is_mnl=False,
                       min_normalizer=-1.0):
    """Computes unnormalized scores given both user and document observations.

  This implements both the multinomial proportional model and the multinomial
    logit model given some parameters. We also assume scores are based on
    inner products of user_obs and doc_obs.

  Args:
    user_obs: An instance of AbstractUserState.
    doc_obs: A numpy array that represents the observation of all documents in
      the candidate set.
    no_click_mass: a float indicating the mass given to a no click option
    is_mnl: whether to use a multinomial logit model instead of a multinomial
      proportional model.
    min_normalizer: A float (<= 0) used to offset the scores to be positive when
      using multinomial proportional model.

  Returns:
    A float tensor that stores unnormalized scores of documents and a float
      tensor that represents the score for the action of picking no document.
  """
    user_obs = tf.reshape(user_obs, [1, -1])
    scores = tf.reduce_sum(input_tensor=tf.multiply(user_obs, doc_obs), axis=1)
    all_scores = tf.concat([scores, tf.constant([no_click_mass])], axis=0)
    if is_mnl:
        all_scores = tf.nn.softmax(all_scores)
    else:
        all_scores = all_scores - min_normalizer
    return all_scores[:-1], all_scores[-1]
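A minimal usage sketch for `score_documents_tf` with a hypothetical 4-document candidate set, assuming eager execution and the default multinomial proportional model:

import tensorflow as tf

# Hypothetical candidate set: 4 documents with 2-D features.
user_obs = tf.constant([1.0, 0.5])
doc_obs = tf.constant([[0.2, 0.1], [0.9, 0.3], [0.5, 0.5], [0.0, 1.0]])

scores, score_no_click = score_documents_tf(user_obs, doc_obs)
print(scores.numpy(), score_no_click.numpy())  # 4 doc scores, no-click score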
Example #8
    def _build_train_op(self):
        """Builds the training op for Rainbow.

    Returns:
      train_op: An op performing one step of training.
    """

        replay_action_one_hot = tf.one_hot(self._replay.actions,
                                           self.num_actions,
                                           1.,
                                           0.,
                                           name='action_one_hot')
        replay_chosen_q = tf.reduce_sum(self._replay_qs *
                                        replay_action_one_hot,
                                        axis=1,
                                        name='replay_chosen_q')

        target = tf.stop_gradient(self._build_target_q_op())
        loss = tf.losses.huber_loss(target,
                                    replay_chosen_q,
                                    reduction=tf.losses.Reduction.NONE)

        update_priorities_op = self._replay.tf_set_priority(
            self._replay.indices, tf.sqrt(loss + 1e-10))

        target_priorities = self._replay.tf_get_priority(self._replay.indices)
        target_priorities = tf.math.add(target_priorities, 1e-10)
        target_priorities = 1.0 / tf.sqrt(target_priorities)
        target_priorities /= tf.reduce_max(target_priorities)

        weighted_loss = target_priorities * loss

        with tf.control_dependencies([update_priorities_op]):
            return self.optimizer.minimize(
                tf.reduce_mean(weighted_loss)), weighted_loss
Example #9
def compute_prototypes(embeddings, onehot_labels):
    """Compute class prototypes over the last dimension of embeddings.

  Args:
    embeddings: Tensor of examples of shape [num_examples] + embedding_shape
    onehot_labels: Tensor of one-hot encoded labels of shape [num_examples,
      num_classes].

  Returns:
    prototypes: Tensor of class prototypes of shape [num_classes,
    embedding_size].
  """
    # Sums each class' embeddings. [num classes] + embedding shape.
    embedding_indices = 'klm'[:len(embeddings.shape) - 1]
    class_sums = tf.einsum('ij,i{0}->j{0}'.format(embedding_indices),
                           onehot_labels, embeddings)

    # The prototype of each class is the averaged embedding of its examples.
    class_num_images = tf.reduce_sum(input_tensor=onehot_labels,
                                     axis=0)  # [way].
    prototypes = tf.math.divide_no_nan(
        class_sums,
        tf.reshape(class_num_images, [-1] + [1] * (len(embeddings.shape) - 1)))

    return prototypes
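A short sketch, assuming eager execution, showing why this variant uses `einsum`: it also handles embeddings with extra trailing dimensions (e.g. feature maps), which the expand-dims version above does not.

import tensorflow as tf

# Hypothetical episode with spatial embeddings: 6 examples, 3 classes, each
# embedding a [2, 4] feature map; the einsum handles the extra dimensions.
embeddings = tf.random.normal([6, 2, 4])
onehot_labels = tf.one_hot([0, 0, 1, 1, 2, 2], depth=3)

prototypes = compute_prototypes(embeddings, onehot_labels)
print(prototypes.shape)  # (3, 2, 4)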
Example #10
    def compute_logits(self,
                       support_embeddings,
                       query_embeddings,
                       onehot_support_labels,
                       cosine_distance=False):
        """Computes the negative distances of each query point to each prototype."""
        prototypes = compute_prototypes(support_embeddings,
                                        onehot_support_labels)

        if cosine_distance:
            query_embeddings = tf.nn.l2_normalize(query_embeddings,
                                                  1,
                                                  epsilon=1e-3)
            prototypes = tf.nn.l2_normalize(prototypes, 1, epsilon=1e-3)
            logits = tf.matmul(query_embeddings, prototypes, transpose_b=True)
        else:
            # [num test images, 1, embedding size].
            query_embeddings = tf.expand_dims(query_embeddings, 1)

            # [1, num_classes, embedding_size].
            prototypes = tf.expand_dims(prototypes, 0)

            # Squared euclidean distance between each test embedding / prototype pair.
            distances = tf.reduce_sum(tf.square(query_embeddings - prototypes),
                                      2)
            logits = -distances
        return logits
Example #11
def compute_pointcloud_weights_based_on_voxel_density(points, grid_cell_size):
  """Computes pointcloud weights based on voxel density.

  Args:
    points: A tf.float32 tensor of size [num_points, 3].
    grid_cell_size: The size of the grid cells in x, y, z dimensions in the
      voxel grid. It should be either a tf.float32 tensor, a numpy array or a
      list of size [3].

  Returns:
    A tf.float32 tensor of size [num_points, 1] containing weights that are
      inversely proportional to the density of the points in voxels.
  """
  num_points = tf.shape(points)[0]
  features = tf.ones([num_points, 1], dtype=tf.float32)
  voxel_features, _, segment_ids, _ = (
      pointcloud_to_sparse_voxel_grid_unbatched(
          points=points,
          features=features,
          grid_cell_size=grid_cell_size,
          segment_func=tf.math.unsorted_segment_sum))
  num_voxels = tf.shape(voxel_features)[0]
  point_features = sparse_voxel_grid_to_pointcloud(
      voxel_features=tf.expand_dims(voxel_features, axis=0),
      segment_ids=tf.expand_dims(segment_ids, axis=0),
      num_valid_voxels=tf.expand_dims(num_voxels, axis=0),
      num_valid_points=tf.expand_dims(num_points, axis=0))
  inverse_point_densities = 1.0 / tf.squeeze(point_features, axis=0)
  total_inverse_density = tf.reduce_sum(inverse_point_densities)
  return (inverse_point_densities * tf.cast(num_points, dtype=tf.float32) /
          total_inverse_density)
Example #12
 def _observation_cost(obs):
     c_theta, s_theta, d_theta = obs[:, :1], obs[:, 1:2], obs[:, 2:3]
     theta = tf.math.atan2(s_theta, c_theta)
     cost = tf.reduce_sum(tf.square(theta) + 0.1 * tf.square(d_theta),
                          axis=1)
     cost = tf.where(tf.math.is_nan(cost), 1e6 * tf.ones_like(cost), cost)
     return cost
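A minimal check with hypothetical pendulum observations [cos(theta), sin(theta), theta_dot], assuming eager execution; the upright, motionless state should cost zero.

import tensorflow as tf

obs = tf.constant([[1.0, 0.0, 0.0],     # upright and still -> zero cost
                   [-1.0, 0.0, 1.0]])   # hanging down and spinning
print(_observation_cost(obs).numpy())   # approx [0., pi**2 + 0.1]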
Example #13
  def _build_target_distribution(self):
    self._reshape_networks()
    batch_size = tf.shape(self._replay.rewards)[0]
    # size of rewards: batch_size x 1
    rewards = self._replay.rewards[:, None]
    # size of tiled_support: batch_size x num_atoms
    tiled_support = tf.tile(self.support, [batch_size])
    tiled_support = tf.reshape(tiled_support, [batch_size, self.num_atoms])
    # size of target_support: batch_size x num_atoms

    is_terminal_multiplier = 1. - tf.cast(self._replay.terminals, tf.float32)
    # Incorporate terminal state to discount factor.
    # size of gamma_with_terminal: batch_size x 1
    gamma_with_terminal = self.cumulative_gamma * is_terminal_multiplier
    gamma_with_terminal = gamma_with_terminal[:, None]

    target_support = rewards + gamma_with_terminal * tiled_support
    # size of next_probabilities: batch_size  x num_actions x num_atoms
    next_probabilities = tf.contrib.layers.softmax(
        self._replay_next_logits)

    # size of next_qt: 1 x num_actions
    next_qt = tf.reduce_sum(self.support * next_probabilities, 2)
    # size of next_qt_argmax: 1 x batch_size
    next_qt_argmax = tf.argmax(
        next_qt + self._replay.next_legal_actions, axis=1)[:, None]
    batch_indices = tf.range(tf.to_int64(batch_size))[:, None]
    # size of next_qt_argmax: batch_size x 2
    next_qt_argmax = tf.concat([batch_indices, next_qt_argmax], axis=1)
    # size of next_probabilities: batch_size x num_atoms
    next_probabilities = tf.gather_nd(next_probabilities, next_qt_argmax)
    return project_distribution(target_support, next_probabilities,
                                self.support)
Example #14
  def get_support_set_softmax(self, logits, class_ids):
    """Softmax normalize over the support set.

    Args:
      logits: [N_k, H*W, Q] dimensional tensor.
      class_ids: [N_k] tensor giving the support-set-id of each image.

    Returns:
      Softmax-normalized logits over the support set, i.e.
      softmax(x) = exp(x) / sum(exp(x)), where the normalizer sums over the
      spatial axis and over all images sharing a support-set id.
    """
    max_logit = tf.reduce_max(logits, axis=1, keepdims=True)
    max_logit = tf.math.unsorted_segment_max(max_logit, class_ids,
                                             tf.reduce_max(class_ids) + 1)
    max_logit = tf.gather(max_logit, class_ids)
    logits_reduc = logits - max_logit

    exp_x = tf.exp(logits_reduc)
    sum_exp_x = tf.reduce_sum(exp_x, axis=1, keepdims=True)
    sum_exp_x = tf.math.unsorted_segment_sum(sum_exp_x, class_ids,
                                             tf.reduce_max(class_ids) + 1)
    log_sum_exp_x = tf.log(sum_exp_x)
    log_sum_exp_x = tf.gather(log_sum_exp_x, class_ids)

    norm_logits = logits_reduc - log_sum_exp_x
    softmax = tf.exp(norm_logits)
    return softmax
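Since the method only calls stateless tf ops, the pattern is easy to exercise standalone. A minimal sketch, assuming TF 2.x eager execution, of the same max-shifted softmax whose normalizer pools the spatial axis and all images sharing a support-set id:

import tensorflow as tf

logits = tf.random.normal([4, 5, 3])     # [N_k, H*W, Q]
class_ids = tf.constant([0, 0, 1, 1])    # two support images per class

num_classes = tf.reduce_max(class_ids) + 1
# Max-shift for numerical stability, pooled over images of the same class.
max_logit = tf.reduce_max(logits, axis=1, keepdims=True)
max_logit = tf.gather(
    tf.math.unsorted_segment_max(max_logit, class_ids, num_classes), class_ids)
exp_x = tf.exp(logits - max_logit)
# The normalizer sums over the spatial axis and over same-class images.
sum_exp = tf.gather(
    tf.math.unsorted_segment_sum(
        tf.reduce_sum(exp_x, axis=1, keepdims=True), class_ids, num_classes),
    class_ids)
softmax = exp_x / sum_exp

# Per class and per query column, the probabilities sum to 1.
total = tf.math.unsorted_segment_sum(
    tf.reduce_sum(softmax, axis=1, keepdims=True), class_ids, num_classes)
print(total.numpy())  # ~1.0 everywhere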
Example #15
def ctrl_rewards(states,
                 actions,
                 rewards,
                 next_states,
                 contexts,
                 reward_scales=1.0):
  """Returns the negative control cost.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch
        of states.
    actions: A [batch_size, num_action_dims] Tensor representing a batch
      of actions.
    rewards: A [batch_size] Tensor representing a batch of rewards.
    next_states: A [batch_size, num_state_dims] Tensor representing a batch
      of next states.
    contexts: A list of [batch_size, num_context_dims] Tensors representing
      a batch of contexts.
    reward_scales: multiplicative scale for rewards. A scalar or 1D tensor,
      must be broadcastable to number of reward dimensions.

  Returns:
    A new tf.float32 [batch_size] rewards Tensor, and
      tf.float32 [batch_size] discounts tensor.
  """
  del states, rewards, contexts  # Unused
  if actions is None:
    rewards = tf.to_float(tf.zeros(shape=next_states.shape[:1]))
  else:
    rewards = -tf.reduce_sum(tf.square(actions), axis=1)
    rewards *= reward_scales
    rewards = tf.to_float(rewards)
  return rewards, tf.ones_like(rewards)
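A minimal sketch exercising `ctrl_rewards` with hypothetical actions; the snippet targets the TF1 API (`tf.to_float`), so it runs under a graph-mode session.

import tensorflow as tf  # TF1-style API, as the snippet uses tf.to_float

actions = tf.constant([[1.0, 2.0], [0.0, 0.5]])
next_states = tf.zeros([2, 3])
# states, rewards and contexts are deleted unused, so None is fine here.
rewards, discounts = ctrl_rewards(
    states=None, actions=actions, rewards=None,
    next_states=next_states, contexts=None)
with tf.Session() as sess:
    print(sess.run([rewards, discounts]))  # [-5.0, -0.25], [1.0, 1.0]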
Example #16
    def _build_train_op(self):
        """Builds a training op.

    Returns:
      An op performing one step of training from replay data.
    """
        # click_indicator: [B, S]
        # q_values: [B, A]
        # actions: [B, S]
        # slate_q_values: [B, S]
        # replay_click_q: [B]
        click_indicator = self._replay.rewards[:, :,
                                               self._click_response_index]
        slate_q_values = tf.compat.v1.batch_gather(
            self._replay_net_outputs.q_values,
            tf.cast(self._replay.actions, dtype=tf.int32))
        # Only get the Q from the clicked document.
        replay_click_q = tf.reduce_sum(input_tensor=slate_q_values *
                                       click_indicator,
                                       axis=1,
                                       name='replay_click_q')

        target = tf.stop_gradient(self._build_target_q_op())

        clicked = tf.reduce_sum(input_tensor=click_indicator, axis=1)
        clicked_indices = tf.squeeze(tf.compat.v1.where(tf.equal(clicked, 1)),
                                     axis=1)
        # clicked_indices is a vector and tf.gather selects the batch dimension.
        q_clicked = tf.gather(replay_click_q, clicked_indices)
        target_clicked = tf.gather(target, clicked_indices)

        def get_train_op():
            loss = tf.reduce_mean(input_tensor=tf.square(q_clicked -
                                                         target_clicked))
            if self.summary_writer is not None:
                with tf.compat.v1.variable_scope('Losses'):
                    tf.compat.v1.summary.scalar('Loss', loss)

            return loss

        loss = tf.cond(pred=tf.greater(tf.reduce_sum(input_tensor=clicked), 0),
                       true_fn=get_train_op,
                       false_fn=lambda: tf.constant(0.),
                       name='')

        return self.optimizer.minimize(loss)
Example #17
def compute_target_topk_q(reward, gamma, next_actions, next_q_values,
                          next_states, terminals):
    """Computes the optimal target Q value with the greedy algorithm.

  This algorithm corresponds to the method "TT" in
  Ie et al. https://arxiv.org/abs/1905.12767.

  Args:
    reward: [batch_size] tensor, the immediate reward.
    gamma: float, discount factor with the usual RL meaning.
    next_actions: [batch_size, slate_size] tensor, the next slate.
    next_q_values: [batch_size, num_of_documents] tensor, the q values of the
      documents in the next step.
    next_states: [batch_size, 1 + num_of_documents] tensor, the features for the
      user and the documents in the next step.
    terminals: [batch_size] tensor, indicating if this is a terminal step.

  Returns:
    [batch_size] tensor, the target q values.
  """
    slate_size = next_actions.get_shape().as_list()[1]
    scores, score_no_click = _get_unnormalized_scores(next_states)

    # Choose the documents with top affinity_scores * Q values to fill a slate and
    # treat it as if it is the optimal slate.
    unnormalized_next_q_target = next_q_values * scores
    _, topk_optimal_slate = tf.math.top_k(unnormalized_next_q_target,
                                          k=slate_size)

    # Get the expected Q-value of the slate containing top-K items.
    # [batch_size, slate_size]
    next_q_values_selected = tf.batch_gather(
        next_q_values, tf.cast(topk_optimal_slate, dtype=tf.int32))

    # Get normalized affinity scores on the slate.
    # [batch_size, slate_size]
    scores_selected = tf.batch_gather(
        scores, tf.cast(topk_optimal_slate, dtype=tf.int32))

    next_q_target_topk = tf.reduce_sum(
        input_tensor=next_q_values_selected * scores_selected,
        axis=1) / (tf.reduce_sum(input_tensor=scores_selected, axis=1) +
                   score_no_click)

    return reward + gamma * next_q_target_topk * (
        1. - tf.cast(terminals, tf.float32))
Example #18
 def actor_loss_fn(dqda, action):
     if self._dqda_clipping:
         dqda = tf.clip_by_value(dqda, -self._dqda_clipping,
                                 self._dqda_clipping)
     loss = 0.5 * losses.element_wise_squared_loss(
         tf.stop_gradient(dqda + action), action)
     loss = tf.reduce_sum(loss, axis=list(range(1, len(loss.shape))))
     return loss
Example #19
    def _reg(cls, batch_size, d, x, x_fake, beta=1e-1):
        alpha = tf.random_uniform(shape=[batch_size, 1], minval=0., maxval=1.)
        interpolates = alpha * x + (1 - alpha) * x_fake
        int_d = d(interpolates)
        gradients = tf.gradients(int_d, [interpolates])[0]

        slopes = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), axis=1))
        return beta * tf.reduce_mean((slopes - 1)**2)
Example #20
def pointcloud_to_voxel_grid(points,
                             features,
                             grid_cell_size,
                             start_location,
                             end_location,
                             segment_func=tf.math.unsorted_segment_mean):
  """Converts a pointcloud into a voxel grid.

  Args:
    points: A tf.float32 tensor of size [N, 3].
    features: A tf.float32 tensor of size [N, F].
    grid_cell_size: A tf.float32 tensor of size [3].
    start_location: A tf.float32 tensor of size [3].
    end_location: A tf.float32 tensor of size [3].
    segment_func: A tensorflow function that operates on segments. Expect one
      of tf.math.unsorted_segment_{min/max/mean/prod/sum}. Defaults to
      tf.math.unsorted_segment_mean

  Returns:
    voxel_features: A tf.float32 tensor of
      size [grid_x_len, grid_y_len, grid_z_len, F].
    segment_ids: A tf.int32 tensor of IDs for each point indicating
      which (flattened) voxel cell its data was mapped to.
    point_indices: A tf.int32 tensor of size [num_points, 3] containing the
      location of each point in the 3d voxel grid.
  """
  grid_cell_size = tf.convert_to_tensor(grid_cell_size, dtype=tf.float32)
  start_location = tf.convert_to_tensor(start_location, dtype=tf.float32)
  end_location = tf.convert_to_tensor(end_location, dtype=tf.float32)
  point_indices = tf.cast(
      (points - tf.expand_dims(start_location, axis=0)) /
      tf.expand_dims(grid_cell_size, axis=0),
      dtype=tf.int32)
  grid_size = tf.cast(
      tf.math.ceil((end_location - start_location) / grid_cell_size),
      dtype=tf.int32)
  # Note: all points outside the grid are added to the edges
  # Cap index at grid_size - 1 (so a 10x10x10 grid's max cell is (9,9,9))
  point_indices = tf.minimum(point_indices, tf.expand_dims(grid_size - 1,
                                                           axis=0))
  # Don't allow any points below index (0, 0, 0)
  point_indices = tf.maximum(point_indices, 0)
  segment_ids = tf.reduce_sum(
      point_indices * tf.stack(
          [grid_size[1] * grid_size[2], grid_size[2], 1], axis=0),
      axis=1)
  voxel_features = segment_func(
      data=features,
      segment_ids=segment_ids,
      num_segments=(grid_size[0] * grid_size[1] * grid_size[2]))
  return (tf.reshape(voxel_features,
                     [grid_size[0],
                      grid_size[1],
                      grid_size[2],
                      features.get_shape().as_list()[1]]),
          segment_ids,
          point_indices)
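A minimal usage sketch, assuming eager execution: voxelizing a hypothetical random cloud in the unit cube at 0.1 resolution yields a 10x10x10 grid.

import tensorflow as tf

# Hypothetical cloud: 1000 random points in the unit cube with 2-D features.
points = tf.random.uniform([1000, 3])
features = tf.random.uniform([1000, 2])

voxel_features, segment_ids, point_indices = pointcloud_to_voxel_grid(
    points=points,
    features=features,
    grid_cell_size=[0.1, 0.1, 0.1],
    start_location=[0.0, 0.0, 0.0],
    end_location=[1.0, 1.0, 1.0])
print(voxel_features.shape)  # (10, 10, 10, 2)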
Example #21
  def projection_dist(states):
    # Length of the projection of (states - starting_states) onto the goal
    # direction, minus the Euclidean distance travelled from the start.
    inner = tf.multiply(states - starting_states, goals - starting_states)
    upper = tf.reduce_sum(inner, -1)
    result = tf.math.divide(upper, tf.norm(goals - starting_states, ord=2))
    term_1 = tf.norm(states - starting_states, ord=2)
    return -1 * term_1 + result
Example #22
  def normalized_dist(states):
    # epsilon and alpha are closed over from the enclosing scope.
    inner = tf.multiply(states - starting_states, goals - starting_states)
    upper = tf.reduce_sum(inner, -1)
    sign = tf.sign(upper)
    goal_dist = tf.norm(goals - starting_states, ord=2)
    # Signed squared length of the projection onto the goal direction.
    result = sign * tf.square(tf.math.divide(upper, goal_dist))
    term_1 = tf.square(tf.norm(states - starting_states, ord=2))
    term_2 = tf.square(tf.math.divide(upper, goal_dist))
    return tf.sqrt(epsilon + tf.abs(result - alpha * (term_1 - term_2)))
Example #23
    def _build_train_op(self):
        """Builds a training op.

    Returns:
      train_op: An op performing one step of training from replay data.
    """
        replay_next_target_value = tf.reduce_max(
            self._replay_next_target_net_outputs.q_values, 1)
        replay_target_value = tf.reduce_max(
            self._replay_target_net_outputs.q_values, 1)

        replay_action_one_hot = tf.one_hot(self._replay.actions,
                                           self.num_actions,
                                           1.,
                                           0.,
                                           name='action_one_hot')
        replay_chosen_q = tf.reduce_sum(self._replay_net_outputs.q_values *
                                        replay_action_one_hot,
                                        axis=1,
                                        name='replay_chosen_q')
        replay_target_chosen_q = tf.reduce_sum(
            self._replay_target_net_outputs.q_values * replay_action_one_hot,
            axis=1,
            name='replay_target_chosen_q')

        augmented_rewards = self._replay.rewards - self.alpha * (
            replay_target_value - replay_target_chosen_q)

        target = (augmented_rewards +
                  self.cumulative_gamma * replay_next_target_value *
                  (1. - tf.cast(self._replay.terminals, tf.float32)))
        target = tf.stop_gradient(target)

        loss = tf.losses.huber_loss(target,
                                    replay_chosen_q,
                                    reduction=tf.losses.Reduction.NONE)
        if self.summary_writer is not None:
            with tf.variable_scope('Losses'):
                tf.summary.scalar('HuberLoss', tf.reduce_mean(loss))
        return self.optimizer.minimize(tf.reduce_mean(loss))
Example #24
def select_slate_optimal(slate_size, s_no_click, s, q):
    """Selects the slate using exhaustive search.

  This algorithm corresponds to the method "OS" in
  Ie et al. https://arxiv.org/abs/1905.12767.

  Args:
    slate_size: int, the size of the recommendation slate.
    s_no_click: float tensor, the score for not clicking any document.
    s: [num_of_documents] tensor, the scores for clicking documents.
    q: [num_of_documents] tensor, the predicted q values for documents.

  Returns:
    [slate_size] tensor, the selected slate.
  """

    num_candidates = s.shape.as_list()[0]

    # Obtain all possible slates given current docs in the candidate set.
    mesh_args = [list(range(num_candidates))] * slate_size
    slates = tf.stack(tf.meshgrid(*mesh_args), axis=-1)
    slates = tf.reshape(slates, shape=(-1, slate_size))

    # Filter slates that include duplicates to ensure each document is picked
    # at most once.
    unique_mask = tf.map_fn(
        lambda x: tf.equal(tf.size(input=x), tf.size(input=tf.unique(x)[0])),
        slates,
        dtype=tf.bool)
    slates = tf.boolean_mask(tensor=slates, mask=unique_mask)

    slate_q_values = tf.gather(s * q, slates)
    slate_scores = tf.gather(s, slates)
    slate_normalizer = tf.reduce_sum(input_tensor=slate_scores,
                                     axis=1) + s_no_click

    slate_q_values = slate_q_values / tf.expand_dims(slate_normalizer, 1)
    slate_sum_q_values = tf.reduce_sum(input_tensor=slate_q_values, axis=1)
    max_q_slate_index = tf.argmax(input=slate_sum_q_values)
    return tf.gather(slates, max_q_slate_index, axis=0)
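A minimal usage sketch with a hypothetical 4-document candidate set, assuming eager execution:

import tensorflow as tf

s = tf.constant([0.1, 0.4, 0.3, 0.2])   # click scores
q = tf.constant([1.0, 0.5, 2.0, 0.1])   # per-document Q-values
s_no_click = tf.constant(0.5)

slate = select_slate_optimal(slate_size=2, s_no_click=s_no_click, s=s, q=q)
print(slate.numpy())  # document indices of the best duplicate-free slate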
Example #25
def state_rewards(states,
                  actions,
                  rewards,
                  next_states,
                  contexts,
                  weight_index=None,
                  state_indices=None,
                  weight_vector=1.0,
                  offset_vector=0.0,
                  summarize=False):
  """Returns the rewards that are linear mapping of next_states.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch
        of states.
    actions: A [batch_size, num_action_dims] Tensor representing a batch
      of actions.
    rewards: A [batch_size] Tensor representing a batch of rewards.
    next_states: A [batch_size, num_state_dims] Tensor representing a batch
      of next states.
    contexts: A list of [batch_size, num_context_dims] Tensors representing
      a batch of contexts.
    weight_index: (integer) Index of contexts lists that specify weighting.
    state_indices: (a list of Numpy integer array) Indices of states dimensions
      to be mapped.
    weight_vector: (a number or a list or Numpy array) The weighting vector,
      broadcastable to `next_states`.
    offset_vector: (a number or a list or Numpy array) The offset vector.
    summarize: (boolean) enable summary ops.

  Returns:
    A new tf.float32 [batch_size] rewards Tensor, and
      tf.float32 [batch_size] discounts tensor.
  """
  del states, actions, rewards  # unused args
  stats = {}
  record_tensor(next_states, state_indices, stats)
  next_states = index_states(next_states, state_indices)
  weight = tf.constant(
      weight_vector, dtype=next_states.dtype, shape=next_states[0].shape)
  weights = tf.expand_dims(weight, 0)
  offset = tf.constant(
      offset_vector, dtype=next_states.dtype, shape=next_states[0].shape)
  offsets = tf.expand_dims(offset, 0)
  if weight_index is not None:
    weights *= contexts[weight_index]
  rewards = tf.to_float(tf.reduce_sum(weights * (next_states+offsets), axis=1))
  if summarize:
    with tf.name_scope('RewardFn/'):
      summarize_stats(stats)
  return rewards, tf.ones_like(rewards)
Example #26
 def _get_joint_loss_outputs(self, inputs):
     outputs = []
     for id_of_model, model in self.ids_to_models.items():
         outputs.append(
             model(self._get_model_inputs(id_of_model, inputs),
                   apply_projection_layer=False))
     outputs = tf.stack(outputs)
     outputs = tf.transpose(outputs, perm=[1, 0, 2])
     outputs = self.dropout_layer(outputs)
     outputs = self.transformer_layer(outputs)
     outputs = tf.transpose(outputs, perm=[1, 0, 2])
     outputs = tf.unstack(outputs)
     outputs = self._project_with_submodels(outputs)
     outputs = tf.reduce_sum(outputs, axis=0)
     return outputs
Example #27
  def compute_logits(self, support_embeddings, query_embeddings,
                     onehot_support_labels):
    """Computes the negative distances of each query point to each prototype."""

    # [num test images, 1, embedding size].
    query_embeddings = tf.expand_dims(query_embeddings, 1)

    prototypes = compute_prototypes(support_embeddings, onehot_support_labels)

    # [1, num_classes, embedding_size].
    prototypes = tf.expand_dims(prototypes, 0)

    # Squared euclidean distances between each test embedding / prototype pair.
    distances = tf.reduce_sum(tf.square(query_embeddings - prototypes), 2)
    return -distances
Example #28
def filter_dummy_examples(example_strings, class_ids):
  """Returns tensors with only actual examples, filtering out the dummy ones.

  Actual examples come first in the tensors and are followed by dummy ones,
  indicated by negative class IDs.

  Args:
    example_strings: 1-D Tensor of dtype str, Example protocol buffers.
    class_ids: 1-D Tensor of dtype int, class IDs (absolute wrt the original
      dataset, except for negative ones, which indicate dummy examples).
  """
  num_actual = tf.reduce_sum(tf.cast(class_ids >= 0, tf.int32))
  actual_example_strings = example_strings[:num_actual]
  actual_class_ids = class_ids[:num_actual]
  return (actual_example_strings, actual_class_ids)
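A minimal check with hypothetical inputs, assuming eager execution:

import tensorflow as tf

example_strings = tf.constant([b'ex0', b'ex1', b'pad', b'pad'])
class_ids = tf.constant([3, 7, -1, -1])  # two real examples, two dummies

strings, ids = filter_dummy_examples(example_strings, class_ids)
print(strings.numpy(), ids.numpy())  # [b'ex0' b'ex1'] [3 7]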
Example #29
def proto_maml_fc_layer_init_fn(labels, embeddings, weights, biases,
                                prototype_multiplier):
    """Return a list of operations for reparameterized ProtoNet initialization."""

    # This is robust to classes missing from the training set, but assumes that
    # the last class is present.
    num_ways = tf.cast(
        tf.math.reduce_max(input_tensor=tf.unique(labels)[0]) + 1, tf.int32)

    # When there are no examples for a given class, we default its prototype to
    # zeros, per the implementation of `tf.math.unsorted_segment_mean`.
    prototypes = tf.math.unsorted_segment_mean(embeddings, labels, num_ways)

    # Scale the prototypes, which acts as a regularizer on the weights and biases.
    prototypes *= prototype_multiplier

    # logit = -<squared Euclidean distance to prototype>
    #       = -(x - p)^T.(x - p)
    #       = 2 x^T.p - p^T.p - x^T.x
    #       = x^T.w + b
    #         where w = 2p, b = -p^T.p
    output_weights = tf.transpose(a=2 * prototypes)
    output_biases = -tf.reduce_sum(input_tensor=prototypes * prototypes,
                                   axis=1)

    # We zero-pad to align with the original weights and biases.
    output_weights = tf.pad(tensor=output_weights,
                            paddings=[[0, 0],
                                      [
                                          0,
                                          tf.shape(input=weights)[1] -
                                          tf.shape(input=output_weights)[1]
                                      ]],
                            mode='CONSTANT',
                            constant_values=0)
    output_biases = tf.pad(tensor=output_biases,
                           paddings=[[
                               0,
                               tf.shape(input=biases)[0] -
                               tf.shape(input=output_biases)[0]
                           ]],
                           mode='CONSTANT',
                           constant_values=0)

    return [
        weights.assign(output_weights),
        biases.assign(output_biases),
    ]
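A usage sketch on a hypothetical 3-way episode, assuming eager execution (where the returned assign ops run immediately); the fc layer is sized for 5 classes, so the prototype-derived parameters are zero-padded.

import tensorflow as tf

labels = tf.constant([0, 0, 1, 2])
embeddings = tf.random.normal([4, 4])
weights = tf.Variable(tf.zeros([4, 5]))
biases = tf.Variable(tf.zeros([5]))

init_ops = proto_maml_fc_layer_init_fn(
    labels, embeddings, weights, biases, prototype_multiplier=1.0)
# In graph mode, init_ops would instead be session-run as a group.
print(weights.numpy().shape, biases.numpy().shape)  # (4, 5) (5,)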
Example #30
    def _loss_op(self):
        with tf.name_scope("loss_op"):

            weights = tf.ones_like(self.y, name='weights')
            self.loss = sequence_loss(self.y_hat,
                                      self.y,
                                      weights=weights,
                                      loss_fn=which_loss(self._config.loss))
            if hasattr(self, '_reg'):
                reg = tf.reduce_sum(self._reg)
                self.loss += reg
                self._summary_dict.update({"reg": reg})

            self._summary_dict.update({"loss": self.loss})