Example 1
    def false_fun():
        # Assign each foreground embedding to its nearest cluster center; the
        # first output of nearest_neighbors holds the center indices.
        fg_labels = nearest_neighbors(fg_embeddings, centers, 1)[0][:, 0]

        if dist_threshold is not None:
            # Reject assignments farther than dist_threshold from their
            # matched center by marking them with label -1.
            dist = tf.norm(fg_embeddings - tf.gather(centers, fg_labels),
                           axis=-1)
            fg_labels = tf.where(dist <= dist_threshold, fg_labels, -1)

        fg_labels = tf.cast(fg_labels, tf.int32)
        # Scatter the labels, shifted to be 1-based, back onto the full mask;
        # background and rejected positions remain 0.
        return tf.scatter_nd(tf.where(fg_mask), fg_labels + 1,
                             tf.cast(tf.shape(fg_mask), tf.int64))
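
The function above is written as the false branch of a conditional. A minimal sketch of one plausible wiring follows; the predicate and true_fun are illustrative assumptions, not part of the original snippet. The idea is that when there are no foreground embeddings the nearest-neighbor lookup is skipped and every position keeps the background label 0.

    def true_fun():
        # No foreground pixels: return an all-background (label 0) map.
        return tf.zeros(tf.shape(fg_mask), dtype=tf.int32)

    labels = tf.cond(tf.equal(tf.shape(fg_embeddings)[0], 0),
                     true_fun, false_fun)
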
Example 2
 def _sample_kmc2_chain():
     """Returns previous centers as well as a new center sampled using k-MC2."""
     # Extract the subset from the underlying batch.
     start = i * self._kmc2_chain_length
     end = start + self._kmc2_chain_length
     subset = first_shard[start:end]
     # Compute the distances from points in the subset to previous centers.
     _, distances = gen_clustering_ops.nearest_neighbors(
         subset, self._cluster_centers, 1)
     # Sample index of new center using k-MC2 Markov chain.
     new_center_index = gen_clustering_ops.kmc2_chain_initialization(
         array_ops.squeeze(distances), self._seed)
     # Extract actual new center.
     newly_sampled_center = array_ops.reshape(
         subset[new_center_index], [1, -1])
     # Return concatenation with previously sampled centers.
     if self._distance_metric == COSINE_DISTANCE:
         newly_sampled_center = nn_impl.l2_normalize(
             newly_sampled_center, dim=1)
     return array_ops.concat(
         [self._cluster_centers, newly_sampled_center], 0)
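
The kmc2_chain_initialization op above takes each candidate's distance to its nearest already-chosen center, as computed by nearest_neighbors, and returns the index of one candidate, selected by a short Metropolis-Hastings-style chain that approximates the D^2 (k-means++) sampling distribution without scanning the whole dataset. A rough NumPy sketch of that chain idea follows; the function name and loop structure are illustrative assumptions, not the actual kernel.

    import numpy as np

    def kmc2_chain_sketch(distances, rng):
        # Walk the chain over the candidate subset, starting at index 0.
        current = 0
        for j in range(1, len(distances)):
            # Move to candidate j with probability min(1, d_j / d_current), so
            # points far from the existing centers are favored.
            if rng.random() * max(distances[current], 1e-12) < distances[j]:
                current = j
        return current

    # Example: kmc2_chain_sketch(np.array([0.1, 2.0, 0.5, 3.0]),
    #                            np.random.default_rng(0))
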
Example 3
 def _sample_kmc2_chain():
   """Returns previous centers as well as a new center sampled using k-MC2.
   """
   # Extract the subset from the underlying batch.
   start = i * self._kmc2_chain_length
   end = start + self._kmc2_chain_length
   subset = first_shard[start:end]
   # Compute the distances from points in the subset to previous centers.
   _, distances = gen_clustering_ops.nearest_neighbors(
       subset, self._cluster_centers, 1)
   # Sample index of new center using k-MC2 Markov chain.
   new_center_index = gen_clustering_ops.kmc2_chain_initialization(
       array_ops.squeeze(distances), self._seed)
   # Extract actual new center.
   newly_sampled_center = array_ops.reshape(subset[new_center_index],
                                            [1, -1])
   # Return concatenation with previously sampled centers.
   if self._distance_metric == COSINE_DISTANCE:
     newly_sampled_center = nn_impl.l2_normalize(
         newly_sampled_center, dim=1)
   return array_ops.concat([self._cluster_centers, newly_sampled_center],
                           0)
Example 4
  def _infer_graph(self, inputs, clusters):
    """Maps input to closest cluster and the score.

    Args:
      inputs: list of input Tensors.
      clusters: Tensor of cluster centers.

    Returns:
      List of three tuples, where each value in a tuple corresponds to a value
      in inputs. The three tuples are:
      all_scores: distance of each input to each cluster center.
      score: distance of each input to closest cluster center.
      cluster_idx: index of cluster center closest to the corresponding input.
    """
    assert isinstance(inputs, list)
    # Pairwise distances are used only by transform(). In all other cases, this
    # sub-graph is not evaluated.
    scores = self._distance_graph(inputs, clusters, self._distance_metric)
    output = []
    if (self._distance_metric == COSINE_DISTANCE and
        not self._clusters_l2_normalized()):
      # For normalized vectors x and y, the squared Euclidean distance equals
      # 2 * the cosine distance, so halving it recovers the cosine distance.
      # We are using this fact and reusing the nearest_neighbors op.
      # TODO(ands): Support COSINE distance in nearest_neighbors and remove
      # this.
      with ops.colocate_with(clusters, ignore_existing=True):
        clusters = nn_impl.l2_normalize(clusters, axis=1)
    for inp, score in zip(inputs, scores):
      with ops.colocate_with(inp, ignore_existing=True):
        (indices,
         distances) = gen_clustering_ops.nearest_neighbors(inp, clusters, 1)
        if self._distance_metric == COSINE_DISTANCE:
          distances *= 0.5
        output.append(
            (score, array_ops.squeeze(distances,
                                      [-1]), array_ops.squeeze(indices, [-1])))
    return zip(*output)
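
The 0.5 factor above comes from the identity the comment relies on: for unit-norm x and y, ||x - y||^2 = 2 - 2*(x . y) = 2*(1 - cos(x, y)), so halving the Euclidean-based distances from nearest_neighbors yields cosine distances. A quick NumPy check of the identity (illustrative only, not part of the library):

    import numpy as np

    rng = np.random.default_rng(0)
    x = rng.normal(size=8)
    y = rng.normal(size=8)
    x /= np.linalg.norm(x)   # unit-normalize, as l2_normalize does
    y /= np.linalg.norm(y)

    cosine_distance = 1.0 - x @ y
    half_squared_euclidean = 0.5 * np.sum((x - y) ** 2)
    assert np.isclose(cosine_distance, half_squared_euclidean)
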
Example 5
  def _infer_graph(self, inputs, clusters):
    """Maps input to closest cluster and the score.

    Args:
      inputs: list of input Tensors.
      clusters: Tensor of cluster centers.

    Returns:
      List of three tuples, where each value in a tuple corresponds to a value
      in inputs. The three tuples are:
      all_scores: distance of each input to each cluster center.
      score: distance of each input to closest cluster center.
      cluster_idx: index of cluster center closest to the corresponding input.
    """
    assert isinstance(inputs, list)
    # Pairwise distances are used only by transform(). In all other cases, this
    # sub-graph is not evaluated.
    scores = self._distance_graph(inputs, clusters, self._distance_metric)
    output = []
    if (self._distance_metric == COSINE_DISTANCE and
        not self._clusters_l2_normalized()):
      # For normalized vectors x and y, the squared Euclidean distance equals
      # 2 * the cosine distance, so halving it recovers the cosine distance.
      # We are using this fact and reusing the nearest_neighbors op.
      # TODO(ands): Support COSINE distance in nearest_neighbors and remove
      # this.
      with ops.colocate_with(clusters, ignore_existing=True):
        clusters = nn_impl.l2_normalize(clusters, dim=1)
    for inp, score in zip(inputs, scores):
      with ops.colocate_with(inp, ignore_existing=True):
        (indices, distances) = gen_clustering_ops.nearest_neighbors(
            inp, clusters, 1)
        if self._distance_metric == COSINE_DISTANCE:
          distances *= 0.5
        output.append((score, array_ops.squeeze(distances, [-1]),
                       array_ops.squeeze(indices, [-1])))
    return zip(*output)
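
This variant is essentially identical to Example 4; the main difference is that it passes the older dim= keyword to l2_normalize, which later TensorFlow releases renamed to axis= (the form used in Example 4).
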
Example 6
 def false_fun():
     # Label each foreground embedding with the index of its nearest center.
     fg_labels = nearest_neighbors(fg_embeddings, centers, 1)[0][:, 0]
     fg_labels = tf.cast(fg_labels, tf.int32)
     # Scatter the 1-based labels back onto the full mask; background stays 0.
     return tf.scatter_nd(tf.where(fg_mask), fg_labels + 1,
                          tf.cast(tf.shape(fg_mask), tf.int64))