def false_fun(): fg_labels = nearest_neighbors(fg_embeddings, centers, 1)[0][:, 0] if dist_threshold is not None: dist = tf.norm(fg_embeddings - tf.gather(centers, fg_labels), axis=-1) fg_labels = tf.where(dist <= dist_threshold, fg_labels, -1) fg_labels = tf.cast(fg_labels, tf.int32) return tf.scatter_nd(tf.where(fg_mask), fg_labels + 1, tf.cast(tf.shape(fg_mask), tf.int64))
def _sample_kmc2_chain(): """Returns previous centers as well as a new center sampled using k-MC2.""" # Extract the subset from the underlying batch. start = i * self._kmc2_chain_length end = start + self._kmc2_chain_length subset = first_shard[start:end] # Compute the distances from points in the subset to previous centers. _, distances = gen_clustering_ops.nearest_neighbors( subset, self._cluster_centers, 1) # Sample index of new center using k-MC2 Markov chain. new_center_index = gen_clustering_ops.kmc2_chain_initialization( array_ops.squeeze(distances), self._seed) # Extract actual new center. newly_sampled_center = array_ops.reshape( subset[new_center_index], [1, -1]) # Return concatenation with previously sampled centers. if self._distance_metric == COSINE_DISTANCE: newly_sampled_center = nn_impl.l2_normalize( newly_sampled_center, dim=1) return array_ops.concat( [self._cluster_centers, newly_sampled_center], 0)
def _sample_kmc2_chain(): """Returns previous centers as well as a new center sampled using k-MC2. """ # Extract the subset from the underlying batch. start = i * self._kmc2_chain_length end = start + self._kmc2_chain_length subset = first_shard[start:end] # Compute the distances from points in the subset to previous centers. _, distances = gen_clustering_ops.nearest_neighbors( subset, self._cluster_centers, 1) # Sample index of new center using k-MC2 Markov chain. new_center_index = gen_clustering_ops.kmc2_chain_initialization( array_ops.squeeze(distances), self._seed) # Extract actual new center. newly_sampled_center = array_ops.reshape(subset[new_center_index], [1, -1]) # Return concatenation with previously sampled centers. if self._distance_metric == COSINE_DISTANCE: newly_sampled_center = nn_impl.l2_normalize( newly_sampled_center, dim=1) return array_ops.concat([self._cluster_centers, newly_sampled_center], 0)
def _infer_graph(self, inputs, clusters): """Maps input to closest cluster and the score. Args: inputs: list of input Tensors. clusters: Tensor of cluster centers. Returns: List of tuple, where each value in tuple corresponds to a value in inp. The tuple has following three elements: all_scores: distance of each input to each cluster center. score: distance of each input to closest cluster center. cluster_idx: index of cluster center closest to the corresponding input. """ assert isinstance(inputs, list) # Pairwise distances are used only by transform(). In all other cases, this # sub-graph is not evaluated. scores = self._distance_graph(inputs, clusters, self._distance_metric) output = [] if (self._distance_metric == COSINE_DISTANCE and not self._clusters_l2_normalized()): # The cosine distance between normalized vectors x and y is the same as # 2 * squared_euclidean_distance. We are using this fact and reusing the # nearest_neighbors op. # TODO(ands): Support COSINE distance in nearest_neighbors and remove # this. with ops.colocate_with(clusters, ignore_existing=True): clusters = nn_impl.l2_normalize(clusters, axis=1) for inp, score in zip(inputs, scores): with ops.colocate_with(inp, ignore_existing=True): (indices, distances) = gen_clustering_ops.nearest_neighbors(inp, clusters, 1) if self._distance_metric == COSINE_DISTANCE: distances *= 0.5 output.append( (score, array_ops.squeeze(distances, [-1]), array_ops.squeeze(indices, [-1]))) return zip(*output)
def _infer_graph(self, inputs, clusters): """Maps input to closest cluster and the score. Args: inputs: list of input Tensors. clusters: Tensor of cluster centers. Returns: List of tuple, where each value in tuple corresponds to a value in inp. The tuple has following three elements: all_scores: distance of each input to each cluster center. score: distance of each input to closest cluster center. cluster_idx: index of cluster center closest to the corresponding input. """ assert isinstance(inputs, list) # Pairwise distances are used only by transform(). In all other cases, this # sub-graph is not evaluated. scores = self._distance_graph(inputs, clusters, self._distance_metric) output = [] if (self._distance_metric == COSINE_DISTANCE and not self._clusters_l2_normalized()): # The cosine distance between normalized vectors x and y is the same as # 2 * squared_euclidean_distance. We are using this fact and reusing the # nearest_neighbors op. # TODO(ands): Support COSINE distance in nearest_neighbors and remove # this. with ops.colocate_with(clusters, ignore_existing=True): clusters = nn_impl.l2_normalize(clusters, dim=1) for inp, score in zip(inputs, scores): with ops.colocate_with(inp, ignore_existing=True): (indices, distances) = gen_clustering_ops.nearest_neighbors( inp, clusters, 1) if self._distance_metric == COSINE_DISTANCE: distances *= 0.5 output.append((score, array_ops.squeeze(distances, [-1]), array_ops.squeeze(indices, [-1]))) return zip(*output)
def false_fun(): fg_labels = nearest_neighbors(fg_embeddings, centers, 1)[0][:, 0] fg_labels = tf.cast(fg_labels, tf.int32) return tf.scatter_nd(tf.where(fg_mask), fg_labels + 1, tf.cast(tf.shape(fg_mask), tf.int64))