def _init_clusters(self):
  """Initialization of clusters.

  Returns:
    Tuple with following elements:
      cluster_centers: a Tensor for storing cluster centers.
      cluster_counts: a Tensor for storing counts of points assigned to this
        cluster. This is used by mini-batch training.
      cluster_centers_updated: a copy of cluster_centers that is actively
        updated each step by the mini-batch update rule.
      update_in_steps: number of steps left before cluster_centers_updated is
        copied back into cluster_centers.
  """
  init = self._initial_clusters
  if init == RANDOM_INIT:
    clusters_init = self._init_clusters_random()
  elif init == KMEANS_PLUS_PLUS_INIT:
    # Points from only the first shard are used for initializing centers.
    # TODO(ands): Use all points.
    inp = self._inputs[0]
    if self._distance_metric == COSINE_DISTANCE:
      inp = nn_impl.l2_normalize(inp, dim=1)
    clusters_init = gen_clustering_ops.kmeans_plus_plus_initialization(
        inp, self._num_clusters, self._random_seed,
        self._kmeans_plus_plus_num_retries)
  elif callable(init):
    clusters_init = init(self._inputs, self._num_clusters)
  elif not isinstance(init, str):
    clusters_init = init
  else:
    assert False, 'Unsupported init passed to Kmeans %s' % str(init)
  if self._distance_metric == COSINE_DISTANCE and clusters_init is not None:
    clusters_init = nn_impl.l2_normalize(clusters_init, dim=1)
  clusters_init = clusters_init if clusters_init is not None else []
  # TODO(agarwal): Locally cache cluster_centers on the worker to avoid
  # copying them each step.
  cluster_centers = variables.Variable(
      clusters_init, name='clusters', validate_shape=False)
  if self._use_mini_batch and self._mini_batch_steps_per_iteration > 1:
    # Copy of cluster centers actively updated each step according to
    # mini-batch update rule.
    cluster_centers_updated = variables.Variable(
        clusters_init, name='clusters_updated', validate_shape=False)
    # How many steps till we copy the updated clusters to cluster_centers.
    update_in_steps = variables.Variable(
        self._mini_batch_steps_per_iteration,
        dtype=dtypes.int64,
        name='update_in_steps')
    # Count of points assigned to cluster_centers_updated.
    cluster_counts = variables.Variable(
        array_ops.zeros([self._num_clusters], dtype=dtypes.int64))
  else:
    cluster_centers_updated = cluster_centers
    update_in_steps = None
    cluster_counts = (
        variables.Variable(
            array_ops.ones([self._num_clusters], dtype=dtypes.int64))
        if self._use_mini_batch else None)
  return (cluster_centers, cluster_counts, cluster_centers_updated,
          update_in_steps)
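# For reference, a simplified NumPy sketch (not the library's actual op) of
# the k-means++ seeding that kmeans_plus_plus_initialization performs: each
# new center is sampled with probability proportional to the squared distance
# to the nearest already-chosen center (D^2 sampling). Retries, sharding, and
# seeding are omitted; all names here are hypothetical.
import numpy as np

def kmeans_plus_plus(points, k, rng=np.random):
  centers = [points[rng.randint(len(points))]]
  for _ in range(k - 1):
    d2 = np.min([np.sum((points - c)**2, axis=1) for c in centers], axis=0)
    centers.append(points[rng.choice(len(points), p=d2 / d2.sum())])
  return np.stack(centers)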
def _f():
  # Note that there is a race condition here, so we do best-effort updates
  # here. We reset update_in_steps first so that other workers don't
  # duplicate the updates. Also we update cluster_centers_var before
  # resetting total_counts to avoid large updates to cluster_centers_updated
  # based on a partially updated cluster_centers_var.
  with ops.control_dependencies([
      state_ops.assign(update_in_steps,
                       self._mini_batch_steps_per_iteration - 1)
  ]):
    with ops.colocate_with(cluster_centers_updated, ignore_existing=True):
      if self._distance_metric == COSINE_DISTANCE:
        cluster_centers = nn_impl.l2_normalize(
            cluster_centers_updated, dim=1)
      else:
        cluster_centers = cluster_centers_updated
    with ops.colocate_with(cluster_centers_var, ignore_existing=True):
      with ops.control_dependencies(
          [state_ops.assign(cluster_centers_var, cluster_centers)]):
        with ops.colocate_with(None, ignore_existing=True):
          with ops.control_dependencies([
              state_ops.assign(total_counts,
                               array_ops.zeros_like(total_counts))
          ]):
            return array_ops.identity(update_in_steps)
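# A plain-Python sketch (hypothetical, no TensorFlow) of the synchronization
# scheme _f implements: every `steps_per_iteration` mini-batch steps, the
# actively-updated centers are published to the served centers and the point
# counts are reset, so the served centers approximate one full Lloyd
# iteration per cycle.
class MiniBatchSync:

  def __init__(self, steps_per_iteration):
    self.steps_per_iteration = steps_per_iteration
    self.update_in_steps = steps_per_iteration

  def step(self, centers, centers_updated, counts):
    self.update_in_steps -= 1
    if self.update_in_steps <= 0:
      self.update_in_steps = self.steps_per_iteration - 1
      centers[:] = centers_updated  # Publish the accumulated update.
      counts[:] = 0                 # Reset counts for the next cycle.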
def _init_clusters(self):
  """Initialization of clusters.

  Returns:
    Tuple with following elements:
      cluster_centers: a Tensor for storing cluster centers.
      cluster_counts: a Tensor for storing counts of points assigned to this
        cluster. This is used by mini-batch training.
  """
  init = self._initial_clusters
  if init == RANDOM_INIT:
    clusters_init = self._init_clusters_random()
  elif init == KMEANS_PLUS_PLUS_INIT:
    # Points from only the first shard are used for initializing centers.
    # TODO(ands): Use all points.
    clusters_init = gen_clustering_ops.kmeans_plus_plus_initialization(
        self._inputs[0], self._num_clusters, self._random_seed,
        self._kmeans_plus_plus_num_retries)
  elif callable(init):
    clusters_init = init(self._inputs, self._num_clusters)
  elif not isinstance(init, str):
    clusters_init = init
  else:
    assert False, 'Unsupported init passed to Kmeans %s' % str(init)
  if self._distance_metric == COSINE_DISTANCE and clusters_init is not None:
    clusters_init = nn_impl.l2_normalize(clusters_init, dim=1)
  clusters_init = clusters_init if clusters_init is not None else []
  cluster_centers = variables.Variable(
      clusters_init, name='clusters', validate_shape=False)
  cluster_counts = (
      variables.Variable(
          array_ops.ones([self._num_clusters], dtype=dtypes.int64))
      if self._use_mini_batch else None)
  return cluster_centers, cluster_counts
def _l2_normalize_data(cls, inputs):
  """L2-normalizes each row of the input data."""
  output = []
  for inp in inputs:
    with ops.colocate_with(inp):
      output.append(nn_impl.l2_normalize(inp, dim=1))
  return output
def _full_batch_training_op(self, inputs, cluster_idx_list, cluster_centers):
  """Creates an op for training for full batch case.

  Args:
    inputs: list of input Tensors.
    cluster_idx_list: A vector (or list of vectors). Each element in the
      vector corresponds to an input row in 'inp' and specifies the cluster
      id corresponding to the input.
    cluster_centers: Tensor Ref of cluster centers.

  Returns:
    An op for doing an update of full-batch k-means.
  """
  cluster_sums = []
  cluster_counts = []
  epsilon = constant_op.constant(1e-6, dtype=inputs[0].dtype)
  for inp, cluster_idx in zip(inputs, cluster_idx_list):
    with ops.colocate_with(inp):
      cluster_sums.append(
          math_ops.unsorted_segment_sum(inp, cluster_idx, self._num_clusters))
      cluster_counts.append(
          math_ops.unsorted_segment_sum(
              array_ops.reshape(
                  array_ops.ones(
                      array_ops.reshape(array_ops.shape(inp)[0], [-1])),
                  [-1, 1]), cluster_idx, self._num_clusters))
  with ops.colocate_with(cluster_centers):
    new_clusters_centers = math_ops.add_n(cluster_sums) / (
        math_ops.cast(math_ops.add_n(cluster_counts), cluster_sums[0].dtype) +
        epsilon)
    if self._clusters_l2_normalized():
      new_clusters_centers = nn_impl.l2_normalize(new_clusters_centers, dim=1)
  return state_ops.assign(cluster_centers, new_clusters_centers)
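# A minimal NumPy sketch of the full-batch update computed by the op above:
# new centers are per-cluster means, with a small epsilon guarding against
# division by zero for empty clusters. Names are illustrative.
import numpy as np

def lloyd_update(points, cluster_idx, num_clusters, epsilon=1e-6):
  """Returns per-cluster means of `points` grouped by `cluster_idx`."""
  sums = np.zeros((num_clusters, points.shape[1]), dtype=points.dtype)
  counts = np.zeros((num_clusters, 1), dtype=points.dtype)
  np.add.at(sums, cluster_idx, points)
  np.add.at(counts, cluster_idx, 1.0)
  return sums / (counts + epsilon)

points = np.array([[0., 0.], [2., 0.], [9., 9.]], dtype=np.float32)
print(lloyd_update(points, np.array([0, 0, 1]), 2))  # ~[[1. 0.] [9. 9.]]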
def _sample_random():
  """Returns a random point as a cluster center."""
  # By assumption the batch is reshuffled and _sample_random is always
  # called for i=0. Hence, we simply return the first point.
  new_center = array_ops.reshape(first_shard[0], [1, -1])
  if self._distance_metric == COSINE_DISTANCE:
    new_center = nn_impl.l2_normalize(new_center, dim=1)
  return new_center
def _initialize_clusters(self, cluster_centers, cluster_centers_initialized,
                         cluster_centers_updated):
  """Returns an op to initialize the cluster centers."""
  init = self._initial_clusters
  if init == RANDOM_INIT:
    clusters_init = self._init_clusters_random()
  elif init == KMEANS_PLUS_PLUS_INIT:
    # Points from only the first shard are used for initializing centers.
    # TODO(ands): Use all points.
    inp = self._inputs[0]
    if self._distance_metric == COSINE_DISTANCE:
      inp = nn_impl.l2_normalize(inp, dim=1)
    clusters_init = gen_clustering_ops.kmeans_plus_plus_initialization(
        inp, self._num_clusters, self._random_seed,
        self._kmeans_plus_plus_num_retries)
  elif callable(init):
    clusters_init = init(self._inputs, self._num_clusters)
  elif not isinstance(init, str):
    clusters_init = init
  else:
    assert False, 'Unsupported init passed to Kmeans %s' % str(init)
  if self._distance_metric == COSINE_DISTANCE and clusters_init is not None:
    clusters_init = nn_impl.l2_normalize(clusters_init, dim=1)
  with ops.colocate_with(cluster_centers_initialized):
    initialized = control_flow_ops.with_dependencies(
        [clusters_init], array_ops.identity(cluster_centers_initialized))
  with ops.colocate_with(cluster_centers):
    assign_centers = state_ops.assign(
        cluster_centers, clusters_init, validate_shape=False)
    if cluster_centers_updated != cluster_centers:
      assign_centers = control_flow_ops.group(
          assign_centers,
          state_ops.assign(
              cluster_centers_updated, clusters_init, validate_shape=False))
    assign_centers = control_flow_ops.with_dependencies(
        [assign_centers],
        state_ops.assign(cluster_centers_initialized, True))
    return control_flow_ops.cond(initialized, control_flow_ops.no_op,
                                 lambda: assign_centers).op
def testL2NormalizeDimArray(self):
  x_shape = [20, 7, 3]
  np.random.seed(1)
  x_np = np.random.random_sample(x_shape).astype(np.float32)
  dim = [1, 2]
  y_np = self._l2Normalize(x_np, dim)
  x_tf = constant_op.constant(x_np, name="x")
  y_tf = nn_impl.l2_normalize(x_tf, dim)
  self.assertAllClose(y_np, self.evaluate(y_tf))
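# The test above (and the related tests below) rely on a `_l2Normalize`
# reference helper that is not shown in this section. A plausible NumPy
# implementation (an assumption, not the test class's actual code) divides by
# the L2 norm taken over the given axis or axes:
import numpy as np

def _l2Normalize(x_np, dim):
  dims = dim if isinstance(dim, (list, tuple)) else [dim]
  norm = np.sqrt(np.sum(x_np * x_np, axis=tuple(dims), keepdims=True))
  return x_np / norm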
def _kmeans_plus_plus(self):
  # Points from only the first shard are used for initializing centers.
  # TODO(ands): Use all points.
  inp = self._inputs[0]
  if self._distance_metric == COSINE_DISTANCE:
    inp = nn_impl.l2_normalize(inp, dim=1)
  return gen_clustering_ops.kmeans_plus_plus_initialization(
      inp, math_ops.cast(self._num_remaining, dtypes.int64), self._seed,
      self._kmeans_plus_plus_num_retries)
def testL2Normalize(self):
  x_shape = [20, 7, 3]
  np.random.seed(1)
  x_np = np.random.random_sample(x_shape).astype(np.float32)
  for dim in range(len(x_shape)):
    y_np = self._l2Normalize(x_np, dim)
    with self.test_session():
      x_tf = constant_op.constant(x_np, name="x")
      y_tf = nn_impl.l2_normalize(x_tf, dim)
      self.assertAllClose(y_np, y_tf.eval())
def testL2NormalizeGradient(self):
  x_shape = [20, 7, 3]
  np.random.seed(1)
  x_np = np.random.random_sample(x_shape).astype(np.float64)
  for dim in range(len(x_shape)):
    with self.test_session():
      x_tf = constant_op.constant(x_np, name="x")
      y_tf = nn_impl.l2_normalize(x_tf, dim)
      err = gradient_checker.compute_gradient_error(x_tf, x_shape, y_tf,
                                                    x_shape)
      print("L2Normalize gradient err = %g " % err)
      self.assertLess(err, 1e-4)
def training_graph(self):
  """Generate a training graph for kmeans algorithm.

  Returns:
    A tuple consisting of:
      all_scores: A matrix (or list of matrices) of dimensions (num_input,
        num_clusters) where the value is the distance of an input vector and
        a cluster center.
      cluster_idx: A vector (or list of vectors). Each element in the vector
        corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      scores: Similar to cluster_idx but specifies the distance to the
        assigned cluster instead.
      cluster_centers_initialized: scalar indicating whether clusters have
        been initialized.
      init_op: an op to initialize the clusters.
      training_op: an op that runs an iteration of training.
  """
  # Implementation of kmeans.
  inputs = self._inputs
  (cluster_centers_var, cluster_centers_initialized, total_counts,
   cluster_centers_updated, update_in_steps) = self._create_variables()
  init_op = self._initialize_clusters(cluster_centers_var,
                                      cluster_centers_initialized,
                                      cluster_centers_updated)
  cluster_centers = cluster_centers_var

  if self._distance_metric == COSINE_DISTANCE:
    inputs = self._l2_normalize_data(inputs)
    if not self._clusters_l2_normalized():
      cluster_centers = nn_impl.l2_normalize(cluster_centers, dim=1)

  all_scores, scores, cluster_idx = self._infer_graph(inputs, cluster_centers)
  if self._use_mini_batch:
    sync_updates_op = self._mini_batch_sync_updates_op(
        update_in_steps, cluster_centers_var, cluster_centers_updated,
        total_counts)
    assert sync_updates_op is not None
    with ops.control_dependencies([sync_updates_op]):
      training_op = self._mini_batch_training_op(
          inputs, cluster_idx, cluster_centers_updated, total_counts)
  else:
    assert cluster_centers == cluster_centers_var
    training_op = self._full_batch_training_op(inputs, cluster_idx,
                                               cluster_centers_var)
  return (all_scores, cluster_idx, scores, cluster_centers_initialized,
          init_op, training_op)
def _compute_cosine_distance(cls, inputs, clusters, inputs_normalized=True):
  """Computes cosine distance between each input and each cluster center.

  Args:
    inputs: list of input Tensors.
    clusters: cluster Tensor.
    inputs_normalized: if True, it assumes that inp and clusters are
      L2-normalized, so the cosine distance reduces to 1 minus the dot
      product. Else it L2 normalizes the inputs first.

  Returns:
    list of Tensors, where each element corresponds to each element in inp.
    The value is the distance of each row to all the cluster centers.
  """
  output = []
  if not inputs_normalized:
    with ops.colocate_with(clusters):
      clusters = nn_impl.l2_normalize(clusters, dim=1)
  for inp in inputs:
    with ops.colocate_with(inp):
      if not inputs_normalized:
        inp = nn_impl.l2_normalize(inp, dim=1)
      output.append(1 - math_ops.matmul(inp, clusters, transpose_b=True))
  return output
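# Quick NumPy sanity check (illustrative only): for L2-normalized rows,
# cosine distance is 1 - <x, c>, which is exactly what the matmul above
# computes for each input shard.
import numpy as np

x = np.random.randn(4, 3)
c = np.random.randn(2, 3)
x /= np.linalg.norm(x, axis=1, keepdims=True)
c /= np.linalg.norm(c, axis=1, keepdims=True)
dist = 1 - x @ c.T  # Shape (4, 2): distance of each row to each center.
assert dist.min() >= 0.0 and dist.max() <= 2.0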
def _add_new_centers(self):
  """Adds some centers and returns the number of centers remaining."""
  new_centers = self._choose_initial_centers()
  if self._distance_metric == COSINE_DISTANCE:
    new_centers = nn_impl.l2_normalize(new_centers, dim=1)
  # If cluster_centers is empty, it doesn't have the right shape for concat.
  all_centers = control_flow_ops.cond(
      math_ops.equal(self._num_selected, 0), lambda: new_centers,
      lambda: array_ops.concat([self._cluster_centers, new_centers], 0))
  # TODO(ccolby): De-dupe all_centers?
  a = state_ops.assign(
      self._cluster_centers, all_centers, validate_shape=False)
  if self._cluster_centers_updated is not self._cluster_centers:
    a = state_ops.assign(
        self._cluster_centers_updated, a, validate_shape=False)
  return self._num_clusters - array_ops.shape(a)[0]
def _sample_kmc2_chain():
  """Returns previous centers as well as a new center sampled using k-MC2."""
  # Extract the subset from the underlying batch.
  start = i * self._kmc2_chain_length
  end = start + self._kmc2_chain_length
  subset = first_shard[start:end]
  # Compute the distances from points in the subset to previous centers.
  _, distances = gen_clustering_ops.nearest_neighbors(
      subset, self._cluster_centers, 1)
  # Sample index of new center using k-MC2 Markov chain.
  new_center_index = gen_clustering_ops.kmc2_chain_initialization(
      array_ops.squeeze(distances), self._random_seed)
  # Extract actual new center.
  newly_sampled_center = array_ops.reshape(subset[new_center_index], [1, -1])
  # Return concatenation with previously sampled centers.
  if self._distance_metric == COSINE_DISTANCE:
    newly_sampled_center = nn_impl.l2_normalize(newly_sampled_center, dim=1)
  return array_ops.concat([self._cluster_centers, newly_sampled_center], 0)
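# A simplified NumPy sketch (an assumption about the op's behavior, not its
# actual kernel) of the Markov chain behind kmc2_chain_initialization: walk
# over the candidate subset, replacing the current pick i by candidate j with
# probability min(1, d_j / d_i), where d is the squared distance to the
# nearest existing center (Metropolis-Hastings with a uniform proposal).
import numpy as np

def kmc2_chain(distances, rng=np.random):
  index = 0
  for j in range(1, len(distances)):
    if rng.uniform() < distances[j] / max(distances[index], 1e-12):
      index = j
  return index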
def _infer_graph(self, inputs, clusters):
  """Maps input to closest cluster and the score.

  Args:
    inputs: list of input Tensors.
    clusters: Tensor of cluster centers.

  Returns:
    List of tuple, where each value in tuple corresponds to a value in inp.
    The tuple has following three elements:
      all_scores: distance of each input to each cluster center.
      score: distance of each input to closest cluster center.
      cluster_idx: index of cluster center closest to the corresponding
        input.
  """
  assert isinstance(inputs, list)
  # Pairwise distances are used only by transform(). In all other cases, this
  # sub-graph is not evaluated.
  scores = self._distance_graph(inputs, clusters, self._distance_metric)
  output = []
  if (self._distance_metric == COSINE_DISTANCE and
      not self._clusters_l2_normalized()):
    # The cosine distance between normalized vectors x and y is half their
    # squared Euclidean distance, so we reuse the nearest_neighbors op and
    # scale its distances by 0.5.
    # TODO(ands): Support COSINE distance in nearest_neighbors and remove
    # this.
    with ops.colocate_with(clusters):
      clusters = nn_impl.l2_normalize(clusters, dim=1)
  for inp, score in zip(inputs, scores):
    with ops.colocate_with(inp):
      (indices, distances) = gen_clustering_ops.nearest_neighbors(
          inp, clusters, 1)
      if self._distance_metric == COSINE_DISTANCE:
        distances *= 0.5
      output.append(
          (score, array_ops.squeeze(distances), array_ops.squeeze(indices)))
  return zip(*output)
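# Illustrative NumPy check of the identity used above: for unit vectors,
# the cosine distance 1 - <x, y> equals half the squared Euclidean distance,
# which is why the nearest_neighbors distances are scaled by 0.5.
import numpy as np

x = np.random.randn(3); x /= np.linalg.norm(x)
y = np.random.randn(3); y /= np.linalg.norm(y)
assert np.isclose(1 - x.dot(y), 0.5 * np.sum((x - y)**2))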
def training_graph(self):
  """Generate a training graph for kmeans algorithm.

  Returns:
    A tuple consisting of:
      all_scores: A matrix (or list of matrices) of dimensions (num_input,
        num_clusters) where the value is the distance of an input vector and
        a cluster center.
      cluster_idx: A vector (or list of vectors). Each element in the vector
        corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      scores: Similar to cluster_idx but specifies the distance to the
        assigned cluster instead.
      training_op: an op that runs an iteration of training.
  """
  # Implementation of kmeans.
  inputs = self._inputs
  cluster_centers_var, total_counts = self._init_clusters()
  cluster_centers = cluster_centers_var

  if self._distance_metric == COSINE_DISTANCE:
    inputs = self._l2_normalize_data(inputs)
    if not self._clusters_l2_normalized():
      cluster_centers = nn_impl.l2_normalize(cluster_centers, dim=1)

  all_scores, scores, cluster_idx = self._infer_graph(inputs, cluster_centers)
  if self._use_mini_batch:
    training_op = self._mini_batch_training_op(
        inputs, cluster_idx, cluster_centers, cluster_centers_var,
        total_counts)
  else:
    assert cluster_centers == cluster_centers_var
    training_op = self._full_batch_training_op(inputs, cluster_idx,
                                               cluster_centers_var)
  return all_scores, cluster_idx, scores, training_op
def call(self, inputs, training=None):
  if self.lr_mul == 1.0:
    W = self.coeff * self.kernel
  else:

    @custom_gradient
    def lr_multiplier(x):
      y = array_ops.identity(x)

      def grad(dy):
        return dy * self.lr_mul

      return y, grad

    W = lr_multiplier(self.coeff * self.kernel)

  training = self._get_training_value(training)

  # Update the singular vector estimate by power iteration.
  W_T = array_ops.transpose(W)
  u = array_ops.identity(self.u)
  for i in range(self.power_iter):
    v = nn_impl.l2_normalize(math_ops.matmul(u, W))  # 1 x filters
    u = nn_impl.l2_normalize(math_ops.matmul(v, W_T))

  # Spectral normalization: sigma_W estimates the largest singular value.
  sigma_W = math_ops.matmul(math_ops.matmul(u, W), array_ops.transpose(v))
  # Gradients need not flow through the power iteration.
  sigma_W = array_ops.stop_gradient(sigma_W)
  W_bar = W / array_ops.squeeze(sigma_W)

  # Assign the new singular vector estimate when training.
  training_value = tf_utils.constant_value(training)
  if training_value is not False:

    def u_update():

      def true_branch():
        return self._assign_singular_vector(self.u, u)

      def false_branch():
        return self.u

      return tf_utils.smart_cond(training, true_branch, false_branch)

    self.add_update(u_update)

  # Normal Dense computation using W_bar.
  inputs = ops.convert_to_tensor(inputs)
  rank = common_shapes.rank(inputs)
  if rank > 2:
    # Broadcasting is required for the inputs.
    outputs = standard_ops.tensordot(inputs, W_bar, [[rank - 1], [0]])
    # Reshape the output back to the original ndim of the input.
    if not context.executing_eagerly():
      shape = inputs.shape.as_list()
      output_shape = shape[:-1] + [self.units]
      outputs.set_shape(output_shape)
  else:
    inputs = math_ops.cast(inputs, self._compute_dtype)
    outputs = math_ops.mat_mul(inputs, W_bar)
  if self.use_bias:
    outputs = nn.bias_add(outputs, self.bias)
  if self.activation is not None:
    return self.activation(outputs)  # pylint: disable=not-callable
  return outputs
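# Illustrative NumPy sketch of the power iteration above: u and v converge to
# the leading singular vectors of W, so u @ W @ v^T approaches the largest
# singular value, which spectral normalization divides out of the kernel.
import numpy as np

W = np.random.randn(16, 8)
u = np.random.randn(1, 16)
for _ in range(50):
  v = u @ W; v /= np.linalg.norm(v)    # 1 x 8
  u = v @ W.T; u /= np.linalg.norm(u)  # 1 x 16
sigma = float(u @ W @ v.T)
assert np.isclose(sigma, np.linalg.svd(W, compute_uv=False)[0], rtol=1e-2)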
def _normalize(self, weight, name):
  output_size = weight.get_shape().as_list()[1]
  g = vs.get_variable(name, [output_size], dtype=weight.dtype)
  return nn_impl.l2_normalize(weight, dim=0) * g
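# Weight normalization in a nutshell (NumPy sketch, illustrative names): each
# output column of the weight matrix is reparameterized as g * v / ||v||,
# decoupling the direction of the weights from their magnitude.
import numpy as np

v = np.random.randn(5, 3)       # Raw weight: 5 inputs, 3 output units.
g = np.array([1.0, 2.0, 0.5])   # Learned per-column scale.
w = g * v / np.linalg.norm(v, axis=0, keepdims=True)
assert np.allclose(np.linalg.norm(w, axis=0), g)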
def __householder_rotation(self, x):
  u = nn_impl.l2_normalize(self.__e1 - self._loc, axis=-1)
  z = x - 2 * math_ops.reduce_sum(x * u, axis=-1, keepdims=True) * u
  return z
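# NumPy check of the Householder reflection above: with u proportional to
# (e1 - loc), reflecting across the hyperplane orthogonal to u maps e1 to
# loc, assuming loc is a unit vector (as in von Mises-Fisher sampling).
import numpy as np

loc = np.random.randn(4); loc /= np.linalg.norm(loc)
e1 = np.array([1., 0., 0., 0.])
u = e1 - loc; u /= np.linalg.norm(u)
reflected = e1 - 2 * np.dot(e1, u) * u
assert np.allclose(reflected, loc)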
def training_graph(self):
  """Generate a training graph for kmeans algorithm.

  This returns, among other things, an op that chooses initial centers
  (init_op), a boolean variable that is set to True when the initial centers
  are chosen (cluster_centers_initialized), and an op to perform either an
  entire Lloyd iteration or a mini-batch of a Lloyd iteration (training_op).
  The caller should use these components as follows. A single worker should
  execute init_op multiple times until cluster_centers_initialized becomes
  True. Then multiple workers may execute training_op any number of times.

  Returns:
    A tuple consisting of:
      all_scores: A matrix (or list of matrices) of dimensions (num_input,
        num_clusters) where the value is the distance of an input vector and
        a cluster center.
      cluster_idx: A vector (or list of vectors). Each element in the vector
        corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      scores: Similar to cluster_idx but specifies the distance to the
        assigned cluster instead.
      cluster_centers_initialized: scalar indicating whether clusters have
        been initialized.
      init_op: an op to initialize the clusters.
      training_op: an op that runs an iteration of training.
  """
  # Implementation of kmeans.
  if (isinstance(self._initial_clusters, str) or
      callable(self._initial_clusters)):
    initial_clusters = self._initial_clusters
    num_clusters = ops.convert_to_tensor(self._num_clusters)
  else:
    initial_clusters = ops.convert_to_tensor(self._initial_clusters)
    num_clusters = array_ops.shape(initial_clusters)[0]

  inputs = self._inputs
  (cluster_centers_var, cluster_centers_initialized, total_counts,
   cluster_centers_updated,
   update_in_steps) = self._create_variables(num_clusters)
  init_op = _InitializeClustersOpFactory(
      self._inputs, num_clusters, initial_clusters, self._distance_metric,
      self._random_seed, self._kmeans_plus_plus_num_retries,
      self._kmc2_chain_length, cluster_centers_var, cluster_centers_updated,
      cluster_centers_initialized).op()
  cluster_centers = cluster_centers_var

  if self._distance_metric == COSINE_DISTANCE:
    inputs = self._l2_normalize_data(inputs)
    if not self._clusters_l2_normalized():
      cluster_centers = nn_impl.l2_normalize(cluster_centers, dim=1)

  all_scores, scores, cluster_idx = self._infer_graph(inputs, cluster_centers)
  if self._use_mini_batch:
    sync_updates_op = self._mini_batch_sync_updates_op(
        update_in_steps, cluster_centers_var, cluster_centers_updated,
        total_counts)
    assert sync_updates_op is not None
    with ops.control_dependencies([sync_updates_op]):
      training_op = self._mini_batch_training_op(
          inputs, cluster_idx, cluster_centers_updated, total_counts)
  else:
    assert cluster_centers == cluster_centers_var
    training_op = self._full_batch_training_op(
        inputs, num_clusters, cluster_idx, cluster_centers_var)
  return (all_scores, cluster_idx, scores, cluster_centers_initialized,
          init_op, training_op)
def _compute_weights(self):
  """Generates weights by combining the direction of the weight vector with
  its norm.
  """
  with variable_scope.variable_scope('compute_weights'):
    self.layer.W = nn_impl.l2_normalize(
        self.layer.v, axis=self.norm_axes) * self.layer.g
def _compute_weights(self):
  """Generates weights by combining the direction of the weight vector with
  its norm.
  """
  with name_scope('compute_weights'):
    self.layer.kernel = nn_impl.l2_normalize(
        self.layer.v, axis=self.kernel_norm_axes) * self.layer.g
def _sample_n(self, n, seed=0):
  return nn_impl.l2_normalize(
      random_ops.random_normal(
          shape=array_ops.concat(([n], [self._dim + 1]), 0),
          dtype=self.dtype,
          seed=seed),
      axis=-1)
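# Why this sampler works (NumPy sketch): an isotropic Gaussian is rotation
# invariant, so normalizing its samples to unit length yields points that
# are uniformly distributed on the unit hypersphere.
import numpy as np

samples = np.random.randn(1000, 3)
samples /= np.linalg.norm(samples, axis=-1, keepdims=True)
assert np.allclose(np.linalg.norm(samples, axis=-1), 1.0)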
from tensorflow import Tensor, matmul, transpose
from tensorflow.math import l2_normalize


def cosine_similarity(tensor: Tensor) -> Tensor:
    """Calculates the pairwise cosine similarity of the rows of a 2D tensor,
    using L2 normalization.
    """
    tensor = l2_normalize(tensor, axis=1)
    return matmul(tensor, transpose(tensor))
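# Example usage (illustrative; assumes TensorFlow 2.x eager execution):
import tensorflow as tf

emb = tf.constant([[1., 0.], [1., 1.], [0., 1.]])
sim = cosine_similarity(emb)  # 3 x 3 matrix; the diagonal is all ones.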
def build(self, input_shape):
  input_shape = tensor_shape.TensorShape(input_shape)
  # Determine the channel axis from the data format.
  if self.data_format == 'channels_first':
    channel_axis = 1
  else:
    channel_axis = -1
  if input_shape.dims[channel_axis].value is None:
    raise ValueError(
        'The channel dimension of the inputs should be defined. Found `None`.')
  input_dim = int(input_shape[channel_axis])
  kernel_shape = self.kernel_size + (input_dim, self.filters)

  kernel = self.add_weight(
      name='kernel',
      shape=kernel_shape,
      initializer=self.kernel_initializer,
      regularizer=self.kernel_regularizer,
      constraint=self.kernel_constraint,
      trainable=True,
      dtype=self.dtype)

  # Weight normalization: kernel = g * V / ||V||, with the norm taken over
  # the first two kernel axes.
  if self.weight_norm:
    self.g = self.add_weight(
        name='wn/g',
        shape=(self.filters,),
        initializer=tf.ones_initializer(),
        trainable=True,
        dtype=kernel.dtype)
    self.kernel = (
        tf.reshape(self.g, [1, 1, self.filters]) *
        nn_impl.l2_normalize(kernel, [0, 1]))
  else:
    self.kernel = kernel

  if self.use_bias:
    self.bias = self.add_weight(
        name='bias',
        shape=(self.filters,),
        initializer=self.bias_initializer,
        regularizer=self.bias_regularizer,
        constraint=self.bias_constraint,
        trainable=True,
        dtype=self.dtype)
  else:
    self.bias = None
  self.input_spec = InputSpec(
      ndim=self.rank + 2, axes={channel_axis: input_dim})

  if self.padding == 'causal':
    op_padding = 'valid'
  else:
    op_padding = self.padding
  if not isinstance(op_padding, (list, tuple)):
    op_padding = op_padding.upper()
  self._convolution_op = nn_ops.Convolution(
      input_shape,
      filter_shape=self.kernel.get_shape(),
      dilation_rate=self.dilation_rate,
      strides=self.strides,
      padding=op_padding,
      data_format=conv_utils.convert_data_format(self.data_format,
                                                 self.rank + 2))
  self.built = True
def call(self, inputs, training=None):
  if self.lr_mul == 1.0:
    kernel = self.coeff * self.kernel
  else:

    @custom_gradient
    def lr_multiplier(x):
      y = array_ops.identity(x)

      def grad(dy):
        return dy * self.lr_mul

      return y, grad

    kernel = lr_multiplier(self.coeff * self.kernel)

  training = self._get_training_value(training)

  # Update the singular vector estimate by power iteration.
  if self.data_format == 'channels_first':
    W_T = array_ops.reshape(kernel, (self.filters, -1))
    W = array_ops.transpose(W_T)
  else:
    W = array_ops.reshape(kernel, (-1, self.filters))
    W_T = array_ops.transpose(W)
  u = array_ops.identity(self.u)
  for i in range(self.power_iter):
    v = nn_impl.l2_normalize(math_ops.matmul(u, W))  # 1 x filters
    u = nn_impl.l2_normalize(math_ops.matmul(v, W_T))

  # Spectral normalization: sigma_W estimates the largest singular value.
  sigma_W = math_ops.matmul(math_ops.matmul(u, W), array_ops.transpose(v))
  # Gradients need not flow through the power iteration.
  sigma_W = array_ops.stop_gradient(sigma_W)
  W_bar = kernel / array_ops.squeeze(sigma_W)

  # Assign the new singular vector estimate when training.
  training_value = tf_utils.constant_value(training)
  if training_value is not False:

    def u_update():

      def true_branch():
        return self._assign_singular_vector(self.u, u)

      def false_branch():
        return self.u

      return tf_utils.smart_cond(training, true_branch, false_branch)

    self.add_update(u_update)

  # Normal convolution using W_bar.
  outputs = self._convolution_op(inputs, W_bar)
  if self.use_bias:
    if self.data_format == 'channels_first':
      if self.rank == 1:
        # nn.bias_add does not accept a 1D input tensor.
        bias = array_ops.reshape(self.bias, (1, self.filters, 1))
        outputs += bias
      else:
        outputs = nn.bias_add(outputs, self.bias, data_format='NCHW')
    else:
      outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')
  if self.activation is not None:
    return self.activation(outputs)
  return outputs