Example 1
  def _init_clusters(self):
    """Initialization of clusters.

    Returns:
      A tuple with the following elements:
      cluster_centers: a Tensor for storing cluster centers.
      cluster_counts: a Tensor for storing counts of points assigned to each
        cluster. This is used by mini-batch training.
    """
    init = self._initial_clusters
    if init == RANDOM_INIT:
      clusters_init = self._init_clusters_random()
    elif init == KMEANS_PLUS_PLUS_INIT:
      # Points from only the first shard are used for initializing centers.
      # TODO(ands): Use all points.
      inp = self._inputs[0]
      if self._distance_metric == COSINE_DISTANCE:
        inp = nn_impl.l2_normalize(inp, dim=1)
      clusters_init = gen_clustering_ops.kmeans_plus_plus_initialization(
          inp, self._num_clusters, self._random_seed,
          self._kmeans_plus_plus_num_retries)
    elif callable(init):
      clusters_init = init(self._inputs, self._num_clusters)
    elif not isinstance(init, str):
      clusters_init = init
    else:
      assert False, 'Unsupported init passed to Kmeans %s' % str(init)
    if self._distance_metric == COSINE_DISTANCE and clusters_init is not None:
      clusters_init = nn_impl.l2_normalize(clusters_init, dim=1)
    clusters_init = clusters_init if clusters_init is not None else []
    # TODO(agarwal): Locally cache cluster_centers on the worker to avoid
    # copying them each step.
    cluster_centers = variables.Variable(clusters_init,
                                         name='clusters',
                                         validate_shape=False)
    if self._use_mini_batch and self._mini_batch_steps_per_iteration > 1:
      # Copy of cluster centers actively updated each step according to
      # mini-batch update rule.
      cluster_centers_updated = variables.Variable(clusters_init,
                                                   name='clusters_updated',
                                                   validate_shape=False)
      # How many steps till we copy the updated clusters to cluster_centers.
      update_in_steps = variables.Variable(self._mini_batch_steps_per_iteration,
                                           dtype=dtypes.int64,
                                           name='update_in_steps')
      # Count of points assigned to cluster_centers_updated.
      cluster_counts = variables.Variable(array_ops.zeros([self._num_clusters],
                                                          dtype=dtypes.int64))
    else:
      cluster_centers_updated = cluster_centers
      update_in_steps = None
      cluster_counts = (variables.Variable(array_ops.ones([self._num_clusters],
                                                          dtype=dtypes.int64))
                        if self._use_mini_batch else None)
    return (cluster_centers, cluster_counts,
            cluster_centers_updated, update_in_steps)
Example 2
 def _f():
   # Note that there is a race condition here, so we only make a
   # best-effort update. We reset update_in_steps first so that other
   # workers don't duplicate the update. Also, we update cluster_center_vars
   # before resetting total_counts to avoid large updates to
   # cluster_centers_updated based on partially updated cluster_center_vars.
   with ops.control_dependencies([
       state_ops.assign(update_in_steps,
                        self._mini_batch_steps_per_iteration - 1)
   ]):
     with ops.colocate_with(
         cluster_centers_updated, ignore_existing=True):
       if self._distance_metric == COSINE_DISTANCE:
         cluster_centers = nn_impl.l2_normalize(
             cluster_centers_updated, dim=1)
       else:
         cluster_centers = cluster_centers_updated
     with ops.colocate_with(cluster_centers_var, ignore_existing=True):
       with ops.control_dependencies(
           [state_ops.assign(cluster_centers_var, cluster_centers)]):
         with ops.colocate_with(None, ignore_existing=True):
           with ops.control_dependencies([
               state_ops.assign(total_counts,
                                array_ops.zeros_like(total_counts))
           ]):
             return array_ops.identity(update_in_steps)
Example 3
  def _init_clusters(self):
    """Initialization of clusters.

    Returns:
      A tuple with the following elements:
      cluster_centers: a Tensor for storing cluster centers.
      cluster_counts: a Tensor for storing counts of points assigned to each
        cluster. This is used by mini-batch training.
    """
    init = self._initial_clusters
    if init == RANDOM_INIT:
      clusters_init = self._init_clusters_random()
    elif init == KMEANS_PLUS_PLUS_INIT:
      # Points from only the first shard are used for initializing centers.
      # TODO(ands): Use all points.
      clusters_init = gen_clustering_ops.kmeans_plus_plus_initialization(
          self._inputs[0], self._num_clusters, self._random_seed,
          self._kmeans_plus_plus_num_retries)
    elif callable(init):
      clusters_init = init(self._inputs, self._num_clusters)
    elif not isinstance(init, str):
      clusters_init = init
    else:
      assert False, 'Unsupported init passed to Kmeans %s' % str(init)
    if self._distance_metric == COSINE_DISTANCE and clusters_init is not None:
      clusters_init = nn_impl.l2_normalize(clusters_init, dim=1)
    clusters_init = clusters_init if clusters_init is not None else []
    cluster_centers = variables.Variable(
        clusters_init, name='clusters', validate_shape=False)
    cluster_counts = (variables.Variable(
        array_ops.ones(
            [self._num_clusters], dtype=dtypes.int64)) if self._use_mini_batch
                      else None)
    return cluster_centers, cluster_counts
Example 4
 def _l2_normalize_data(cls, inputs):
   """Normalized the input data."""
   output = []
   for inp in inputs:
     with ops.colocate_with(inp):
       output.append(nn_impl.l2_normalize(inp, dim=1))
   return output
Example 5
  def _full_batch_training_op(self, inputs, cluster_idx_list, cluster_centers):
    """Creates an op for training for full batch case.

    Args:
      inputs: list of input Tensors.
      cluster_idx_list: A vector (or list of vectors). Each element in the
        vector corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      cluster_centers: Tensor Ref of cluster centers.

    Returns:
      An op that performs a full-batch update of the cluster centers.
    """
    cluster_sums = []
    cluster_counts = []
    epsilon = constant_op.constant(1e-6, dtype=inputs[0].dtype)
    for inp, cluster_idx in zip(inputs, cluster_idx_list):
      with ops.colocate_with(inp):
        cluster_sums.append(
            math_ops.unsorted_segment_sum(inp, cluster_idx, self._num_clusters))
        cluster_counts.append(
            math_ops.unsorted_segment_sum(
                array_ops.reshape(
                    array_ops.ones(
                        array_ops.reshape(array_ops.shape(inp)[0], [-1])),
                    [-1, 1]), cluster_idx, self._num_clusters))
    with ops.colocate_with(cluster_centers):
      new_clusters_centers = math_ops.add_n(cluster_sums) / (math_ops.cast(
          math_ops.add_n(cluster_counts), cluster_sums[0].dtype) + epsilon)
      if self._clusters_l2_normalized():
        new_clusters_centers = nn_impl.l2_normalize(new_clusters_centers, dim=1)
    return state_ops.assign(cluster_centers, new_clusters_centers)
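
The op above is the classic Lloyd update: sum the points assigned to each
cluster, divide by the counts, and add a small epsilon so empty clusters do
not divide by zero. A minimal NumPy sketch of the same update (the function
name and array shapes are assumptions for illustration, not part of the
library):

import numpy as np

def full_batch_update(points, cluster_idx, num_clusters, epsilon=1e-6):
  # points: (n, d) array; cluster_idx: (n,) int array of assignments.
  sums = np.zeros((num_clusters, points.shape[1]), dtype=points.dtype)
  counts = np.zeros((num_clusters, 1), dtype=points.dtype)
  np.add.at(sums, cluster_idx, points)  # like unsorted_segment_sum of points
  np.add.at(counts, cluster_idx, 1.0)   # like unsorted_segment_sum of ones
  return sums / (counts + epsilon)      # epsilon guards empty clusters
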
Example 6
 def _sample_random():
   """Returns a random point as a cluster center."""
   # By assumption the batch is reshuffled and _sample_random is always
   # called for i=0. Hence, we simply return the first point.
   new_center = array_ops.reshape(first_shard[0], [1, -1])
   if self._distance_metric == COSINE_DISTANCE:
     new_center = nn_impl.l2_normalize(new_center, dim=1)
   return new_center
Example 7
  def _initialize_clusters(self,
                           cluster_centers,
                           cluster_centers_initialized,
                           cluster_centers_updated):
    """Returns an op to initialize the cluster centers."""

    init = self._initial_clusters
    if init == RANDOM_INIT:
      clusters_init = self._init_clusters_random()
    elif init == KMEANS_PLUS_PLUS_INIT:
      # Points from only the first shard are used for initializing centers.
      # TODO(ands): Use all points.
      inp = self._inputs[0]
      if self._distance_metric == COSINE_DISTANCE:
        inp = nn_impl.l2_normalize(inp, dim=1)
      clusters_init = gen_clustering_ops.kmeans_plus_plus_initialization(
          inp, self._num_clusters, self._random_seed,
          self._kmeans_plus_plus_num_retries)
    elif callable(init):
      clusters_init = init(self._inputs, self._num_clusters)
    elif not isinstance(init, str):
      clusters_init = init
    else:
      assert False, 'Unsupported init passed to Kmeans %s' % str(init)
    if self._distance_metric == COSINE_DISTANCE and clusters_init is not None:
      clusters_init = nn_impl.l2_normalize(clusters_init, dim=1)

    with ops.colocate_with(cluster_centers_initialized):
      initialized = control_flow_ops.with_dependencies(
          [clusters_init],
          array_ops.identity(cluster_centers_initialized))
    with ops.colocate_with(cluster_centers):
      assign_centers = state_ops.assign(cluster_centers, clusters_init,
                                        validate_shape=False)
      if cluster_centers_updated != cluster_centers:
        assign_centers = control_flow_ops.group(
            assign_centers,
            state_ops.assign(cluster_centers_updated, clusters_init,
                             validate_shape=False))
      assign_centers = control_flow_ops.with_dependencies(
          [assign_centers],
          state_ops.assign(cluster_centers_initialized, True))
      return control_flow_ops.cond(initialized,
                                   control_flow_ops.no_op,
                                   lambda: assign_centers).op
Example 8
 def testL2NormalizeDimArray(self):
   x_shape = [20, 7, 3]
   np.random.seed(1)
   x_np = np.random.random_sample(x_shape).astype(np.float32)
   dim = [1, 2]
   y_np = self._l2Normalize(x_np, dim)
   x_tf = constant_op.constant(x_np, name="x")
   y_tf = nn_impl.l2_normalize(x_tf, dim)
   self.assertAllClose(y_np, self.evaluate(y_tf))
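
The tests above and below compare against a _l2Normalize NumPy reference
helper that is not shown in this excerpt. A plausible minimal sketch of such
a helper (an assumption for illustration; dim may be an int or a list of
axes, as in testL2NormalizeDimArray):

import numpy as np

def _l2_normalize_np(x, dim):
  # Divide x by its L2 norm taken over the given axis or axes.
  norm = np.sqrt(np.sum(np.square(x), axis=tuple(np.atleast_1d(dim)),
                        keepdims=True))
  return x / norm
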
Example 9
 def _kmeans_plus_plus(self):
   # Points from only the first shard are used for initializing centers.
   # TODO(ands): Use all points.
   inp = self._inputs[0]
   if self._distance_metric == COSINE_DISTANCE:
     inp = nn_impl.l2_normalize(inp, dim=1)
   return gen_clustering_ops.kmeans_plus_plus_initialization(
       inp, math_ops.cast(self._num_remaining, dtypes.int64), self._seed,
       self._kmeans_plus_plus_num_retries)
Example 10
 def testL2Normalize(self):
   x_shape = [20, 7, 3]
   np.random.seed(1)
   x_np = np.random.random_sample(x_shape).astype(np.float32)
   for dim in range(len(x_shape)):
     y_np = self._l2Normalize(x_np, dim)
     with self.test_session():
       x_tf = constant_op.constant(x_np, name="x")
       y_tf = nn_impl.l2_normalize(x_tf, dim)
       self.assertAllClose(y_np, y_tf.eval())
Example 11
 def testL2NormalizeGradient(self):
   x_shape = [20, 7, 3]
   np.random.seed(1)
   x_np = np.random.random_sample(x_shape).astype(np.float64)
   for dim in range(len(x_shape)):
     with self.test_session():
       x_tf = constant_op.constant(x_np, name="x")
       y_tf = nn_impl.l2_normalize(x_tf, dim)
       err = gradient_checker.compute_gradient_error(x_tf, x_shape, y_tf,
                                                     x_shape)
     print("L2Normalize gradient err = %g " % err)
     self.assertLess(err, 1e-4)
Example 12
  def training_graph(self):
    """Generate a training graph for kmeans algorithm.

    Returns:
      A tuple consisting of:
      all_scores: A matrix (or list of matrices) of dimensions (num_input,
        num_clusters) where the value is the distance between an input vector
        and a cluster center.
      cluster_idx: A vector (or list of vectors). Each element in the vector
        corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      scores: Similar to cluster_idx but specifies the distance to the
        assigned cluster instead.
      cluster_centers_initialized: scalar indicating whether clusters have been
        initialized.
      init_op: an op to initialize the clusters.
      training_op: an op that runs an iteration of training.
    """
    # Implementation of kmeans.
    inputs = self._inputs
    (cluster_centers_var,
     cluster_centers_initialized,
     total_counts,
     cluster_centers_updated,
     update_in_steps) = self._create_variables()
    init_op = self._initialize_clusters(cluster_centers_var,
                                        cluster_centers_initialized,
                                        cluster_centers_updated)
    cluster_centers = cluster_centers_var

    if self._distance_metric == COSINE_DISTANCE:
      inputs = self._l2_normalize_data(inputs)
      if not self._clusters_l2_normalized():
        cluster_centers = nn_impl.l2_normalize(cluster_centers, dim=1)

    all_scores, scores, cluster_idx = self._infer_graph(inputs, cluster_centers)
    if self._use_mini_batch:
      sync_updates_op = self._mini_batch_sync_updates_op(
          update_in_steps,
          cluster_centers_var, cluster_centers_updated,
          total_counts)
      assert sync_updates_op is not None
      with ops.control_dependencies([sync_updates_op]):
        training_op = self._mini_batch_training_op(
            inputs, cluster_idx, cluster_centers_updated, total_counts)
    else:
      assert cluster_centers == cluster_centers_var
      training_op = self._full_batch_training_op(inputs, cluster_idx,
                                                 cluster_centers_var)

    return (all_scores, cluster_idx, scores,
            cluster_centers_initialized, init_op, training_op)
Example 13
  def _compute_cosine_distance(cls, inputs, clusters, inputs_normalized=True):
    """Computes cosine distance between each input and each cluster center.

    Args:
      inputs: list of input Tensors.
      clusters: cluster Tensor.
      inputs_normalized: if True, it assumes that inp and clusters are
        normalized and computes the dot product, which is equivalent to the
        cosine distance. Else it L2 normalizes the inputs first.

    Returns:
      list of Tensors, where each element corresponds to each element in inp.
      The value is the distance of each row to all the cluster centers.
    """
    output = []
    if not inputs_normalized:
      with ops.colocate_with(clusters):
        clusters = nn_impl.l2_normalize(clusters, dim=1)
    for inp in inputs:
      with ops.colocate_with(inp):
        if not inputs_normalized:
          inp = nn_impl.l2_normalize(inp, dim=1)
        output.append(1 - math_ops.matmul(inp, clusters, transpose_b=True))
    return output
Example 14
 def _add_new_centers(self):
   """Adds some centers and returns the number of centers remaining."""
   new_centers = self._choose_initial_centers()
   if self._distance_metric == COSINE_DISTANCE:
     new_centers = nn_impl.l2_normalize(new_centers, dim=1)
   # If cluster_centers is empty, it doesn't have the right shape for concat.
   all_centers = control_flow_ops.cond(
       math_ops.equal(self._num_selected, 0), lambda: new_centers,
       lambda: array_ops.concat([self._cluster_centers, new_centers], 0))
   # TODO(ccolby): De-dupe all_centers?
   a = state_ops.assign(
       self._cluster_centers, all_centers, validate_shape=False)
   if self._cluster_centers_updated is not self._cluster_centers:
     a = state_ops.assign(
         self._cluster_centers_updated, a, validate_shape=False)
   return self._num_clusters - array_ops.shape(a)[0]
Example 15
 def _sample_kmc2_chain():
   """Returns previous centers as well as a new center sampled using k-MC2.
   """
   # Extract the subset from the underlying batch.
   start = i * self._kmc2_chain_length
   end = start + self._kmc2_chain_length
   subset = first_shard[start:end]
   # Compute the distances from points in the subset to previous centers.
   _, distances = gen_clustering_ops.nearest_neighbors(
       subset, self._cluster_centers, 1)
   # Sample index of new center using k-MC2 Markov chain.
   new_center_index = gen_clustering_ops.kmc2_chain_initialization(
       array_ops.squeeze(distances), self._random_seed)
   # Extract actual new center.
   newly_sampled_center = array_ops.reshape(subset[new_center_index],
                                            [1, -1])
   # Return concatenation with previously sampled centers.
   if self._distance_metric == COSINE_DISTANCE:
     newly_sampled_center = nn_impl.l2_normalize(
         newly_sampled_center, dim=1)
   return array_ops.concat([self._cluster_centers, newly_sampled_center],
                           0)
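
For context, kmc2_chain_initialization samples the index of a new center by
running a short Markov chain over the candidate distances, accepting
candidate j with probability d_j / d_current (the k-MC2 scheme of Bachem et
al.). A plain-NumPy sketch of that idea, as an assumption about the op's
behavior rather than its exact implementation:

import numpy as np

def kmc2_chain_index(distances, seed=None):
  # Walk the chain over all candidates; return the last accepted index.
  rng = np.random.RandomState(seed)
  current = 0
  for j in range(1, len(distances)):
    # Accept candidate j with probability min(1, d_j / d_current).
    if distances[current] == 0.0 or (
        distances[j] / distances[current] > rng.uniform()):
      current = j
  return current
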
Example 16
  def _infer_graph(self, inputs, clusters):
    """Maps input to closest cluster and the score.

    Args:
      inputs: list of input Tensors.
      clusters: Tensor of cluster centers.

    Returns:
      List of tuples, where each tuple corresponds to a value in inp.
      Each tuple has the following three elements:
      all_scores: distance of each input to each cluster center.
      score: distance of each input to closest cluster center.
      cluster_idx: index of cluster center closest to the corresponding input.
    """
    assert isinstance(inputs, list)
    # Pairwise distances are used only by transform(). In all other cases, this
    # sub-graph is not evaluated.
    scores = self._distance_graph(inputs, clusters, self._distance_metric)
    output = []
    if (self._distance_metric == COSINE_DISTANCE and
        not self._clusters_l2_normalized()):
      # The cosine distance between normalized vectors x and y is half of
      # their squared Euclidean distance. We are using this fact and reusing
      # the nearest_neighbors op.
      # TODO(ands): Support COSINE distance in nearest_neighbors and remove
      # this.
      with ops.colocate_with(clusters):
        clusters = nn_impl.l2_normalize(clusters, dim=1)
    for inp, score in zip(inputs, scores):
      with ops.colocate_with(inp):
        (indices,
         distances) = gen_clustering_ops.nearest_neighbors(inp, clusters, 1)
        if self._distance_metric == COSINE_DISTANCE:
          distances *= 0.5
        output.append(
            (score, array_ops.squeeze(distances), array_ops.squeeze(indices)))
    return zip(*output)
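
The 0.5 factor above comes from a standard identity: for unit vectors x and
y, ||x - y||^2 = 2 - 2 x.y, so the cosine distance 1 - x.y is exactly half
the squared Euclidean distance. A quick NumPy check with illustrative values:

import numpy as np

x = np.array([0.6, 0.8])  # unit-norm
y = np.array([1.0, 0.0])  # unit-norm
cosine_dist = 1.0 - np.dot(x, y)          # 0.4
squared_euclidean = np.sum((x - y) ** 2)  # 0.8
print(np.isclose(cosine_dist, 0.5 * squared_euclidean))  # True
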
Example 17
    def _infer_graph(self, inputs, clusters):
        """Maps input to closest cluster and the score.

        Args:
          inputs: list of input Tensors.
          clusters: Tensor of cluster centers.

        Returns:
          List of tuples, where each tuple corresponds to a value in inp.
          Each tuple has the following three elements:
          all_scores: distance of each input to each cluster center.
          score: distance of each input to the closest cluster center.
          cluster_idx: index of the cluster center closest to the
            corresponding input.
        """
        assert isinstance(inputs, list)
        # Pairwise distances are used only by transform(). In all other cases, this
        # sub-graph is not evaluated.
        scores = self._distance_graph(inputs, clusters, self._distance_metric)
        output = []
        if (self._distance_metric == COSINE_DISTANCE
                and not self._clusters_l2_normalized()):
            # The cosine distance between normalized vectors x and y is half
            # of their squared Euclidean distance. We are using this fact and
            # reusing the nearest_neighbors op.
            # TODO(ands): Support COSINE distance in nearest_neighbors and remove
            # this.
            with ops.colocate_with(clusters):
                clusters = nn_impl.l2_normalize(clusters, dim=1)
        for inp, score in zip(inputs, scores):
            with ops.colocate_with(inp):
                (indices, distances) = gen_clustering_ops.nearest_neighbors(
                    inp, clusters, 1)
                if self._distance_metric == COSINE_DISTANCE:
                    distances *= 0.5
                output.append((score, array_ops.squeeze(distances),
                               array_ops.squeeze(indices)))
        return zip(*output)
Example 18
    def _full_batch_training_op(self, inputs, cluster_idx_list,
                                cluster_centers):
        """Creates an op for training for the full batch case.

        Args:
          inputs: list of input Tensors.
          cluster_idx_list: A vector (or list of vectors). Each element in the
            vector corresponds to an input row in 'inp' and specifies the
            cluster id corresponding to the input.
          cluster_centers: Tensor Ref of cluster centers.

        Returns:
          An op that performs a full-batch update of the cluster centers.
        """
        cluster_sums = []
        cluster_counts = []
        epsilon = constant_op.constant(1e-6, dtype=inputs[0].dtype)
        for inp, cluster_idx in zip(inputs, cluster_idx_list):
            with ops.colocate_with(inp):
                cluster_sums.append(
                    math_ops.unsorted_segment_sum(inp, cluster_idx,
                                                  self._num_clusters))
                cluster_counts.append(
                    math_ops.unsorted_segment_sum(
                        array_ops.reshape(
                            array_ops.ones(
                                array_ops.reshape(
                                    array_ops.shape(inp)[0], [-1])), [-1, 1]),
                        cluster_idx, self._num_clusters))
        with ops.colocate_with(cluster_centers):
            new_clusters_centers = math_ops.add_n(cluster_sums) / (
                math_ops.cast(math_ops.add_n(cluster_counts),
                              cluster_sums[0].dtype) + epsilon)
            if self._clusters_l2_normalized():
                new_clusters_centers = nn_impl.l2_normalize(
                    new_clusters_centers, dim=1)
        return state_ops.assign(cluster_centers, new_clusters_centers)
Example 19
    def training_graph(self):
        """Generate a training graph for the kmeans algorithm.

        Returns:
          A tuple consisting of:
          all_scores: A matrix (or list of matrices) of dimensions (num_input,
            num_clusters) where the value is the distance between an input
            vector and a cluster center.
          cluster_idx: A vector (or list of vectors). Each element in the
            vector corresponds to an input row in 'inp' and specifies the
            cluster id corresponding to the input.
          scores: Similar to cluster_idx but specifies the distance to the
            assigned cluster instead.
          training_op: an op that runs an iteration of training.
        """
        # Implementation of kmeans.
        inputs = self._inputs
        cluster_centers_var, total_counts = self._init_clusters()
        cluster_centers = cluster_centers_var

        if self._distance_metric == COSINE_DISTANCE:
            inputs = self._l2_normalize_data(inputs)
            if not self._clusters_l2_normalized():
                cluster_centers = nn_impl.l2_normalize(cluster_centers, dim=1)

        all_scores, scores, cluster_idx = self._infer_graph(
            inputs, cluster_centers)
        if self._use_mini_batch:
            training_op = self._mini_batch_training_op(inputs, cluster_idx,
                                                       cluster_centers,
                                                       cluster_centers_var,
                                                       total_counts)
        else:
            assert cluster_centers == cluster_centers_var
            training_op = self._full_batch_training_op(inputs, cluster_idx,
                                                       cluster_centers_var)
        return all_scores, cluster_idx, scores, training_op
Example 20
  def training_graph(self):
    """Generate a training graph for kmeans algorithm.

    Returns:
      A tuple consisting of:
      all_scores: A matrix (or list of matrices) of dimensions (num_input,
        num_clusters) where the value is the distance between an input vector
        and a cluster center.
      cluster_idx: A vector (or list of vectors). Each element in the vector
        corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      scores: Similar to cluster_idx but specifies the distance to the
        assigned cluster instead.
      training_op: an op that runs an iteration of training.
    """
    # Implementation of kmeans.
    inputs = self._inputs
    cluster_centers_var, total_counts = self._init_clusters()
    cluster_centers = cluster_centers_var

    if self._distance_metric == COSINE_DISTANCE:
      inputs = self._l2_normalize_data(inputs)
      if not self._clusters_l2_normalized():
        cluster_centers = nn_impl.l2_normalize(cluster_centers, dim=1)

    all_scores, scores, cluster_idx = self._infer_graph(inputs, cluster_centers)
    if self._use_mini_batch:
      training_op = self._mini_batch_training_op(inputs, cluster_idx,
                                                 cluster_centers,
                                                 cluster_centers_var,
                                                 total_counts)
    else:
      assert cluster_centers == cluster_centers_var
      training_op = self._full_batch_training_op(inputs, cluster_idx,
                                                 cluster_centers_var)
    return all_scores, cluster_idx, scores, training_op
Example 21
    def call(self, inputs, training=None):
        if self.lr_mul == 1.0:
            W = self.coeff * self.kernel
        else:

            @custom_gradient
            def lr_multiplier(x):
                y = array_ops.identity(x)

                def grad(dy):
                    return dy * self.lr_mul

                return y, grad

            W = lr_multiplier(self.coeff * self.kernel)

        training = self._get_training_value(training)

        # Update singular vector by power iteration
        W_T = array_ops.transpose(W)
        u = array_ops.identity(self.u)
        for i in range(self.power_iter):
            v = nn_impl.l2_normalize(math_ops.matmul(u, W))  # 1 x filters
            u = nn_impl.l2_normalize(math_ops.matmul(v, W_T))
        # Spectral Normalization
        sigma_W = math_ops.matmul(math_ops.matmul(u, W),
                                  array_ops.transpose(v))
        # Backprop is not needed through the power iteration
        sigma_W = array_ops.stop_gradient(sigma_W)
        W_bar = W / array_ops.squeeze(sigma_W)

        # Assign new singular vector
        training_value = tf_utils.constant_value(training)
        if training_value is not False:

            def u_update():
                def true_branch():
                    return self._assign_singular_vector(self.u, u)

                def false_branch():
                    return self.u

                return tf_utils.smart_cond(training, true_branch, false_branch)

            self.add_update(u_update)

        # normal Dense using W_bar
        inputs = ops.convert_to_tensor(inputs)
        rank = common_shapes.rank(inputs)
        if rank > 2:
            # Broadcasting is required for the inputs.
            outputs = standard_ops.tensordot(inputs, W_bar, [[rank - 1], [0]])
            # Reshape the output back to the original ndim of the input.
            if not context.executing_eagerly():
                shape = inputs.shape.as_list()
                output_shape = shape[:-1] + [self.units]
                outputs.set_shape(output_shape)
        else:
            inputs = math_ops.cast(inputs, self._compute_dtype)
            outputs = math_ops.mat_mul(inputs, W_bar)
        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)
        if self.activation is not None:
            return self.activation(outputs)  # pylint: disable=not-callable
        return outputs
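
As a sanity check on the power iteration above, the estimated sigma should
approach the largest singular value of W. A small NumPy experiment (the
shapes and iteration count are arbitrary choices for illustration):

import numpy as np

rng = np.random.RandomState(0)
W = rng.randn(64, 32)
u = rng.randn(1, 64)
for _ in range(50):  # plays the role of power_iter
    v = u @ W / np.linalg.norm(u @ W)      # 1 x filters
    u = v @ W.T / np.linalg.norm(v @ W.T)
sigma = (u @ W @ v.T).item()
# sigma is approximately the top singular value of W.
print(np.isclose(sigma, np.linalg.svd(W, compute_uv=False)[0], rtol=1e-3))
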
Example 22
 def _normalize(self, weight, name):
     output_size = weight.get_shape().as_list()[1]
     g = vs.get_variable(name, [output_size], dtype=weight.dtype)
     return nn_impl.l2_normalize(weight, dim=0) * g
Example 23
 def __householder_rotation(self, x):
     u = nn_impl.l2_normalize(self.__e1 - self._loc, axis=-1)
     z = x - 2 * math_ops.reduce_sum(x * u, axis=-1, keepdims=True) * u
     return z
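
This is the Householder reflection z = x - 2 (x . u) u with u proportional
to e1 - loc; since e1 and loc are both unit vectors, the reflection
exchanges them. A quick NumPy check with illustrative values:

import numpy as np

loc = np.array([0.6, 0.8, 0.0])  # unit vector standing in for self._loc
e1 = np.array([1.0, 0.0, 0.0])
u = (e1 - loc) / np.linalg.norm(e1 - loc)
z = e1 - 2 * np.dot(e1, u) * u
print(np.allclose(z, loc))  # True: the reflection maps e1 onto loc
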
Example 24
    def training_graph(self):
        """Generate a training graph for kmeans algorithm.

    This returns, among other things, an op that chooses initial centers
    (init_op), a boolean variable that is set to True when the initial centers
    are chosen (cluster_centers_initialized), and an op to perform either an
    entire Lloyd iteration or a mini-batch of a Lloyd iteration (training_op).
    The caller should use these components as follows. A single worker should
    execute init_op multiple times until cluster_centers_initialized becomes
    True. Then multiple workers may execute training_op any number of times.

    Returns:
      A tuple consisting of:
      all_scores: A matrix (or list of matrices) of dimensions (num_input,
        num_clusters) where the value is the distance of an input vector and a
        cluster center.
      cluster_idx: A vector (or list of vectors). Each element in the vector
        corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      scores: Similar to cluster_idx but specifies the distance to the
        assigned cluster instead.
      cluster_centers_initialized: scalar indicating whether clusters have been
        initialized.
      init_op: an op to initialize the clusters.
      training_op: an op that runs an iteration of training.
    """
        # Implementation of kmeans.
        if (isinstance(self._initial_clusters, str)
                or callable(self._initial_clusters)):
            initial_clusters = self._initial_clusters
            num_clusters = ops.convert_to_tensor(self._num_clusters)
        else:
            initial_clusters = ops.convert_to_tensor(self._initial_clusters)
            num_clusters = array_ops.shape(initial_clusters)[0]

        inputs = self._inputs
        (cluster_centers_var, cluster_centers_initialized, total_counts,
         cluster_centers_updated,
         update_in_steps) = self._create_variables(num_clusters)
        init_op = _InitializeClustersOpFactory(
            self._inputs, num_clusters, initial_clusters,
            self._distance_metric, self._random_seed,
            self._kmeans_plus_plus_num_retries, self._kmc2_chain_length,
            cluster_centers_var, cluster_centers_updated,
            cluster_centers_initialized).op()
        cluster_centers = cluster_centers_var

        if self._distance_metric == COSINE_DISTANCE:
            inputs = self._l2_normalize_data(inputs)
            if not self._clusters_l2_normalized():
                cluster_centers = nn_impl.l2_normalize(cluster_centers, dim=1)

        all_scores, scores, cluster_idx = self._infer_graph(
            inputs, cluster_centers)
        if self._use_mini_batch:
            sync_updates_op = self._mini_batch_sync_updates_op(
                update_in_steps, cluster_centers_var, cluster_centers_updated,
                total_counts)
            assert sync_updates_op is not None
            with ops.control_dependencies([sync_updates_op]):
                training_op = self._mini_batch_training_op(
                    inputs, cluster_idx, cluster_centers_updated, total_counts)
        else:
            assert cluster_centers == cluster_centers_var
            training_op = self._full_batch_training_op(inputs, num_clusters,
                                                       cluster_idx,
                                                       cluster_centers_var)

        return (all_scores, cluster_idx, scores, cluster_centers_initialized,
                init_op, training_op)
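
A minimal driver sketch for the protocol described in the docstring; the
kmeans instance, session module, and num_iterations below are assumptions
for illustration, not part of this class:

# One worker polls init_op until the centers are in place; after that,
# any number of workers may run training_op.
(_, _, _, cluster_centers_initialized,
 init_op, training_op) = kmeans.training_graph()
with session.Session() as sess:
    sess.run(variables.global_variables_initializer())
    while not sess.run(cluster_centers_initialized):
        sess.run(init_op)
    for _ in range(num_iterations):
        sess.run(training_op)
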
Example 25
 def _compute_weights(self):
     """Generate weights by combining the direction of weight vector
      with it's norm """
     with variable_scope.variable_scope('compute_weights'):
         self.layer.W = nn_impl.l2_normalize(
             self.layer.v, axis=self.norm_axes) * self.layer.g
Example 26
 def _compute_weights(self):
     """Generate weights by combining the direction of weight vector
      with its norm """
     with name_scope('compute_weights'):
         self.layer.kernel = nn_impl.l2_normalize(
             self.layer.v, axis=self.kernel_norm_axes) * self.layer.g
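
Both _compute_weights variants implement the same weight-normalization
re-parameterization, kernel = g * v / ||v||, where the norm runs over the
axes that leave one scale per output unit. In plain NumPy (the shapes and
axis layout are assumptions for illustration):

import numpy as np

v = np.random.randn(3, 5)  # direction: in_dim x out_dim
g = np.full(5, 2.0)        # one learned scale per output unit
w = g * v / np.linalg.norm(v, axis=0, keepdims=True)
print(np.allclose(np.linalg.norm(w, axis=0), g))  # column norms equal g
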
Example 27
  def training_graph(self):
    """Generate a training graph for kmeans algorithm.

    This returns, among other things, an op that chooses initial centers
    (init_op), a boolean variable that is set to True when the initial centers
    are chosen (cluster_centers_initialized), and an op to perform either an
    entire Lloyd iteration or a mini-batch of a Lloyd iteration (training_op).
    The caller should use these components as follows. A single worker should
    execute init_op multiple times until cluster_centers_initialized becomes
    True. Then multiple workers may execute training_op any number of times.

    Returns:
      A tuple consisting of:
      all_scores: A matrix (or list of matrices) of dimensions (num_input,
        num_clusters) where the value is the distance between an input vector
        and a cluster center.
      cluster_idx: A vector (or list of vectors). Each element in the vector
        corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      scores: Similar to cluster_idx but specifies the distance to the
        assigned cluster instead.
      cluster_centers_initialized: scalar indicating whether clusters have been
        initialized.
      init_op: an op to initialize the clusters.
      training_op: an op that runs an iteration of training.
    """
    # Implementation of kmeans.
    if (isinstance(self._initial_clusters, str) or
        callable(self._initial_clusters)):
      initial_clusters = self._initial_clusters
      num_clusters = ops.convert_to_tensor(self._num_clusters)
    else:
      initial_clusters = ops.convert_to_tensor(self._initial_clusters)
      num_clusters = array_ops.shape(initial_clusters)[0]

    inputs = self._inputs
    (cluster_centers_var, cluster_centers_initialized, total_counts,
     cluster_centers_updated,
     update_in_steps) = self._create_variables(num_clusters)
    init_op = _InitializeClustersOpFactory(
        self._inputs, num_clusters, initial_clusters, self._distance_metric,
        self._random_seed, self._kmeans_plus_plus_num_retries,
        self._kmc2_chain_length, cluster_centers_var, cluster_centers_updated,
        cluster_centers_initialized).op()
    cluster_centers = cluster_centers_var

    if self._distance_metric == COSINE_DISTANCE:
      inputs = self._l2_normalize_data(inputs)
      if not self._clusters_l2_normalized():
        cluster_centers = nn_impl.l2_normalize(cluster_centers, dim=1)

    all_scores, scores, cluster_idx = self._infer_graph(inputs, cluster_centers)
    if self._use_mini_batch:
      sync_updates_op = self._mini_batch_sync_updates_op(
          update_in_steps, cluster_centers_var, cluster_centers_updated,
          total_counts)
      assert sync_updates_op is not None
      with ops.control_dependencies([sync_updates_op]):
        training_op = self._mini_batch_training_op(
            inputs, cluster_idx, cluster_centers_updated, total_counts)
    else:
      assert cluster_centers == cluster_centers_var
      training_op = self._full_batch_training_op(
          inputs, num_clusters, cluster_idx, cluster_centers_var)

    return (all_scores, cluster_idx, scores, cluster_centers_initialized,
            init_op, training_op)
Example 28
 def _sample_n(self, n, seed=0):
     return nn_impl.l2_normalize(
         random_ops.random_normal(
             shape=array_ops.concat(([n], [self._dim + 1]), 0),
             dtype=self.dtype,
             seed=seed),
         axis=-1)
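
This yields points uniform on the unit sphere: a standard normal vector has
a rotationally invariant density, so its direction after L2 normalization is
uniform. A quick NumPy illustration (sample count is arbitrary):

import numpy as np

x = np.random.RandomState(0).randn(1000, 3)
x /= np.linalg.norm(x, axis=-1, keepdims=True)
print(np.allclose(np.linalg.norm(x, axis=-1), 1.0))  # all on the unit sphere
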
Example 29
 def cosine_similarity(tensor: Tensor) -> Tensor:
     """Calculates the cosine similarity of a 2D array, with L2 normalization."""
     # l2_normalize is not in-place; its result must be assigned.
     tensor = l2_normalize(tensor, axis=1)
     return matmul(tensor, transpose(tensor))
Example 30
    def build(self, input_shape):
        # Canonicalize the input shape to a TensorShape.
        input_shape = tensor_shape.TensorShape(input_shape)
        # Determine the channel axis from the data format.
        if self.data_format == 'channels_first':
            channel_axis = 1
        else:
            channel_axis = -1
        if input_shape.dims[channel_axis].value is None:
            raise ValueError(
                'The channel dimension of the inputs should be defined. Found `None`.'
            )
        input_dim = int(input_shape[channel_axis])  # get input dimension
        # Define the kernel shape: spatial dims + (input_dim, filters).
        kernel_shape = self.kernel_size + (input_dim, self.filters)

        kernel = self.add_weight(name='kernel',
                                 shape=kernel_shape,
                                 initializer=self.kernel_initializer,
                                 regularizer=self.kernel_regularizer,
                                 constraint=self.kernel_constraint,
                                 trainable=True,
                                 dtype=self.dtype)

        # weight normalization
        if self.weight_norm:
            self.g = self.add_weight(name='wn/g',
                                     shape=(self.filters, ),
                                     initializer=tf.ones_initializer(),
                                     trainable=True,
                                     dtype=kernel.dtype)

            self.kernel = tf.reshape(
                self.g, [1, 1, self.filters]) * nn_impl.l2_normalize(
                    kernel, [0, 1])
        else:
            self.kernel = kernel

        if self.use_bias:
            self.bias = self.add_weight(name='bias',
                                        shape=(self.filters, ),
                                        initializer=self.bias_initializer,
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint,
                                        trainable=True,
                                        dtype=self.dtype)
        else:
            self.bias = None

        self.input_spec = InputSpec(ndim=self.rank + 2,
                                    axes={channel_axis: input_dim})
        if self.padding == 'causal':
            op_padding = 'valid'
        else:
            op_padding = self.padding
        if not isinstance(op_padding, (list, tuple)):
            op_padding = op_padding.upper()
        self._convolution_op = nn_ops.Convolution(
            input_shape,
            filter_shape=self.kernel.get_shape(),
            dilation_rate=self.dilation_rate,
            strides=self.strides,
            padding=op_padding,
            data_format=conv_utils.convert_data_format(self.data_format,
                                                       self.rank + 2))

        self.built = True
Example 31
    def call(self, inputs, training=None):
        if self.lr_mul == 1.0:
            kernel = self.coeff * self.kernel
        else:

            @custom_gradient
            def lr_multiplier(x):
                y = array_ops.identity(x)

                def grad(dy):
                    return dy * self.lr_mul

                return y, grad

            kernel = lr_multiplier(self.coeff * self.kernel)

        training = self._get_training_value(training)

        # Update singular vector by power iteration
        if self.data_format == 'channels_first':
            W_T = array_ops.reshape(kernel, (self.filters, -1))
            W = array_ops.transpose(W_T)
        else:
            W = array_ops.reshape(kernel, (-1, self.filters))
            W_T = array_ops.transpose(W)
        u = array_ops.identity(self.u)
        for i in range(self.power_iter):
            v = nn_impl.l2_normalize(math_ops.matmul(u, W))  # 1 x filters
            u = nn_impl.l2_normalize(math_ops.matmul(v, W_T))
        # Spectral Normalization
        sigma_W = math_ops.matmul(math_ops.matmul(u, W),
                                  array_ops.transpose(v))
        # Backprop is not needed through the power iteration
        sigma_W = array_ops.stop_gradient(sigma_W)
        W_bar = kernel / array_ops.squeeze(sigma_W)

        # Assign new singular vector
        training_value = tf_utils.constant_value(training)
        if training_value is not False:

            def u_update():
                def true_branch():
                    return self._assign_singular_vector(self.u, u)

                def false_branch():
                    return self.u

                return tf_utils.smart_cond(training, true_branch, false_branch)

            self.add_update(u_update)

        # normal convolution using W_bar
        outputs = self._convolution_op(inputs, W_bar)

        if self.use_bias:
            if self.data_format == 'channels_first':
                if self.rank == 1:
                    # nn.bias_add does not accept a 1D input tensor.
                    bias = array_ops.reshape(self.bias, (1, self.filters, 1))
                    outputs += bias
                else:
                    outputs = nn.bias_add(outputs,
                                          self.bias,
                                          data_format='NCHW')
            else:
                outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')

        if self.activation is not None:
            return self.activation(outputs)
        return outputs