Example #1
 def _model_fn(features, labels, mode):
   """Model function."""
   assert labels is None, labels
   (all_scores, model_predictions, losses,
    training_op) = clustering_ops.KMeans(
        self._parse_tensor_or_dict(features),
        self._num_clusters,
        self._training_initial_clusters,
        self._distance_metric,
        self._use_mini_batch,
        random_seed=self._random_seed,
        kmeans_plus_plus_num_retries=self._kmeans_plus_plus_num_retries).training_graph()
   incr_step = state_ops.assign_add(variables.get_global_step(), 1)
   loss = math_ops.reduce_sum(losses, name=KMeansClustering.LOSS_OP_NAME)
   logging_ops.scalar_summary('loss/raw', loss)
   training_op = with_dependencies([training_op, incr_step], loss)
   predictions = {
       KMeansClustering.ALL_SCORES: all_scores[0],
       KMeansClustering.CLUSTER_IDX: model_predictions[0],
   }
   eval_metric_ops = {KMeansClustering.SCORES: loss}
   return ModelFnOps(
       mode=mode,
       predictions=predictions,
       eval_metric_ops=eval_metric_ops,
       loss=loss,
       train_op=training_op)
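
Note on API drift: training_graph() returns tuples of different arity across TF
releases. This example unpacks a 4-tuple, while Examples #3, #8, and #9 below
unpack 6- or 7-tuples that additionally expose is_initialized and init_op. A
minimal standalone sketch of the 4-tuple form used here (TF 1.x contrib; the
random input data is an assumption for illustration):

import numpy as np
import tensorflow as tf
from tensorflow.contrib.factorization.python.ops import clustering_ops

points = tf.constant(np.random.rand(100, 2), dtype=tf.float32)
# Older signature: (all_scores, cluster_idx, scores, training_op).
(all_scores, cluster_idx, scores, training_op) = clustering_ops.KMeans(
    points,
    5,  # num_clusters
    clustering_ops.RANDOM_INIT,
    clustering_ops.SQUARED_EUCLIDEAN_DISTANCE,
    use_mini_batch=True).training_graph()
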
Example #2
def inference(inp, num_clusters, hidden1_units, hidden2_units):
    """Build the MNIST model up to where it may be used for inference.

    Args:
      inp: Input data.
      num_clusters: Number of clusters of input features to train.
      hidden1_units: Size of the first hidden layer.
      hidden2_units: Size of the second hidden layer.

    Returns:
      logits: Output tensor with the computed logits.
      clustering_loss: Clustering loss.
      kmeans_training_op: An op to train the clustering.
    """
    # Clustering
    kmeans = clustering_ops.KMeans(
        inp,
        num_clusters,
        distance_metric=clustering_ops.COSINE_DISTANCE,
        # TODO(agarwal): kmeans++ is currently causing crash in dbg mode.
        # Enable this after fixing.
        # initial_clusters=clustering_ops.KMEANS_PLUS_PLUS_INIT,
        use_mini_batch=True)

    (all_scores, _, clustering_scores,
     kmeans_training_op) = kmeans.training_graph()
    # Some heuristics to approximately whiten this output.
    all_scores = (all_scores[0] - 0.5) * 5
    # Here we avoid passing the gradients from the supervised objective back to
    # the clusters by creating a stop_gradient node.
    all_scores = tf.stop_gradient(all_scores)
    clustering_loss = tf.reduce_sum(clustering_scores[0])
    # Hidden 1
    with tf.name_scope('hidden1'):
        weights = tf.Variable(
            tf.truncated_normal([num_clusters, hidden1_units],
                                stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden1_units]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(all_scores, weights) + biases)
    # Hidden 2
    with tf.name_scope('hidden2'):
        weights = tf.Variable(
            tf.truncated_normal([hidden1_units, hidden2_units],
                                stddev=1.0 / math.sqrt(float(hidden1_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden2_units]), name='biases')
        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
    # Linear
    with tf.name_scope('softmax_linear'):
        weights = tf.Variable(
            tf.truncated_normal([hidden2_units, NUM_CLASSES],
                                stddev=1.0 / math.sqrt(float(hidden2_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden2, weights) + biases
    return logits, clustering_loss, kmeans_training_op
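
A hypothetical invocation of inference() (NUM_CLASSES and IMAGE_PIXELS are
module-level constants assumed to be defined elsewhere in the original file,
e.g. 10 and 28 * 28 for MNIST; the placeholder setup is illustrative):

import tensorflow as tf

IMAGE_PIXELS = 28 * 28  # assumed MNIST constant
images_placeholder = tf.placeholder(tf.float32, shape=[None, IMAGE_PIXELS])
logits, clustering_loss, kmeans_training_op = inference(
    images_placeholder, num_clusters=384, hidden1_units=128, hidden2_units=32)
# The supervised loss is then built from `logits` as usual, and
# `kmeans_training_op` is run alongside the supervised train op each step.
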
Example #3
def kmeans_cluster_model_fn(features, labels, mode, params, config):
    """Model function for KMeansClustering estimator."""
    # https://wiki.python.org/moin/UsingAssertionsEffectively
    assert labels is None, "labels are not needed: %s" % labels

    #  clustering_ops.KMeans implements the core algorithm; see
    #  https://github.com/tensorflow/tensorflow/blob/master/tensorflow
    #  /contrib/factorization/python/ops/clustering_ops.py
    (all_scores, model_predictions, losses, is_initialized,
     cluster_centers_var, init_op, training_op) = clustering_ops.KMeans(
         _parse_tensor_or_dict(features),
         params.get('num_clusters'),
         initial_clusters=clustering_ops.RANDOM_INIT,
         distance_metric=clustering_ops.SQUARED_EUCLIDEAN_DISTANCE,
         use_mini_batch=False,
         mini_batch_steps_per_iteration=1,
         # Alternatively, read these from params:
         # use_mini_batch=params.get('use_mini_batch'),
         # mini_batch_steps_per_iteration=params.get(
         #     'mini_batch_steps_per_iteration'),
         random_seed=params.get('random_seed'),
         kmeans_plus_plus_num_retries=params.get(
             'kmeans_plus_plus_num_retries')).training_graph()

    incr_step = state_ops.assign_add(variables.get_global_step(), 1)
    loss = math_ops.reduce_sum(losses, name='kmeans_loss')
    #  Outputs a Summary protocol buffer containing a single scalar value.
    #  Used for visualizing in TensorBoard
    summary.scalar('loss/raw', loss)
    #  https://github.com/tensorflow/tensorflow/blob/master/tensorflow
    # /python/ops/control_flow_ops.py
    #  with_dependencies(dependencies, output_tensor, name=None):
    #  Produces the content of `output_tensor` only after `dependencies`.
    training_op = with_dependencies([training_op, incr_step], loss)
    predictions = {
        'all_scores': all_scores[0],
        'cluster_idx': model_predictions[0],
    }
    eval_metric_ops = {'scores': loss}

    #  Hooks to run during training.
    training_hooks = [
        _InitializeClustersHook(init_op, is_initialized, config.is_chief)
    ]
    relative_tolerance = params.get('relative_tolerance')
    if relative_tolerance is not None:
        training_hooks.append(_LossRelativeChangeHook(relative_tolerance))

    return ModelFnOps(mode=mode,
                      predictions=predictions,
                      eval_metric_ops=eval_metric_ops,
                      loss=loss,
                      train_op=training_op,
                      training_hooks=training_hooks)
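
For reference, a plausible sketch of the _InitializeClustersHook used above (an
assumption based on the comments here and in Example #9: a SessionRunHook that,
on the chief, runs init_op until is_initialized becomes True; the real contrib
implementation may differ):

import tensorflow as tf

class _InitializeClustersHook(tf.train.SessionRunHook):
    """Runs init_op until the cluster centers are initialized (sketch)."""

    def __init__(self, init_op, is_initialized_op, is_chief):
        self._init_op = init_op
        self._is_initialized_op = is_initialized_op
        self._is_chief = is_chief

    def after_create_session(self, session, coord):
        del coord  # unused
        if not self._is_chief:
            return
        while not session.run(self._is_initialized_op):
            session.run(self._init_op)
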
Example #4
 def _get_eval_ops(self, features, _, unused_metrics):
     (_, _, losses, _) = clustering_ops.KMeans(
         features,
         self._num_clusters,
         self._training_initial_clusters,
         self._distance_metric,
         self._use_mini_batch,
         random_seed=self._random_seed,
         kmeans_plus_plus_num_retries=self.kmeans_plus_plus_num_retries
     ).training_graph()
     return {
         KMeansClustering.SCORES: tf.reduce_sum(losses),
     }
Example #5
 def _get_predict_ops(self, features):
     (all_scores, model_predictions, _, _) = clustering_ops.KMeans(
         features,
         self._num_clusters,
         self._training_initial_clusters,
         self._distance_metric,
         self._use_mini_batch,
         random_seed=self._random_seed,
         kmeans_plus_plus_num_retries=self.kmeans_plus_plus_num_retries
     ).training_graph()
     return {
         KMeansClustering.ALL_SCORES: all_scores[0],
         KMeansClustering.CLUSTER_IDX: model_predictions[0]
     }
Example #6
 def _get_train_ops(self, features, _):
     (_, _, losses, training_op) = clustering_ops.KMeans(
         features,
         self._num_clusters,
         self._training_initial_clusters,
         self._distance_metric,
         self._use_mini_batch,
         random_seed=self._random_seed,
         kmeans_plus_plus_num_retries=self.kmeans_plus_plus_num_retries
     ).training_graph()
     incr_step = tf.assign_add(tf.contrib.framework.get_global_step(), 1)
     self._loss = tf.reduce_sum(losses)
     training_op = with_dependencies([training_op, incr_step], self._loss)
     return training_op, self._loss
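
Examples #1, #3, #6, and #8 all rely on the same with_dependencies pattern: the
returned tensor carries the value of its last argument, but fetching it forces
the listed ops to run first, so one session call both trains and reports the
loss. In isolation (TF 1.x; the counter is a made-up stand-in for the real ops):

import tensorflow as tf
from tensorflow.python.ops.control_flow_ops import with_dependencies

counter = tf.Variable(0, trainable=False, name='counter')
incr = tf.assign_add(counter, 1)
value = tf.constant(42.0)
# Fetching `out` runs `incr` as a side effect, then yields 42.0.
out = with_dependencies([incr], value)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(out), sess.run(counter))  # 42.0 1
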
Example #7
    def _build_estimator(self, X=None):

        if not self._estimator_built:

            if self.num_features is None:
                self.num_features = get_num_features(X)

            # Reload params from checkpoint if available
            if self._to_be_restored and self.num_features is None:
                self.num_features = read_tensor_in_checkpoint(
                    'num_features', self._to_be_restored)
            if self._to_be_restored and self.num_classes is None:
                self.num_classes = read_tensor_in_checkpoint(
                    'num_classes', self._to_be_restored)

            # Sanity checks
            if self.num_features is None:
                raise ValueError("'num_features' cannot be None.")

            # Persistent Parameters
            tf.Variable(self.num_features, dtype=tf.int32, name='num_features')

            self._kmeans = c_ops.KMeans(X,
                                        self.n_clusters,
                                        initial_clusters=self.init,
                                        distance_metric=self.distance,
                                        use_mini_batch=self.use_mini_batch)
            (self._all_scores, self._cluster_idx, self._scores,
             self._cluster_centers_initialized, self._cluster_centers_vars,
             self._init_op, self._train_op) = self._kmeans.training_graph()

            # fix for cluster_idx being a tuple
            self._cluster_idx = self._cluster_idx[0]
            self.avg_distance = tf.reduce_mean(self._scores)

            self._estimator_built = True
            self._init_graph()
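
Inside such a wrapper, a fit() method would then drive the graph manually; a
sketch under the assumption that training happens in a raw session (num_steps
is a hypothetical training budget, not an attribute of the class above):

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Choose the initial centers before any training step may run.
    while not sess.run(self._cluster_centers_initialized):
        sess.run(self._init_op)
    for _ in range(num_steps):
        _, avg_dist = sess.run([self._train_op, self.avg_distance])
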
Example #8
def _kmeans_clustering_model_fn(features, labels, mode, params, config):
    """Model function for KMeansClustering estimator."""
    assert labels is None, labels
    (all_scores, model_predictions, losses, is_initialized, init_op,
     training_op) = clustering_ops.KMeans(
         _parse_tensor_or_dict(features),
         params.get('num_clusters'),
         initial_clusters=params.get('training_initial_clusters'),
         distance_metric=params.get('distance_metric'),
         use_mini_batch=params.get('use_mini_batch'),
         mini_batch_steps_per_iteration=params.get(
             'mini_batch_steps_per_iteration'),
         random_seed=params.get('random_seed'),
         kmeans_plus_plus_num_retries=params.get(
             'kmeans_plus_plus_num_retries')).training_graph()
    incr_step = state_ops.assign_add(variables.get_global_step(), 1)
    loss = math_ops.reduce_sum(losses, name=KMeansClustering.LOSS_OP_NAME)
    summary.scalar('loss/raw', loss)
    training_op = with_dependencies([training_op, incr_step], loss)
    predictions = {
        KMeansClustering.ALL_SCORES: all_scores[0],
        KMeansClustering.CLUSTER_IDX: model_predictions[0],
    }
    eval_metric_ops = {KMeansClustering.SCORES: loss}
    training_hooks = [
        _InitializeClustersHook(init_op, is_initialized, config.is_chief)
    ]
    relative_tolerance = params.get('relative_tolerance')
    if relative_tolerance is not None:
        training_hooks.append(_LossRelativeChangeHook(relative_tolerance))
    return ModelFnOps(mode=mode,
                      predictions=predictions,
                      eval_metric_ops=eval_metric_ops,
                      loss=loss,
                      train_op=training_op,
                      training_hooks=training_hooks)
Example #9
    def model_fn(self, features, mode, config):
        """Model function for the estimator.

        Note that this does not take a `labels` arg. This works, but `input_fn`
        must return either `features` or, equivalently, `(features, None)`.

        Args:
          features: The input points. See `tf.estimator.Estimator`.
          mode: See `tf.estimator.Estimator`.
          config: See `tf.estimator.Estimator`.

        Returns:
          A `tf.estimator.EstimatorSpec` (see `tf.estimator.Estimator`)
          specifying this behavior:
            * `train_op`: Execute one mini-batch or full-batch run of Lloyd's
                algorithm.
            * `loss`: The sum of the squared distances from each input point
                to its closest center.
            * `eval_metric_ops`: Maps `SCORE` to `loss`.
            * `predictions`: Maps `ALL_DISTANCES` to the distance from each
                input point to each cluster center; maps `CLUSTER_INDEX` to
                the index of the closest cluster center for each input point.
        """
        # input_points is a single Tensor. Therefore, the sharding functionality
        # in clustering_ops is unused, and some of the values below are lists of a
        # single item.
        input_points = _parse_features_if_necessary(features,
                                                    self._feature_columns)

        # Let N = the number of input_points.
        # all_distances: A list of one matrix of shape (N, num_clusters). Each value
        #   is the distance from an input point to a cluster center.
        # model_predictions: A list of one vector of shape (N). Each value is the
        #   cluster id of an input point.
        # losses: Similar to cluster_idx but provides the distance to the cluster
        #   center.
        # is_initialized: scalar indicating whether the initial cluster centers
        #   have been chosen; see init_op.
        # init_op: an op to choose the initial cluster centers. A single worker
        #   repeatedly executes init_op until is_initialized becomes True.
        # training_op: an op that runs an iteration of training, either an entire
        #   Lloyd iteration or a mini-batch of a Lloyd iteration. Multiple workers
        #   may execute this op, but only after is_initialized becomes True.
        (all_distances, model_predictions, losses, is_initialized, init_op,
         training_op) = clustering_ops.KMeans(
             inputs=input_points,
             num_clusters=self._num_clusters,
             initial_clusters=self._initial_clusters,
             distance_metric=self._distance_metric,
             use_mini_batch=self._use_mini_batch,
             mini_batch_steps_per_iteration=self._mini_batch_steps_per_iteration,
             random_seed=self._random_seed,
             kmeans_plus_plus_num_retries=self._kmeans_plus_plus_num_retries
         ).training_graph()

        loss = math_ops.reduce_sum(losses)
        summary.scalar('loss/raw', loss)

        incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
        training_op = control_flow_ops.with_dependencies(
            [training_op, incr_step], loss)

        training_hooks = [
            _InitializeClustersHook(init_op, is_initialized, config.is_chief)
        ]
        if self._relative_tolerance is not None:
            training_hooks.append(
                _LossRelativeChangeHook(loss, self._relative_tolerance))

        export_outputs = {
            KMeansClustering.ALL_DISTANCES:
                export_output.PredictOutput(all_distances[0]),
            KMeansClustering.CLUSTER_INDEX:
                export_output.PredictOutput(model_predictions[0]),
            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                export_output.PredictOutput(model_predictions[0]),
        }

        return model_fn_lib.EstimatorSpec(
            mode=mode,
            predictions={
                KMeansClustering.ALL_DISTANCES: all_distances[0],
                KMeansClustering.CLUSTER_INDEX: model_predictions[0],
            },
            loss=loss,
            train_op=training_op,
            eval_metric_ops={KMeansClustering.SCORE: metrics.mean(loss)},
            training_hooks=training_hooks,
            export_outputs=export_outputs)
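
Typical use of the estimator built around this model_fn
(tf.contrib.factorization.KMeansClustering in TF 1.x; the data and
hyperparameters are illustrative assumptions):

import numpy as np
import tensorflow as tf

points = np.random.rand(1000, 2).astype(np.float32)
input_fn = tf.estimator.inputs.numpy_input_fn(
    x=points, batch_size=1000, num_epochs=1, shuffle=False)

kmeans = tf.contrib.factorization.KMeansClustering(num_clusters=5)
for _ in range(10):  # a few full-batch Lloyd iterations
    kmeans.train(input_fn)
print(kmeans.cluster_centers())
cluster_indices = list(kmeans.predict_cluster_index(input_fn))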