def _ModelFn(features, labels, mode):
      if is_training:
        logits_out = self._BuildGraph(features)
      else:
        graph_def = self._GetGraphDef(use_trt, batch_size, model_dir)
        logits_out = importer.import_graph_def(
            graph_def,
            input_map={INPUT_NODE_NAME: features},
            return_elements=[OUTPUT_NODE_NAME + ':0'],
            name='')[0]

      loss = losses.sparse_softmax_cross_entropy(
          labels=labels, logits=logits_out)
      summary.scalar('loss', loss)

      classes_out = math_ops.argmax(logits_out, axis=1, name='classes_out')
      accuracy = metrics.accuracy(
          labels=labels, predictions=classes_out, name='acc_op')
      summary.scalar('accuracy', accuracy[1])

      if mode == ModeKeys.EVAL:
        return EstimatorSpec(
            mode, loss=loss, eval_metric_ops={'accuracy': accuracy})
      elif mode == ModeKeys.TRAIN:
        optimizer = AdamOptimizer(learning_rate=1e-2)
        train_op = optimizer.minimize(loss, global_step=get_global_step())
        return EstimatorSpec(mode, loss=loss, train_op=train_op)
Example #2
  def linear_logit_fn(features):
    """Linear model logit_fn.

    Args:
      features: This is the first item returned from the `input_fn`
                passed to `train`, `evaluate`, and `predict`. This should be a
                single `Tensor` or `dict` of same.

    Returns:
      A `Tensor` representing the logits.
    """
    cols_to_vars = {}
    logits = feature_column_lib.linear_model(
        features=features,
        feature_columns=feature_columns,
        units=units,
        sparse_combiner=sparse_combiner,
        cols_to_vars=cols_to_vars)
    bias = cols_to_vars.pop('bias')
    if units > 1:
      summary.histogram('bias', bias)
    else:
      # If units == 1, the bias value is a length-1 list of a scalar Tensor,
      # so we should provide a scalar summary.
      summary.scalar('bias', bias[0][0])
    summary.scalar('fraction_of_zero_weights',
                   _compute_fraction_of_zero(cols_to_vars))
    return logits
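
# Illustrative sketch (not part of the original listing): `_compute_fraction_of_zero`
# is a helper from the surrounding estimator module. Assuming it reports the share of
# exactly-zero entries across the linear model's weight variables, the quantity it
# feeds into `summary.scalar` can be reproduced with plain NumPy as follows.
import numpy as np

def fraction_of_zero_weights(weight_arrays):
  """Fraction of weight entries that are exactly zero, across all variables."""
  total = sum(w.size for w in weight_arrays)
  zeros = sum(int(np.count_nonzero(w == 0)) for w in weight_arrays)
  return zeros / float(total) if total else 0.0

# Two weight arrays; 2 of the 10 entries are zero, so the fraction is 0.2.
print(fraction_of_zero_weights([np.ones((2, 3)), np.array([0.0, 1.0, 0.0, 2.0])]))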
Example #3
  def testSummariesAreFlushedToDisk(self):
    checkpoint_dir = os.path.join(self.get_temp_dir(), 'summaries_are_flushed')
    logdir = os.path.join(self.get_temp_dir(), 'summaries_are_flushed_eval')
    if gfile.Exists(logdir):
      gfile.DeleteRecursively(logdir)

    # Train a Model to completion:
    self._train_model(checkpoint_dir, num_steps=300)

    # Create the model (which can be restored).
    inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
    logistic_classifier(inputs)

    names_to_values = {'bread': 3.4, 'cheese': 4.5, 'tomato': 2.0}

    for k in names_to_values:
      v = names_to_values[k]
      summary_lib.scalar(k, v)

    evaluation.evaluate_repeatedly(
        checkpoint_dir=checkpoint_dir,
        hooks=[evaluation.SummaryAtEndHook(log_dir=logdir),],
        max_number_of_evaluations=1)

    self._verify_summaries(logdir, names_to_values)
Example #4
    def _tree_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      if dnn_to_tree_distillation_param:
        loss_weight, loss_fn = dnn_to_tree_distillation_param
        weight_tensor = head_lib._weight_tensor(  # pylint: disable=protected-access
            features, head.weight_column_name)
        dnn_logits_fixed = array_ops.stop_gradient(dnn_logits)

        if loss_fn is None:
          # Create a loss_fn similar to the multi_class_head loss_fn that was
          # previously used as the default.
          n_classes = 2 if head.logits_dimension == 1 else head.logits_dimension
          loss_fn = distillation_loss.create_dnn_to_tree_cross_entropy_loss_fn(
              n_classes)

        dnn_to_tree_distillation_loss = loss_weight * loss_fn(
            dnn_logits_fixed, tree_logits, weight_tensor)
        summary.scalar("dnn_to_tree_distillation_loss",
                       dnn_to_tree_distillation_loss)
        loss += dnn_to_tree_distillation_loss

      update_op = gbdt_model.train(loss, predictions_dict, labels)
      with ops.control_dependencies(
          [update_op]), (ops.colocate_with(global_step)):
        update_op = state_ops.assign_add(global_step, 1).op
        return update_op
Example #5
  def testSummaryWithFamilyMetaGraphExport(self):
    with ops.name_scope('outer'):
      i = constant_op.constant(11)
      summ = summary_lib.scalar('inner', i)
      self.assertEquals(summ.op.name, 'outer/inner')
      summ_f = summary_lib.scalar('inner', i, family='family')
      self.assertEquals(summ_f.op.name, 'outer/family/inner')

    metagraph_def, _ = meta_graph.export_scoped_meta_graph(export_scope='outer')

    with ops.Graph().as_default() as g:
      meta_graph.import_scoped_meta_graph(metagraph_def, graph=g,
                                          import_scope='new_outer')
      # The summaries should exist, but with outer scope renamed.
      new_summ = g.get_tensor_by_name('new_outer/inner:0')
      new_summ_f = g.get_tensor_by_name('new_outer/family/inner:0')

      # However, the tags are unaffected.
      with self.cached_session() as s:
        new_summ_str, new_summ_f_str = s.run([new_summ, new_summ_f])
        new_summ_pb = summary_pb2.Summary()
        new_summ_pb.ParseFromString(new_summ_str)
        self.assertEquals('outer/inner', new_summ_pb.value[0].tag)
        new_summ_f_pb = summary_pb2.Summary()
        new_summ_f_pb.ParseFromString(new_summ_f_str)
        self.assertEquals('family/outer/family/inner',
                          new_summ_f_pb.value[0].tag)
Example #6
  def testTFSummaryScalar(self):
    """Verify processing of tf.summary.scalar."""
    event_sink = _EventGenerator(self, zero_out_timestamps=True)
    writer = SummaryToEventTransformer(event_sink)
    with self.test_session() as sess:
      ipt = array_ops.placeholder(dtypes.float32)
      summary_lib.scalar('scalar1', ipt)
      summary_lib.scalar('scalar2', ipt * ipt)
      merged = summary_lib.merge_all()
      writer.add_graph(sess.graph)
      for i in xrange(10):
        summ = sess.run(merged, feed_dict={ipt: i})
        writer.add_summary(summ, global_step=i)

    accumulator = ea.EventAccumulator(event_sink)
    accumulator.Reload()

    seq1 = [ea.ScalarEvent(wall_time=0, step=i, value=i) for i in xrange(10)]
    seq2 = [
        ea.ScalarEvent(
            wall_time=0, step=i, value=i * i) for i in xrange(10)
    ]

    self.assertTagsEqual(accumulator.Tags(), {
        ea.SCALARS: ['scalar1', 'scalar2'],
        ea.GRAPH: True,
        ea.META_GRAPH: False,
    })

    self.assertEqual(accumulator.Scalars('scalar1'), seq1)
    self.assertEqual(accumulator.Scalars('scalar2'), seq2)
    first_value = accumulator.Scalars('scalar1')[0].value
    self.assertTrue(isinstance(first_value, float))
Example #7
  def gradient_clipping(grads_and_vars):
    """Internal function for adaptive clipping."""
    grads, variables = zip(*grads_and_vars)

    norm = clip_ops.global_norm(grads)

    max_norm, log_mean = _adaptive_max_norm(norm, std_factor, decay,
                                            global_step, epsilon, name)

    # reports the max gradient norm for debugging
    if report_summary:
      summary.scalar("global_norm/adaptive_max_gradient_norm", max_norm)

    # factor will be 1. if norm is smaller than max_norm
    factor = array_ops.where(norm < max_norm,
                             array_ops.ones_like(norm),
                             math_ops.exp(log_mean) / norm)

    if static_max_norm is not None:
      factor = math_ops.minimum(static_max_norm / norm, factor)

    # apply factor
    clipped_grads = []
    for grad in grads:
      if grad is None:
        clipped_grads.append(None)
      elif isinstance(grad, ops.IndexedSlices):
        clipped_grads.append(
            ops.IndexedSlices(grad.values * factor, grad.indices,
                              grad.dense_shape))
      else:
        clipped_grads.append(grad * factor)

    return list(zip(clipped_grads, variables))
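
# Illustrative sketch (not part of the original listing): every gradient above is
# rescaled by the same scalar factor. Assuming `max_norm` and `log_mean` have already
# been produced by the adaptive-norm tracker, that scalar can be computed as below.
import numpy as np

def clip_factor(norm, max_norm, log_mean, static_max_norm=None):
  # Leave gradients untouched while the global norm stays below max_norm; otherwise
  # shrink them toward the tracked (geometric) mean norm exp(log_mean).
  factor = 1.0 if norm < max_norm else np.exp(log_mean) / norm
  if static_max_norm is not None:
    factor = min(static_max_norm / norm, factor)
  return factor

grads = [np.array([3.0, 4.0])]                        # global norm = 5
factor = clip_factor(norm=5.0, max_norm=2.0, log_mean=np.log(2.0))
clipped = [g * factor for g in grads]                 # new global norm ~= 2
print(factor, np.linalg.norm(clipped[0]))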
Example #8
  def create_estimator_spec(
      self, features, mode, logits, labels=None, train_op_fn=None):
    """See `Head`."""
    with ops.name_scope('head'):
      logits = head_lib._check_logits(logits, self.logits_dimension)  # pylint:disable=protected-access

      # Predict.
      pred_keys = prediction_keys.PredictionKeys
      with ops.name_scope(None, 'predictions', (logits,)):
        probabilities = math_ops.sigmoid(logits, name=pred_keys.PROBABILITIES)
        predictions = {
            pred_keys.LOGITS: logits,
            pred_keys.PROBABILITIES: probabilities,
        }
      if mode == model_fn.ModeKeys.PREDICT:
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs={
                '': export_output.ClassificationOutput(scores=probabilities)
            })

      # Eval.
      unweighted_loss, processed_labels = self.create_loss(
          features=features, mode=mode, logits=logits, labels=labels)
      # Averages loss over classes.
      per_example_loss = math_ops.reduce_mean(
          unweighted_loss, axis=-1, keep_dims=True)
      weights = head_lib._weights(features, self._weight_column)  # pylint:disable=protected-access
      training_loss = losses.compute_weighted_loss(
          per_example_loss, weights=weights, reduction=losses.Reduction.SUM)
      if mode == model_fn.ModeKeys.EVAL:
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.EVAL,
            predictions=predictions,
            loss=training_loss,
            eval_metric_ops=self._eval_metric_ops(
                labels=processed_labels,
                probabilities=probabilities,
                weights=weights,
                per_example_loss=per_example_loss))

      # Train.
      if train_op_fn is None:
        raise ValueError('train_op_fn can not be None.')
    with ops.name_scope(''):
      summary.scalar(
          head_lib._summary_key(self._name, metric_keys.MetricKeys.LOSS),  # pylint:disable=protected-access
          training_loss)
      summary.scalar(
          head_lib._summary_key(  # pylint:disable=protected-access
              self._name, metric_keys.MetricKeys.LOSS_MEAN),
          losses.compute_weighted_loss(
              unweighted_loss, weights=weights,
              reduction=losses.Reduction.MEAN))
    return model_fn.EstimatorSpec(
        mode=model_fn.ModeKeys.TRAIN,
        predictions=predictions,
        loss=training_loss,
        train_op=train_op_fn(training_loss))
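
# Illustrative sketch (not part of the original listing): the head above reports the
# same unweighted losses twice, summed for the training loss and averaged for the
# LOSS_MEAN summary. A minimal NumPy version of the two reductions (mirroring
# losses.Reduction.SUM and losses.Reduction.MEAN, which divides by the total weight):
import numpy as np

per_example_loss = np.array([0.5, 1.0, 2.0])
weights = np.array([1.0, 1.0, 0.5])

weighted = per_example_loss * weights
loss_sum = weighted.sum()                    # Reduction.SUM  -> 2.5
loss_mean = weighted.sum() / weights.sum()   # Reduction.MEAN -> 1.0
print(loss_sum, loss_mean)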
Example #9
  def testTrainWithTrace(self):
    logdir = os.path.join(
        tempfile.mkdtemp(prefix=self.get_temp_dir()), 'tmp_logs')
    with ops.Graph().as_default():
      random_seed.set_random_seed(0)
      tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
      tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

      tf_predictions = LogisticClassifier(tf_inputs)
      loss_ops.log_loss(tf_predictions, tf_labels)
      total_loss = loss_ops.get_total_loss()
      summary.scalar('total_loss', total_loss)

      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)

      train_op = learning.create_train_op(total_loss, optimizer)

      loss = learning.train(
          train_op,
          logdir,
          number_of_steps=300,
          log_every_n_steps=10,
          trace_every_n_steps=100)
    self.assertIsNotNone(loss)
    for trace_step in [1, 101, 201]:
      trace_filename = 'tf_trace-%d.json' % trace_step
      self.assertTrue(os.path.isfile(os.path.join(logdir, trace_filename)))
Example #10
  def create_estimator_spec(
      self, features, mode, logits, labels=None, optimizer=None,
      train_op_fn=None):
    """See `_Head`."""
    if isinstance(logits, dict):
      logits_dict = logits
    else:
      logits_dict = self._split_logits(logits)
    if labels and not isinstance(labels, dict):
      raise ValueError('labels must be a dict. Given: {}'.format(labels))

    all_estimator_spec = []
    for head in self._heads:
      head_name = head.name
      all_estimator_spec.append(
          head.create_estimator_spec(
              features=features,
              mode=mode,
              logits=logits_dict[head_name],
              labels=labels[head_name] if labels else None,
              train_op_fn=_no_op_train_fn))

    if mode == model_fn.ModeKeys.TRAIN:
      spec = self._merge_train(
          all_estimator_spec=all_estimator_spec,
          optimizer=optimizer,
          train_op_fn=train_op_fn)
      with ops.name_scope(''):
        summary.scalar(metric_keys.MetricKeys.LOSS, spec.loss)
      return spec
    if mode == model_fn.ModeKeys.PREDICT:
      return self._merge_predict(all_estimator_spec)
    if mode == model_fn.ModeKeys.EVAL:
      return self._merge_eval(all_estimator_spec)
    raise ValueError('mode={} unrecognized'.format(mode))
Example #11
 def test_train_worker_monitor(self):
   # We need to explicitly set device due to check on non-chief workers
   # requiring all variables to have a device assigned.
   with ops.Graph().as_default() as g, g.device('/cpu:0'):
     global_step = variables_lib.create_global_step(g)
     train_op = state_ops.assign_add(global_step, 1)
     loss_op = constant_op.constant(2.0)
     summary.scalar('loss', loss_op)
     # Add explicit "local" init op to initialize all variables
     # as there's no chief to init here.
     init_op = variables.global_variables_initializer()
     ops.add_to_collection(ops.GraphKeys.LOCAL_INIT_OP, init_op)
     # Create worker monitors where one should be active on the worker
     # and the other chief exclusive.
     chief_exclusive_monitor = _BaseMonitorWrapper(False)
     all_workers_monitor = _BaseMonitorWrapper(True)
     with self.test_session(g):
       loss = learn.graph_actions.train(
           g,
           output_dir=self._output_dir,
           global_step_tensor=global_step,
           train_op=train_op,
           loss_op=loss_op,
           supervisor_is_chief=False,
           steps=1,
           monitors=[chief_exclusive_monitor, all_workers_monitor])
     self.assertEqual(2.0, loss)
     self.assertTrue(not chief_exclusive_monitor.is_active and
                     all_workers_monitor.is_active,
                     'Only non-chief runnable monitor must have been active.')
     self.assertTrue(not chief_exclusive_monitor.has_step and
                     all_workers_monitor.has_step,
                     'Only non-chief runnable monitor must have a step.')
Example #12
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    gradients = []
    # Number of stale gradients.
    stale_counter = variable_scope.get_variable(
        "stale_counter", [],
        initializer=init_ops.zeros_initializer(),
        trainable=False)

    def _AcceptGradientOp():
      with ops.control_dependencies(
          [self._opt.apply_gradients(
              grads_and_vars, global_step=global_step, name=name)]):
        return gen_array_ops.identity(0.0)

    def _DropGradientOp():
      return gen_array_ops.identity(1.0)

    for grad_and_var in grads_and_vars:
      grad = grad_and_var[0]
      if isinstance(grad, ops.Tensor):
        gradients.append(grad)
      else:
        gradients.append(grad.op)

    with ops.control_dependencies(gradients), ops.colocate_with(global_step):
      staleness = gen_array_ops.reshape(
          global_step - self._local_step, shape=())
      conditional_update = stale_counter.assign_add(control_flow_ops.cond(
          gen_math_ops.less_equal(staleness, self._staleness),
          _AcceptGradientOp, _DropGradientOp))

    summary.scalar(
        "Gradient staleness percentage",
        stale_counter / (math_ops.cast(global_step + 1, dtypes.float32)))
    return conditional_update
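
# Illustrative sketch (not part of the original listing): the summary above tracks the
# running fraction of gradient updates that were dropped for being stale. Assuming one
# increment of `stale_counter` per dropped update, the reported value is simply:
def staleness_fraction(num_dropped_updates, global_step):
  # Mirrors stale_counter / (global_step + 1) from the summary above.
  return num_dropped_updates / float(global_step + 1)

print(staleness_fraction(num_dropped_updates=3, global_step=99))  # 0.03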
Example #13
 def test_train_summaries(self):
   with ops.Graph().as_default() as g, self.test_session(g):
     with ops.control_dependencies(self._build_inference_graph()):
       train_op = state_ops.assign_add(variables_lib.get_global_step(), 1)
     loss_op = constant_op.constant(2.0)
     summary.scalar('loss', loss_op)
     self._assert_summaries(self._output_dir)
     self._assert_ckpt(self._output_dir, False)
     loss = learn.graph_actions.train(
         g,
         output_dir=self._output_dir,
         train_op=train_op,
         loss_op=loss_op,
         steps=1)
     # TODO(ebrevdo,ptucker,ispir): this meta_graph_def lacks the
     # SaverDef, so we can't add it to the summary assertion test below.
     # meta_graph_def = meta_graph.create_meta_graph_def()
     self.assertEqual(2.0, loss)
     self._assert_summaries(
         self._output_dir,
         expected_graphs=[g],
         expected_summaries={1: {
             'loss': 2.0
         }})
     self._assert_ckpt(self._output_dir, True)
Example #14
 def test_train_summaries(self):
   with ops.Graph().as_default() as g, self.test_session(g):
     with ops.control_dependencies(self._build_inference_graph()):
       train_op = state_ops.assign_add(variables_lib.get_global_step(), 1)
     loss_op = constant_op.constant(2.0)
     summary.scalar('loss', loss_op)
     writer = learn.graph_actions.get_summary_writer(self._output_dir)
     self._assert_summaries(self._output_dir, writer)
     self._assert_ckpt(self._output_dir, False)
     loss = learn.graph_actions._monitored_train(  # pylint: disable=protected-access
         g,
         output_dir=self._output_dir,
         train_op=train_op,
         loss_op=loss_op,
         steps=1)
     meta_graph_def = meta_graph.create_meta_graph_def(
         graph_def=g.as_graph_def(add_shapes=True),
         saver_def=monitored_session.Scaffold().finalize().saver.saver_def)
     self.assertEqual(2.0, loss)
     self._assert_summaries(
         self._output_dir,
         writer,
         expected_graphs=[g],
         expected_meta_graphs=[meta_graph_def],
         expected_summaries={1: {
             'loss': 2.0
         }})
     self._assert_ckpt(self._output_dir, True)
Example #15
def prefetch_queue(tensors,
                   capacity=8,
                   num_threads=1,
                   dynamic_pad=False,
                   shared_name=None,
                   name=None):
  """Creates a queue to prefetch tensors from `tensors`.

  A queue runner for enqueuing tensors into the prefetch_queue is automatically
  added to the TF QueueRunners collection.

  Example:
  This is useful, for example, for pre-assembling input batches read with
  `tf.train.batch()` and enqueuing the pre-assembled batches. Ops that dequeue
  from the pre-assembled queue will not pay the cost of assembling the batch.

  images, labels = tf.train.batch([image, label], batch_size=32, num_threads=4)
  batch_queue = prefetch_queue([images, labels])
  images, labels = batch_queue.dequeue()
  logits = Net(images)
  loss = Loss(logits, labels)

  Args:
    tensors: A list or dictionary of `Tensors` to enqueue in the buffer.
    capacity: An integer. The maximum number of elements in the queue.
    num_threads: An integer.  Number of threads running the enqueue op.
    dynamic_pad: Boolean.  Whether to allow variable dimensions in input shapes.
    shared_name: (optional). If set, this queue will be shared under the given
      name across multiple sessions.
    name: (Optional) A name for the operations.

  Returns:
    A queue from which you can dequeue tensors with the same type and shape
    as `tensors`.
  """
  if isinstance(tensors, dict):
    # Need to wrap the keys and values in list() since Python3 returns views.
    # We sort the keys so the order is consistent across runs.
    names = list(sorted(tensors.keys()))
    tensor_list = list([tensors[n] for n in names])
  else:
    names = None
    tensor_list = tensors

  with ops.name_scope(name, "prefetch_queue", tensor_list) as name:
    dtypes = [t.dtype for t in tensor_list]
    shapes = [t.get_shape() for t in tensor_list]
    queue = _which_queue(dynamic_pad)(
        capacity=capacity,
        dtypes=dtypes,
        shapes=shapes,
        names=names,
        shared_name=shared_name)
    enqueue_op = queue.enqueue(tensors)
    queue_runner.add_queue_runner(
        queue_runner.QueueRunner(queue, [enqueue_op] * num_threads))
    summary.scalar(
        "fraction_of_%d_full" % capacity,
        math_ops.cast(queue.size(), _dtypes.float32) * (1. / capacity))
    return queue
Example #16
 def create_loss(self, features, mode, logits=None, labels=None):
   """See `_Head`."""
   model_outputs = self.state_manager.define_loss(
       self.model, features, mode)
   summary.scalar(
       head_lib._summary_key(self._name, metric_keys.MetricKeys.LOSS),
       model_outputs.loss)
   return model_outputs
Example #17
 def _summary_computed():
   with ops.Graph().as_default():
     sv = supervisor.Supervisor(is_chief=False)
     sess = sv.prepare_or_wait_for_session("")
     summary.scalar("c1", constant_op.constant(1))
     summary.scalar("c2", constant_op.constant(2))
     summ = summary.merge_all()
     sv.summary_computed(sess, sess.run(summ))
Example #18
  def testSummaryNameConversion(self):
    c = constant_op.constant(3)
    s = summary_lib.scalar('name with spaces', c)
    self.assertEqual(s.op.name, 'name_with_spaces')

    s2 = summary_lib.scalar('name with many $#illegal^: characters!', c)
    self.assertEqual(s2.op.name, 'name_with_many___illegal___characters_')

    s3 = summary_lib.scalar('/name/with/leading/slash', c)
    self.assertEqual(s3.op.name, 'name/with/leading/slash')
Example #19
  def testReadyForLocalInitOpRestoreFromCheckpoint(self):
    server = server_lib.Server.create_local_server()
    logdir = self._test_dir("ready_for_local_init_op_restore")

    uid = uuid.uuid4().hex

    # Create a checkpoint.
    with ops.Graph().as_default():
      v = variables.VariableV1(
          10.0, name="ready_for_local_init_op_restore_v_" + str(uid))
      summary.scalar("ready_for_local_init_op_restore_v_" + str(uid), v)
      sv = supervisor.Supervisor(logdir=logdir)
      sv.prepare_or_wait_for_session(server.target)
      save_path = sv.save_path
      self._wait_for_glob(save_path, 3.0)
      self._wait_for_glob(
          os.path.join(logdir, "*events*"), 3.0, for_checkpoint=False)
      # Wait to make sure everything is written to file before stopping.
      time.sleep(1)
      sv.stop()

    def get_session(is_chief):
      g = ops.Graph()
      with g.as_default():
        with ops.device("/job:local"):
          v = variables.VariableV1(
              1.0, name="ready_for_local_init_op_restore_v_" + str(uid))
          vadd = v.assign_add(1)
          w = variables.VariableV1(
              v,
              trainable=False,
              collections=[ops.GraphKeys.LOCAL_VARIABLES],
              name="ready_for_local_init_op_restore_w_" + str(uid))
          ready_for_local_init_op = variables.report_uninitialized_variables(
              variables.global_variables())
      sv = supervisor.Supervisor(
          logdir=logdir,
          is_chief=is_chief,
          graph=g,
          recovery_wait_secs=1,
          ready_for_local_init_op=ready_for_local_init_op)
      sess = sv.prepare_or_wait_for_session(server.target)

      return sv, sess, v, vadd, w

    sv0, sess0, v0, _, w0 = get_session(True)
    sv1, sess1, _, vadd1, w1 = get_session(False)

    self.assertEqual(10, sess0.run(w0))
    self.assertEqual(11, sess1.run(vadd1))
    self.assertEqual(10, sess1.run(w1))
    self.assertEqual(11, sess0.run(v0))

    sv0.stop()
    sv1.stop()
Example #20
  def create_estimator_spec(
      self, features, mode, logits, labels=None, train_op_fn=None):
    """See `Head`."""
    # Predict.
    with ops.name_scope('head'):
      logits = _check_logits(logits, self._logits_dimension)
      predictions = {prediction_keys.PredictionKeys.PREDICTIONS: logits}
      if mode == model_fn.ModeKeys.PREDICT:
        regression_output = export_output.RegressionOutput(value=logits)
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs={
                _DEFAULT_SERVING_KEY: regression_output,
                _REGRESS_SERVING_KEY: regression_output,
                _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions)
            })

      # Eval.
      unweighted_loss, _ = self.create_loss(
          features=features, mode=mode, logits=logits, labels=labels)
      weights = _weights(features, self._weight_column)
      training_loss = losses.compute_weighted_loss(
          unweighted_loss, weights=weights, reduction=losses.Reduction.SUM)
      if mode == model_fn.ModeKeys.EVAL:
        # Estimator already adds a metric for loss.
        eval_metric_ops = {
            metric_keys.MetricKeys.LOSS_MEAN: metrics_lib.mean(
                unweighted_loss, weights=weights)
        }
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.EVAL,
            predictions=predictions,
            loss=training_loss,
            eval_metric_ops=eval_metric_ops)

      # Train.
      if train_op_fn is None:
        raise ValueError('train_op_fn can not be None.')
    with ops.name_scope(''):
      summary.scalar(
          _summary_key(self._name, metric_keys.MetricKeys.LOSS),
          training_loss)
      summary.scalar(
          _summary_key(self._name, metric_keys.MetricKeys.LOSS_MEAN),
          losses.compute_weighted_loss(
              unweighted_loss, weights=weights,
              reduction=losses.Reduction.MEAN))
    return model_fn.EstimatorSpec(
        mode=model_fn.ModeKeys.TRAIN,
        predictions=predictions,
        loss=training_loss,
        train_op=train_op_fn(training_loss))
Example #21
def acgan_generator_loss(discriminator_gen_classification_logits,
                         one_hot_labels,
                         weights=1.0,
                         scope=None,
                         loss_collection=ops.GraphKeys.LOSSES,
                         reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
                         add_summaries=False):
  """ACGAN loss for the generator.

  The ACGAN loss adds a classification loss to the conditional discriminator.
  Therefore, the discriminator must output a tuple consisting of
    (1) the real/fake prediction and
    (2) the logits for the classification (usually the last conv layer,
        flattened).

  For more details:
    ACGAN: https://arxiv.org/abs/1610.09585

  Args:
    discriminator_gen_classification_logits: Classification logits for generated
      data.
    one_hot_labels: A Tensor holding one-hot labels for the batch.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `discriminator_gen_classification_logits`, and must be broadcastable to
      `discriminator_gen_classification_logits` (i.e., all dimensions must be
      either `1`, or the same as the corresponding dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: A `tf.compat.v1.losses.Reduction` to apply to loss.
    add_summaries: Whether or not to add summaries for the loss.

  Returns:
    A loss Tensor. Shape depends on `reduction`.

  Raises:
    ValueError: if arg `module` is neither `generator` nor `discriminator`.
    TypeError: if the discriminator does not output a tuple.
  """
  with ops.name_scope(
      scope, 'acgan_generator_loss',
      (discriminator_gen_classification_logits, one_hot_labels)) as scope:
    loss = losses.softmax_cross_entropy(
        one_hot_labels,
        discriminator_gen_classification_logits,
        weights=weights,
        scope=scope,
        loss_collection=loss_collection,
        reduction=reduction)

    if add_summaries:
      summary.scalar('generator_ac_loss', loss)

  return loss
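
# Illustrative sketch (not part of the original listing): the ACGAN generator loss is
# softmax cross-entropy between the discriminator's classification logits on generated
# data and the true class labels. A minimal unweighted, mean-reduced NumPy version:
import numpy as np

def softmax_cross_entropy(one_hot_labels, logits):
  # Row-wise log-softmax, then the negative log-probability of the true class.
  shifted = logits - logits.max(axis=1, keepdims=True)
  log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
  return -(one_hot_labels * log_probs).sum(axis=1).mean()

logits = np.array([[2.0, 0.5, -1.0], [0.0, 3.0, 0.0]])
labels = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
print(softmax_cross_entropy(labels, logits))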
Example #22
def minimax_generator_loss(discriminator_gen_outputs,
                           label_smoothing=0.0,
                           weights=1.0,
                           scope=None,
                           loss_collection=ops.GraphKeys.LOSSES,
                           reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
                           add_summaries=False):
  """Original minimax generator loss for GANs.

  Note that the authors don't recommend using this loss. A more practically
  useful loss is `modified_generator_loss`.

  L = log(sigmoid(D(x))) + log(1 - sigmoid(D(G(z))))

  See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661) for more
  details.

  Args:
    discriminator_gen_outputs: Discriminator output on generated data. Expected
      to be in the range of (-inf, inf).
    label_smoothing: The amount of smoothing for positive labels. This technique
      is taken from `Improved Techniques for Training GANs`
      (https://arxiv.org/abs/1606.03498). `0.0` means no smoothing.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `discriminator_gen_outputs`, and must be broadcastable to
      `discriminator_gen_outputs` (i.e., all dimensions must be either `1`, or
      the same as the corresponding dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: A `tf.compat.v1.losses.Reduction` to apply to loss.
    add_summaries: Whether or not to add summaries for the loss.

  Returns:
    A loss Tensor. The shape depends on `reduction`.
  """
  with ops.name_scope(scope, 'generator_minimax_loss') as scope:
    loss = -minimax_discriminator_loss(
        array_ops.ones_like(discriminator_gen_outputs),
        discriminator_gen_outputs,
        label_smoothing,
        weights,
        weights,
        scope,
        loss_collection,
        reduction,
        add_summaries=False)

  if add_summaries:
    summary.scalar('generator_minimax_loss', loss)

  return loss
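
# Illustrative sketch (not part of the original listing): with no label smoothing, the
# minimax generator loss reduces (up to a constant from the `ones_like` placeholder) to
# the mean of log(1 - sigmoid(D(G(z)))). A minimal NumPy version of that quantity:
import numpy as np

def sigmoid(x):
  return 1.0 / (1.0 + np.exp(-x))

def minimax_generator_loss_sketch(discriminator_gen_outputs):
  # Decreases as the discriminator is fooled; the gradient vanishes when the
  # discriminator confidently rejects fakes, which is why `modified_generator_loss`
  # is usually preferred in practice.
  return np.mean(np.log(1.0 - sigmoid(discriminator_gen_outputs)))

print(minimax_generator_loss_sketch(np.array([-2.0, 0.0, 1.5])))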
Example #23
  def linear_logit_fn(features):
    """Linear model logit_fn.

    Args:
      features: This is the first item returned from the `input_fn`
                passed to `train`, `evaluate`, and `predict`. This should be a
                single `Tensor` or `dict` of same.

    Returns:
      A `Tensor` representing the logits.
    """
    if feature_column_v2.is_feature_column_v2(feature_columns):
      shared_state_manager = feature_column_v2.SharedEmbeddingStateManager()
      linear_model = feature_column_v2.LinearModel(
          feature_columns=feature_columns,
          units=units,
          sparse_combiner=sparse_combiner,
          shared_state_manager=shared_state_manager)
      logits = linear_model(features)
      bias = linear_model.bias_variable

      # We'd like to get all the non-bias variables associated with this
      # LinearModel. This includes the shared embedding variables as well.
      variables = linear_model.variables
      variables.remove(bias)
      variables.extend(shared_state_manager.variables)

      # Expand (potential) Partitioned variables
      bias = _get_expanded_variable_list([bias])
      variables = _get_expanded_variable_list(variables)
    else:
      linear_model = feature_column._LinearModel(  # pylint: disable=protected-access
          feature_columns=feature_columns,
          units=units,
          sparse_combiner=sparse_combiner,
          name='linear_model')
      logits = linear_model(features)
      cols_to_vars = linear_model.cols_to_vars()
      bias = cols_to_vars.pop('bias')
      variables = cols_to_vars.values()

    if units > 1:
      summary.histogram('bias', bias)
    else:
      # If units == 1, the bias value is a length-1 list of a scalar Tensor,
      # so we should provide a scalar summary.
      summary.scalar('bias', bias[0][0])
    summary.scalar('fraction_of_zero_weights',
                   _compute_fraction_of_zero(variables))
    return logits
Example #24
def add_regularization_loss_summaries(gan_model):
  """Adds summaries for a regularization losses..

  Args:
    gan_model: A GANModel tuple.
  """
  if gan_model.generator_scope:
    summary.scalar(
        'generator_regularization_loss',
        loss_util.get_regularization_loss(gan_model.generator_scope.name))
  if gan_model.discriminator_scope:
    summary.scalar(
        'discriminator_regularization_loss',
        loss_util.get_regularization_loss(gan_model.discriminator_scope.name))
Example #25
 def test_summaries(self):
   logdir = _test_dir(self.get_temp_dir(), 'test_summaries')
   with ops.Graph().as_default():
     gstep = variables_lib.get_or_create_global_step()
     new_gstep = state_ops.assign_add(gstep, 1)
     summary.scalar('my_summary_tag', new_gstep * 2)
     with monitored_session.MonitoredTrainingSession(
         is_chief=True, checkpoint_dir=logdir) as session:
       for _ in range(101):  # 100 is default summary writing steps
         session.run(new_gstep)
   summaries = util_test.latest_summaries(logdir)
   tags = [s.summary.value[0].tag for s in summaries]
   self.assertIn('my_summary_tag', tags)
   self.assertIn('global_step/sec', tags)
Example #26
  def setUp(self):
    test.TestCase.setUp(self)

    self.log_dir = 'log/dir'
    self.summary_writer = fake_summary_writer.FakeSummaryWriter(self.log_dir)

    var = variables_lib.Variable(0.0)
    tensor = state_ops.assign_add(var, 1.0)
    tensor2 = tensor * 2
    self.summary_op = summary_lib.scalar('my_summary', tensor)
    self.summary_op2 = summary_lib.scalar('my_summary2', tensor2)

    global_step = variables.get_or_create_global_step()
    self.train_op = state_ops.assign_add(global_step, 1)
Example #27
def mutual_information_penalty(
    structured_generator_inputs,
    predicted_distributions,
    weights=1.0,
    scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
    add_summaries=False):
  """Returns a penalty on the mutual information in an InfoGAN model.

  This loss comes from an InfoGAN paper https://arxiv.org/abs/1606.03657.

  Args:
    structured_generator_inputs: A list of Tensors representing the random noise
      that must  have high mutual information with the generator output. List
      length should match `predicted_distributions`.
    predicted_distributions: A list of `tfp.distributions.Distribution`s.
      Predicted by the recognizer, and used to evaluate the likelihood of the
      structured noise. List length should match `structured_generator_inputs`.
    weights: Optional `Tensor` whose rank is either 0, or the same dimensions as
      `structured_generator_inputs`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: A `tf.compat.v1.losses.Reduction` to apply to loss.
    add_summaries: Whether or not to add summaries for the loss.

  Returns:
    A scalar Tensor representing the mutual information loss.
  """
  _validate_information_penalty_inputs(structured_generator_inputs,
                                       predicted_distributions)

  with ops.name_scope(scope, 'mutual_information_loss') as scope:
    # Calculate the negative log-likelihood of the reconstructed noise.
    log_probs = [
        math_ops.reduce_mean(dist.log_prob(noise)) for dist, noise in zip(
            predicted_distributions, structured_generator_inputs)
    ]
    loss = -1 * losses.compute_weighted_loss(
        log_probs,
        weights,
        scope,
        loss_collection=loss_collection,
        reduction=reduction)

    if add_summaries:
      summary.scalar('mutual_information_penalty', loss)

  return loss
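
# Illustrative sketch (not part of the original listing): the InfoGAN penalty above is
# the negative log-likelihood of the structured noise under the recognizer's predicted
# distributions. Assuming unit-variance Gaussian recognizer outputs (a simplification;
# the real code accepts arbitrary predicted distributions), a minimal NumPy version:
import numpy as np

def gaussian_log_prob(x, mean):
  # log N(x; mean, 1)
  return -0.5 * (x - mean) ** 2 - 0.5 * np.log(2.0 * np.pi)

def mutual_information_penalty_sketch(structured_noise, predicted_means):
  # One mean log-probability per latent code, then the negative of their average.
  log_probs = [np.mean(gaussian_log_prob(noise, mean))
               for noise, mean in zip(structured_noise, predicted_means)]
  return -np.mean(log_probs)

noise = [np.array([0.1, -0.3]), np.array([1.0, 0.5])]
means = [np.array([0.0, 0.0]), np.array([0.8, 0.4])]
print(mutual_information_penalty_sketch(noise, means))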
Example #28
def _conditional_batch(tensors, keep_input, batch_size, num_threads=10):
  """Conditionally enqueue tensors based on accept_prob.

  Specifically, enqueue the element if accept_prob > rand_unif([0, 1]).

  Args:
      tensors: List of tensors to enqueue.
      keep_input: Bool. Whether to enqueue or not.
      batch_size: Size of batch.
      num_threads: Number of enqueueing threads.

  Returns:
      List of batched tensors.

  Raises:
      ValueError: `keep_input` isn't 0D.
  """
  keep_input.get_shape().assert_has_rank(0)
  # Determine shapes and types of to-be-enqueued-tensors.
  shapes_list = []
  dtypes_list = []
  for tensor in tensors:
    cur_shape = tensor.get_shape()
    cur_shape.assert_is_fully_defined()
    shapes_list.append(cur_shape)
    dtypes_list.append(tensor.dtype)

  final_q = data_flow_ops.FIFOQueue(
      capacity=batch_size,
      shapes=shapes_list,
      dtypes=dtypes_list,
      name='batched_queue')
  summary.scalar('queue/%s/size' % final_q.name, final_q.size())

  # Conditionally enqueue.
  # Reshape enqueue op to match no_op's shape.
  conditional_enqueue = control_flow_ops.cond(keep_input,
                                              lambda: final_q.enqueue(tensors),
                                              control_flow_ops.no_op)
  queue_runner.add_queue_runner(
      queue_runner.QueueRunner(final_q, [conditional_enqueue] * num_threads))

  out_tensor = final_q.dequeue_many(batch_size)
  # Queues return a single tensor if the list of enqueued tensors has a single
  # element. Since we want the return type to be the same in all cases, always
  # return a list.
  if isinstance(out_tensor, ops.Tensor):
    out_tensor = [out_tensor]

  return out_tensor
Example #29
  def __init__(self, examples, variables, options):
    """Create a new sdca optimizer."""

    if not examples or not variables or not options:
      raise ValueError('examples, variables and options must all be specified.')

    supported_losses = ('logistic_loss', 'squared_loss', 'hinge_loss',
                        'smooth_hinge_loss', 'poisson_loss')
    if options['loss_type'] not in supported_losses:
      raise ValueError('Unsupported loss_type: %s' % options['loss_type'])

    self._assertSpecified([
        'example_labels', 'example_weights', 'example_ids', 'sparse_features',
        'dense_features'
    ], examples)
    self._assertList(['sparse_features', 'dense_features'], examples)

    self._assertSpecified(['sparse_features_weights', 'dense_features_weights'],
                          variables)
    self._assertList(['sparse_features_weights', 'dense_features_weights'],
                     variables)

    self._assertSpecified([
        'loss_type', 'symmetric_l2_regularization',
        'symmetric_l1_regularization'
    ], options)

    for name in ['symmetric_l1_regularization', 'symmetric_l2_regularization']:
      value = options[name]
      if value < 0.0:
        raise ValueError('%s should be non-negative. Found (%f)' %
                         (name, value))

    self._examples = examples
    self._variables = variables
    self._options = options
    self._create_slots()
    self._hashtable = ShardedMutableDenseHashTable(
        key_dtype=dtypes.int64,
        value_dtype=dtypes.float32,
        num_shards=self._num_table_shards(),
        default_value=[0.0, 0.0, 0.0, 0.0],
        # SdcaFprint never returns 0 or 1 for the low64 bits, so this is a safe
        # empty_key (that will never collide with actual payloads).
        empty_key=[0, 0],
        deleted_key=[1, 1])

    summary.scalar('approximate_duality_gap', self.approximate_duality_gap())
    summary.scalar('examples_seen', self._hashtable.size())
Example #30
  def create_estimator_spec(
      self, features, mode, logits, labels=None, train_op_fn=None):
    """See `Head`."""
    # Predict.
    with ops.name_scope('head'):
      logits = _check_logits(logits, self._logits_dimension)
      predictions = {prediction_keys.PredictionKeys.PREDICTIONS: logits}
      if mode == model_fn.ModeKeys.PREDICT:
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs={'': export_output.RegressionOutput(value=logits)})

      # Eval.
      labels = _check_labels(_maybe_expand_dim(math_ops.to_float(labels)),
                             self._logits_dimension)
      unweighted_loss = losses.mean_squared_error(
          labels=labels, predictions=logits, reduction=losses.Reduction.NONE)
      weights = _weights(features, self._weight_column)
      training_loss = losses.compute_weighted_loss(
          unweighted_loss, weights=weights, reduction=losses.Reduction.SUM)
      if mode == model_fn.ModeKeys.EVAL:
        # Estimator already adds a metric for loss.
        eval_metric_ops = {
            metric_keys.MetricKeys.LOSS_MEAN: metrics_lib.mean(
                unweighted_loss, weights=weights)
        }
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.EVAL,
            predictions=predictions,
            loss=training_loss,
            eval_metric_ops=eval_metric_ops)

      # Train.
      if train_op_fn is None:
        raise ValueError('train_op_fn can not be None.')
    with ops.name_scope(''):
      summary.scalar(metric_keys.MetricKeys.LOSS, training_loss)
      summary.scalar(metric_keys.MetricKeys.LOSS_MEAN,
                     losses.compute_weighted_loss(
                         unweighted_loss,
                         weights=weights,
                         reduction=losses.Reduction.MEAN))
    return model_fn.EstimatorSpec(
        mode=model_fn.ModeKeys.TRAIN,
        predictions=predictions,
        loss=training_loss,
        train_op=train_op_fn(training_loss))
Example #31
 def test_summary_saver(self):
     with ops.Graph().as_default() as g, self.test_session(g):
         log_dir = 'log/dir'
         summary_writer = testing.FakeSummaryWriter(log_dir, g)
         var = variables.Variable(0.0)
         var.initializer.run()
         tensor = state_ops.assign_add(var, 1.0)
         summary_op = summary.scalar('my_summary', tensor)
         self._run_monitor(learn.monitors.SummarySaver(
             summary_op=summary_op,
             save_steps=8,
             summary_writer=summary_writer),
                           num_epochs=3,
                           num_steps_per_epoch=10)
         summary_writer.assert_summaries(test_case=self,
                                         expected_logdir=log_dir,
                                         expected_graph=g,
                                         expected_summaries={
                                             0: {
                                                 'my_summary': 1.0
                                             },
                                             1: {
                                                 'my_summary': 2.0
                                             },
                                             9: {
                                                 'my_summary': 3.0
                                             },
                                             17: {
                                                 'my_summary': 4.0
                                             },
                                             25: {
                                                 'my_summary': 5.0
                                             },
                                             29: {
                                                 'my_summary': 6.0
                                             },
                                         })
Example #32
def add_scalar_summary(tensor, name=None, prefix=None, print_summary=False):
    """Adds a scalar summary for the given tensor.
  Args:
    tensor: a variable or op tensor.
    name: the optional name for the summary.
    prefix: An optional prefix for the summary names.
    print_summary: If `True`, the summary is printed to stdout when the summary
      is computed.
  Returns:
    A scalar `Tensor` of type `string` whose contents are the serialized
    `Summary` protocol buffer.
  """
    collections = [] if print_summary else None
    summary_name = _get_summary_name(tensor, name, prefix)

    # If print_summary, then we need to make sure that this call doesn't add the
    # non-printing op to the collection. We'll add it to the collection later.
    op = summary.scalar(name=summary_name,
                        tensor=tensor,
                        collections=collections)
    if print_summary:
        op = logging_ops.Print(op, [tensor], summary_name)
        ops.add_to_collection(ops.GraphKeys.SUMMARIES, op)
    return op
Example #33
def combine_adversarial_loss(main_loss,
                             adversarial_loss,
                             weight_factor=None,
                             gradient_ratio=None,
                             gradient_ratio_epsilon=1e-6,
                             variables=None,
                             scalar_summaries=True,
                             gradient_summaries=True,
                             scope=None):
    """Utility to combine main and adversarial losses.

  This utility combines the main and adversarial losses in one of two ways.
  1) Fixed coefficient on adversarial loss. Use `weight_factor` in this case.
  2) Fixed ratio of gradients. Use `gradient_ratio` in this case. This is often
    used to make sure both losses affect weights roughly equally, as in
    https://arxiv.org/pdf/1705.05823.

  One can optionally also visualize the scalar and gradient behavior of the
  losses.

  Args:
    main_loss: A floating scalar Tensor indicating the main loss.
    adversarial_loss: A floating scalar Tensor indicating the adversarial loss.
    weight_factor: If not `None`, the coefficient by which to multiply the
      adversarial loss. Exactly one of this and `gradient_ratio` must be
      non-None.
    gradient_ratio: If not `None`, the ratio of the magnitude of the gradients.
      Specifically, gradient_ratio = grad_mag(main_loss) /
      grad_mag(adversarial_loss). Exactly one of this and `weight_factor` must
      be non-None.
    gradient_ratio_epsilon: An epsilon to add to the adversarial loss
      coefficient denominator, to avoid division-by-zero.
    variables: List of variables to calculate gradients with respect to. If not
      present, defaults to all trainable variables.
    scalar_summaries: Create scalar summaries of losses.
    gradient_summaries: Create gradient summaries of losses.
    scope: Optional name scope.

  Returns:
    A floating scalar Tensor indicating the desired combined loss.

  Raises:
    ValueError: Malformed input.
  """
    _validate_args([main_loss, adversarial_loss], weight_factor,
                   gradient_ratio)
    if variables is None:
        variables = contrib_variables_lib.get_trainable_variables()

    with ops.name_scope(scope,
                        'adversarial_loss',
                        values=[main_loss, adversarial_loss]):
        # Compute gradients if we will need them.
        if gradient_summaries or gradient_ratio is not None:
            main_loss_grad_mag = _numerically_stable_global_norm(
                gradients_impl.gradients(main_loss, variables))
            adv_loss_grad_mag = _numerically_stable_global_norm(
                gradients_impl.gradients(adversarial_loss, variables))

        # Add summaries, if applicable.
        if scalar_summaries:
            summary.scalar('main_loss', main_loss)
            summary.scalar('adversarial_loss', adversarial_loss)
        if gradient_summaries:
            summary.scalar('main_loss_gradients', main_loss_grad_mag)
            summary.scalar('adversarial_loss_gradients', adv_loss_grad_mag)

        # Combine losses in the appropriate way.
        # If `weight_factor` is always `0`, avoid computing the adversarial loss
        # tensor entirely.
        if _used_weight((weight_factor, gradient_ratio)) == 0:
            final_loss = main_loss
        elif weight_factor is not None:
            final_loss = (
                main_loss +
                array_ops.stop_gradient(weight_factor) * adversarial_loss)
        elif gradient_ratio is not None:
            grad_mag_ratio = main_loss_grad_mag / (adv_loss_grad_mag +
                                                   gradient_ratio_epsilon)
            adv_coeff = grad_mag_ratio / gradient_ratio
            summary.scalar('adversarial_coefficient', adv_coeff)
            final_loss = (
                main_loss +
                array_ops.stop_gradient(adv_coeff) * adversarial_loss)

    return final_loss
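
# Illustrative sketch (not part of the original listing): in the `gradient_ratio`
# branch above, the adversarial loss is rescaled so that its gradient magnitude is a
# fixed fraction of the main loss's gradient magnitude. The coefficient arithmetic in
# plain Python, assuming the two gradient norms are already known:
def adversarial_coefficient(main_grad_norm, adv_grad_norm, gradient_ratio, eps=1e-6):
  # grad_mag(main_loss) / grad_mag(adversarial_loss), scaled down by the target ratio.
  return (main_grad_norm / (adv_grad_norm + eps)) / gradient_ratio

main_loss, adversarial_loss = 1.3, 0.02
coeff = adversarial_coefficient(main_grad_norm=4.0, adv_grad_norm=0.5, gradient_ratio=2.0)
# The coefficient is treated as a constant (stop_gradient) when the losses are combined.
combined = main_loss + coeff * adversarial_loss
print(coeff, combined)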
Example #34
def least_squares_discriminator_loss(
        discriminator_real_outputs,
        discriminator_gen_outputs,
        real_label=1,
        fake_label=0,
        real_weights=1.0,
        generated_weights=1.0,
        scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
        add_summaries=False):
    """Least squares discriminator loss.

  This loss comes from `Least Squares Generative Adversarial Networks`
  (https://arxiv.org/abs/1611.04076).

  L = 1/2 * (D(x) - `real_label`) ** 2 +
      1/2 * (D(G(z)) - `fake_label`) ** 2

  where D(y) are discriminator logits.

  Args:
    discriminator_real_outputs: Discriminator output on real data.
    discriminator_gen_outputs: Discriminator output on generated data. Expected
      to be in the range of (-inf, inf).
    real_label: The value that the discriminator tries to output for real data.
    fake_label: The value that the discriminator tries to output for fake data.
    real_weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `discriminator_real_outputs`, and must be broadcastable to
      `discriminator_real_outputs` (i.e., all dimensions must be either `1`, or
      the same as the corresponding dimension).
    generated_weights: Same as `real_weights`, but for
      `discriminator_gen_outputs`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: A `tf.compat.v1.losses.Reduction` to apply to loss.
    add_summaries: Whether or not to add summaries for the loss.

  Returns:
    A loss Tensor. The shape depends on `reduction`.
  """
    with ops.name_scope(scope, 'lsq_discriminator_loss',
                        (discriminator_gen_outputs, real_label)) as scope:
        discriminator_real_outputs = _to_float(discriminator_real_outputs)
        discriminator_gen_outputs = _to_float(discriminator_gen_outputs)
        discriminator_real_outputs.shape.assert_is_compatible_with(
            discriminator_gen_outputs.shape)

        real_losses = math_ops.squared_difference(discriminator_real_outputs,
                                                  real_label) / 2.0
        fake_losses = math_ops.squared_difference(discriminator_gen_outputs,
                                                  fake_label) / 2.0

        loss_on_real = losses.compute_weighted_loss(real_losses,
                                                    real_weights,
                                                    scope,
                                                    loss_collection=None,
                                                    reduction=reduction)
        loss_on_generated = losses.compute_weighted_loss(fake_losses,
                                                         generated_weights,
                                                         scope,
                                                         loss_collection=None,
                                                         reduction=reduction)

        loss = loss_on_real + loss_on_generated
        util.add_loss(loss, loss_collection)

    if add_summaries:
        summary.scalar('discriminator_gen_lsq_loss', loss_on_generated)
        summary.scalar('discriminator_real_lsq_loss', loss_on_real)
        summary.scalar('discriminator_lsq_loss', loss)

    return loss
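
# Illustrative sketch (not part of the original listing): the LSGAN discriminator loss
# above is a pair of half squared errors pushing D(x) toward `real_label` and D(G(z))
# toward `fake_label`. A minimal unweighted, mean-reduced NumPy version:
import numpy as np

def lsq_discriminator_loss(d_real, d_gen, real_label=1.0, fake_label=0.0):
  real_losses = 0.5 * (d_real - real_label) ** 2
  fake_losses = 0.5 * (d_gen - fake_label) ** 2
  return real_losses.mean() + fake_losses.mean()

d_real = np.array([0.9, 1.2, 0.7])
d_gen = np.array([0.1, -0.3, 0.4])
print(lsq_discriminator_loss(d_real, d_gen))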
Example #35
def cluster_loss(labels,
                 embeddings,
                 margin_multiplier,
                 enable_pam_finetuning=True,
                 margin_type='nmi',
                 print_losses=False):
    """Computes the clustering loss.

  The following structured margins are supported:
    nmi: normalized mutual information
    ami: adjusted mutual information
    ari: adjusted Rand index
    vmeasure: v-measure
    const: indicator checking whether the two clusterings are the same.

  Args:
    labels: 2-D Tensor of labels of shape [batch size, 1]
    embeddings: 2-D Tensor of embeddings of shape
      [batch size, embedding dimension]. Embeddings should be l2 normalized.
    margin_multiplier: float32 scalar. Multiplier on the structured margin term.
      See section 3.2 of the paper for discussion.
    enable_pam_finetuning: Boolean. Whether to run local PAM refinement.
      See section 3.4 of the paper for discussion.
    margin_type: Type of structured margin to use. See section 3.2 of
      paper for discussion. Can be 'nmi', 'ami', 'ari', 'vmeasure', 'const'.
    print_losses: Boolean. Option to print the loss.

  Paper: https://arxiv.org/abs/1612.01213.

  Returns:
    clustering_loss: A float32 scalar `Tensor`.
  Raises:
    ImportError: If sklearn dependency is not installed.
  """
    if not HAS_SKLEARN:
        raise ImportError('Cluster loss depends on sklearn.')
    pairwise_distances = pairwise_distance(embeddings)
    labels = array_ops.squeeze(labels)
    all_ids = math_ops.range(array_ops.shape(embeddings)[0])

    # Compute the loss augmented inference and get the cluster centroids.
    chosen_ids = compute_augmented_facility_locations(pairwise_distances,
                                                      labels, all_ids,
                                                      margin_multiplier,
                                                      margin_type)
    # Given the predicted centroids, compute the clustering score.
    score_pred = compute_facility_energy(pairwise_distances, chosen_ids)

    # Branch whether to use PAM finetuning.
    if enable_pam_finetuning:
        # Initialize with augmented facility solution.
        chosen_ids = compute_augmented_facility_locations_pam(
            pairwise_distances, labels, margin_multiplier, margin_type,
            chosen_ids)
        score_pred = compute_facility_energy(pairwise_distances, chosen_ids)

    # Given the predicted centroids, compute the cluster assignments.
    predictions = get_cluster_assignment(pairwise_distances, chosen_ids)

    # Compute the clustering (i.e. NMI) score between the two assignments.
    clustering_score_pred = compute_clustering_score(labels, predictions,
                                                     margin_type)

    # Compute the clustering score from labels.
    score_gt = compute_gt_cluster_score(pairwise_distances, labels)

    # Compute the hinge loss.
    clustering_loss = math_ops.maximum(score_pred + margin_multiplier *
                                       (1.0 - clustering_score_pred) -
                                       score_gt,
                                       0.0,
                                       name='clustering_loss')
    clustering_loss.set_shape([])

    if print_losses:
        clustering_loss = logging_ops.Print(clustering_loss, [
            'clustering_loss: ', clustering_loss,
            array_ops.shape(clustering_loss)
        ])

    # Clustering specific summary.
    summary.scalar('losses/score_pred', score_pred)
    summary.scalar('losses/' + margin_type, clustering_score_pred)
    summary.scalar('losses/score_gt', score_gt)

    return clustering_loss
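
# Illustrative sketch (not part of the original listing): once the facility-location
# scores and the clustering-quality score are known, the structured hinge above is
# simple scalar arithmetic. A minimal version with made-up numbers:
def clustering_hinge_loss(score_pred, score_gt, clustering_score_pred,
                          margin_multiplier):
  # max(F(predicted) + gamma * (1 - quality) - F(ground truth), 0)
  return max(score_pred + margin_multiplier * (1.0 - clustering_score_pred)
             - score_gt, 0.0)

print(clustering_hinge_loss(score_pred=-10.0, score_gt=-10.4,
                            clustering_score_pred=0.7, margin_multiplier=1.0))  # 0.7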
Example #36
    def model_fn(self, features, mode, config):
        """Model function for the estimator.

    Note that this does not take a `labels` arg. This works, but `input_fn` must
    return either `features` or, equivalently, `(features, None)`.

    Args:
      features: The input points. See `tf.estimator.Estimator`.
      mode: See `tf.estimator.Estimator`.
      config: See `tf.estimator.Estimator`.

    Returns:
      A `tf.estimator.EstimatorSpec` (see `tf.estimator.Estimator`) specifying
      this behavior:
        * `train_op`: Execute one mini-batch or full-batch run of Lloyd's
             algorithm.
        * `loss`: The sum of the squared distances from each input point to its
             closest center.
        * `eval_metric_ops`: Maps `SCORE` to `loss`.
        * `predictions`: Maps `ALL_DISTANCES` to the distance from each input
             point to each cluster center; maps `CLUSTER_INDEX` to the index of
             the closest cluster center for each input point.
    """
        # input_points is a single Tensor. Therefore, the sharding functionality
        # in clustering_ops is unused, and some of the values below are lists of a
        # single item.
        input_points = _parse_features_if_necessary(features,
                                                    self._feature_columns)

        # Let N = the number of input_points.
        # all_distances: A list of one matrix of shape (N, num_clusters). Each value
        #   is the distance from an input point to a cluster center.
        # model_predictions: A list of one vector of shape (N). Each value is the
        #   cluster id of an input point.
        # losses: Similar to model_predictions, but each value is the distance
        #   from an input point to its assigned cluster center.
        # is_initialized: scalar indicating whether the initial cluster centers
        #   have been chosen; see init_op.
        # init_op: an op to choose the initial cluster centers. A single worker
        #   repeatedly executes init_op until is_initialized becomes True.
        # training_op: an op that runs an iteration of training, either an entire
        #   Lloyd iteration or a mini-batch of a Lloyd iteration. Multiple workers
        #   may execute this op, but only after is_initialized becomes True.
        (all_distances, model_predictions, losses, is_initialized, init_op,
         training_op) = clustering_ops.KMeans(
             inputs=input_points,
             num_clusters=self._num_clusters,
             initial_clusters=self._initial_clusters,
             distance_metric=self._distance_metric,
             use_mini_batch=self._use_mini_batch,
             mini_batch_steps_per_iteration=self.
             _mini_batch_steps_per_iteration,
             random_seed=self._random_seed,
             kmeans_plus_plus_num_retries=self._kmeans_plus_plus_num_retries
         ).training_graph()

        loss = math_ops.reduce_sum(losses)
        summary.scalar('loss/raw', loss)

        incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
        training_op = control_flow_ops.with_dependencies(
            [training_op, incr_step], loss)

        training_hooks = [
            _InitializeClustersHook(init_op, is_initialized, config.is_chief)
        ]
        if self._relative_tolerance is not None:
            training_hooks.append(
                _LossRelativeChangeHook(loss, self._relative_tolerance))

        export_outputs = {
            KMeansClustering.ALL_DISTANCES:
            export_output.PredictOutput(all_distances[0]),
            KMeansClustering.CLUSTER_INDEX:
            export_output.PredictOutput(model_predictions[0]),
            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            export_output.PredictOutput(model_predictions[0])
        }

        return model_fn_lib.EstimatorSpec(
            mode=mode,
            predictions={
                KMeansClustering.ALL_DISTANCES: all_distances[0],
                KMeansClustering.CLUSTER_INDEX: model_predictions[0],
            },
            loss=loss,
            train_op=training_op,
            eval_metric_ops={KMeansClustering.SCORE: metrics.mean(loss)},
            training_hooks=training_hooks,
            export_outputs=export_outputs)
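
Per the docstring, the loss is the sum of squared distances from each point to its closest center, and `CLUSTER_INDEX` is the argmin over the per-center distances. A NumPy sketch of those quantities, assuming squared Euclidean distance (the points, centers, and names are illustrative, not part of the estimator API):

# NumPy sketch of the quantities the model_fn exposes: per-point distances to
# every center, the index of the closest center, and the summed loss.
import numpy as np

points = np.array([[0.0, 0.0], [1.0, 1.0], [9.0, 9.0]])   # shape (N, d)
centers = np.array([[0.5, 0.5], [10.0, 10.0]])            # shape (num_clusters, d)

all_distances = ((points[:, None, :] - centers[None, :, :]) ** 2).sum(-1)
cluster_index = all_distances.argmin(axis=1)   # CLUSTER_INDEX prediction
loss = all_distances.min(axis=1).sum()         # SCORE / training loss

print(cluster_index)  # [0 0 1]
print(loss)           # 3.0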
Example #37
def norm(name, value):
    summary.scalar("Norm_{}".format(name), tf.norm(value))
Example #38
    def build(self, input_shape):
        input_shape = tensor_shape.TensorShape(input_shape)
        if input_shape.ndims is None:
            raise ValueError('Inputs to `Dense` should have known rank.')
        if len(input_shape) < 2:
            raise ValueError('Inputs to `Dense` should have rank >= 2.')
        if input_shape[-1].value is None:
            raise ValueError('The last dimension of the inputs to `Dense` '
                             'should be defined. Found `None`.')
        # Note that we set `trainable=True` because this is a trainable
        # weight of the layer. If the layer is not trainable
        # (self.trainable = False), the variable will not be added to
        # tf.trainable_variables(), and self.trainable_weights will be empty.
        m = input_shape[-1].value
        k = self.rank
        n = self.units

        # Glorot uniform initialization.
        limit = (6.0 / (m + n))**0.5
        kernel0 = random_ops.random_uniform([m, n],
                                            minval=-limit,
                                            maxval=limit)
        s0, u0, v0 = linalg_ops.svd(kernel0, full_matrices=False)
        v0 = array_ops.transpose(v0)
        u0 = array_ops.slice(u0, [0, 0], [m, k])  # u0[:, :k]
        s0 = array_ops.slice(s0, [0], [k])  # s0[:k]
        v0 = array_ops.slice(v0, [0, 0], [k, n])  # v0[:k, :]

        self.manifold_args = [
            vs.get_variable(
                'U',
                #shape=[m, k],
                initializer=u0,  #init_ops.orthogonal_initializer(),
                regularizer=self.kernel_regularizer,
                dtype=self.dtype,
                trainable=True),
            vs.get_variable(
                'S',
                #shape=[k,],
                initializer=s0,  #self.kernel_initializer,
                regularizer=self.kernel_regularizer,
                dtype=self.dtype,
                trainable=True),
            vs.get_variable(
                'V',
                #shape=[k, n],
                initializer=v0,  #init_ops.orthogonal_initializer(),
                regularizer=self.kernel_regularizer,
                dtype=self.dtype,
                trainable=True)
        ]

        U = self.manifold_args[0]
        US = standard_ops.matmul(U, standard_ops.diag(self.manifold_args[1]))
        USV = standard_ops.matmul(US, self.manifold_args[2])
        self.kernel = USV

        if 'norm' in self.summaries:
            summary.scalar('kernel-norm', linalg_ops.norm(self.kernel))
        if 'histogram' in self.summaries:
            summary.histogram('kernel-histogram', self.kernel)

        manifold = manifolds.FixedRankEmbedded(m, n, k)

        if self.use_bias:
            self.bias = vs.get_variable('bias',
                                        shape=[n],
                                        initializer=self.bias_initializer,
                                        regularizer=self.bias_regularizer,
                                        dtype=self.dtype,
                                        trainable=True)

            self.manifold_args.append(self.bias)
            manifold = manifolds.Product([manifold, manifolds.Euclidean(n)])

        else:
            self.bias = None

        self.manifold = manifold
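
The trainable parameters above are the three factors of a rank-`k` SVD, and the effective kernel is reassembled as `U @ diag(S) @ V`. A NumPy sketch of that construction, showing the shapes and the parameter saving (shapes are illustrative; no manifold machinery is involved):

# NumPy sketch of the low-rank kernel: truncate the SVD of a Glorot-uniform
# matrix to rank k and keep the three factors as the parameters.
import numpy as np

m, n, k = 64, 32, 4
limit = (6.0 / (m + n)) ** 0.5
kernel0 = np.random.uniform(-limit, limit, size=(m, n))

u0, s0, v0t = np.linalg.svd(kernel0, full_matrices=False)
U, S, V = u0[:, :k], s0[:k], v0t[:k, :]      # the three trainable factors

kernel = U @ np.diag(S) @ V                  # rank-k approximation, shape (m, n)
print(kernel.shape, U.size + S.size + V.size, m * n)  # far fewer parameters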
Example #39
    def create_estimator_spec(self,
                              features,
                              mode,
                              logits,
                              labels=None,
                              train_op_fn=None):
        """See `Head`."""
        # Predict.
        with ops.name_scope(self._name, 'head'):
            with ops.name_scope(None, 'predictions', (logits, )):
                pred_keys = prediction_keys.PredictionKeys
                logits = _check_logits(logits, self.logits_dimension)
                logistic = math_ops.sigmoid(logits, name=pred_keys.LOGISTIC)
                two_class_logits = array_ops.concat(
                    (array_ops.zeros_like(logits), logits),
                    1,
                    name='two_class_logits')
                probabilities = nn.softmax(two_class_logits,
                                           name=pred_keys.PROBABILITIES)
                class_ids = array_ops.reshape(math_ops.argmax(two_class_logits,
                                                              axis=1), (-1, 1),
                                              name='classes')
                if self._label_vocabulary:
                    table = lookup_ops.index_to_string_table_from_tensor(
                        vocabulary_list=self._label_vocabulary,
                        name='class_string_lookup')
                    classes = table.lookup(class_ids)
                else:
                    classes = string_ops.as_string(class_ids,
                                                   name='str_classes')
                predictions = {
                    pred_keys.LOGITS: logits,
                    pred_keys.LOGISTIC: logistic,
                    pred_keys.PROBABILITIES: probabilities,
                    pred_keys.CLASS_IDS: class_ids,
                    pred_keys.CLASSES: classes,
                }
            if mode == model_fn.ModeKeys.PREDICT:
                classifier_output = _classification_output(
                    scores=probabilities,
                    n_classes=2,
                    label_vocabulary=self._label_vocabulary)
                return model_fn.EstimatorSpec(
                    mode=model_fn.ModeKeys.PREDICT,
                    predictions=predictions,
                    export_outputs={
                        _DEFAULT_SERVING_KEY:
                        classifier_output,
                        _CLASSIFY_SERVING_KEY:
                        classifier_output,
                        _REGRESS_SERVING_KEY:
                        export_output.RegressionOutput(value=logistic),
                        _PREDICT_SERVING_KEY:
                        export_output.PredictOutput(predictions)
                    })

            # Eval.
            unweighted_loss, processed_labels = self.create_loss(
                features=features, mode=mode, logits=logits, labels=labels)
            weights = _weights(features, self._weight_column)
            training_loss = losses.compute_weighted_loss(
                unweighted_loss,
                weights=weights,
                reduction=losses.Reduction.SUM)
            if mode == model_fn.ModeKeys.EVAL:
                return model_fn.EstimatorSpec(
                    mode=model_fn.ModeKeys.EVAL,
                    predictions=predictions,
                    loss=training_loss,
                    eval_metric_ops=self._eval_metric_ops(
                        labels=processed_labels,
                        logits=logits,
                        logistic=logistic,
                        class_ids=class_ids,
                        unweighted_loss=unweighted_loss,
                        weights=weights))

            # Train.
            if train_op_fn is None:
                raise ValueError('train_op_fn cannot be None.')
        with ops.name_scope(''):
            summary.scalar(
                _summary_key(self._name, metric_keys.MetricKeys.LOSS),
                training_loss)
            summary.scalar(
                _summary_key(self._name, metric_keys.MetricKeys.LOSS_MEAN),
                losses.compute_weighted_loss(unweighted_loss,
                                             weights=weights,
                                             reduction=losses.Reduction.MEAN))
        return model_fn.EstimatorSpec(mode=model_fn.ModeKeys.TRAIN,
                                      predictions=predictions,
                                      loss=training_loss,
                                      train_op=train_op_fn(training_loss))
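
The `two_class_logits` construction works because `softmax([0, z])` equals `[1 - sigmoid(z), sigmoid(z)]`, so the `probabilities` output stays consistent with the `logistic` output. A quick NumPy check of that identity:

# NumPy check of the two-class-logits identity used by the binary head:
# softmax over [0, z] reproduces [1 - sigmoid(z), sigmoid(z)].
import numpy as np

z = np.array([-2.0, 0.0, 3.0])
two_class_logits = np.stack([np.zeros_like(z), z], axis=1)

e = np.exp(two_class_logits - two_class_logits.max(axis=1, keepdims=True))
probabilities = e / e.sum(axis=1, keepdims=True)

sigmoid = 1.0 / (1.0 + np.exp(-z))
print(np.allclose(probabilities[:, 1], sigmoid))  # True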
Example #40
    def build_controller(self):
        """RL optimization interface.

    Returns:
      ops: A dictionary holding handles of the model used for training.
    """

        self._global_step = training_util.get_or_create_global_step()
        ops = {}
        ops["loss"] = 0

        failing_signal = self.compute_reward(self.hparams.failing_signal)

        ctr = {}

        with tf_ops.name_scope("controller_{}".format(self.ctrl_id)):
            with variable_scope.variable_scope("controller_{}".format(
                    self.ctrl_id)):
                ctr["reward"] = {"value": [], "ph": [], "update": []}
                ctr["ready"] = {"value": [], "ph": [], "update": []}
                ctr["best_reward"] = {"value": [], "update": []}
                for i in range(self.hparams.num_children):
                    reward_value = variable_scope.get_local_variable(
                        "reward_{}".format(i),
                        initializer=0.0,
                        dtype=dtypes.float32,
                        trainable=False)
                    reward_ph = array_ops.placeholder(
                        dtypes.float32,
                        shape=(),
                        name="reward_ph_{}".format(i))
                    reward_update = state_ops.assign(reward_value,
                                                     reward_ph,
                                                     use_locking=True)
                    ctr["reward"]["value"].append(reward_value)
                    ctr["reward"]["ph"].append(reward_ph)
                    ctr["reward"]["update"].append(reward_update)
                    best_reward = variable_scope.get_local_variable(
                        "best_reward_{}".format(i),
                        initializer=failing_signal,
                        dtype=dtypes.float32,
                        trainable=False)
                    ctr["best_reward"]["value"].append(best_reward)
                    ctr["best_reward"]["update"].append(
                        state_ops.assign(
                            best_reward,
                            math_ops.minimum(best_reward, reward_update)))

                    ready_value = variable_scope.get_local_variable(
                        "ready_{}".format(i),
                        initializer=True,
                        dtype=dtypes.bool,
                        trainable=False)
                    ready_ph = array_ops.placeholder(
                        dtypes.bool, shape=(), name="ready_ph_{}".format(i))
                    ready_update = state_ops.assign(ready_value,
                                                    ready_ph,
                                                    use_locking=True)
                    ctr["ready"]["value"].append(ready_value)
                    ctr["ready"]["ph"].append(ready_ph)
                    ctr["ready"]["update"].append(ready_update)

            ctr["grouping_y_preds"], ctr[
                "grouping_log_probs"] = self.get_groupings()
            summary.histogram(
                "grouping_actions",
                array_ops.slice(ctr["grouping_y_preds"]["sample"], [0, 0],
                                [1, array_ops.shape(self.op_embeddings)[0]]))

            with variable_scope.variable_scope("controller_{}".format(
                    self.ctrl_id)):
                ctr["baseline"] = variable_scope.get_local_variable(
                    "baseline",
                    initializer=failing_signal
                    if self.hparams.start_with_failing_signal else 0.0,
                    dtype=dtypes.float32,
                    trainable=False)

            new_baseline = self.hparams.bl_dec * ctr["baseline"] + (
                1 - self.hparams.bl_dec) * math_ops.reduce_mean(
                    ctr["reward"]["value"])
            if not self.hparams.always_update_baseline:
                baseline_mask = math_ops.less(ctr["reward"]["value"],
                                              failing_signal)
                selected_reward = array_ops.boolean_mask(
                    ctr["reward"]["value"], baseline_mask)
                selected_baseline = control_flow_ops.cond(
                    math_ops.reduce_any(baseline_mask),
                    lambda: math_ops.reduce_mean(selected_reward),
                    lambda: constant_op.constant(0, dtype=dtypes.float32))
                ctr["pos_reward"] = selected_baseline
                pos_ = math_ops.less(
                    constant_op.constant(0, dtype=dtypes.float32),
                    selected_baseline)
                selected_baseline = self.hparams.bl_dec * ctr["baseline"] + (
                    1 - self.hparams.bl_dec) * selected_baseline
                selected_baseline = control_flow_ops.cond(
                    pos_, lambda: selected_baseline, lambda: ctr["baseline"])
                new_baseline = control_flow_ops.cond(
                    math_ops.less(self.global_step,
                                  self.hparams.stop_updating_after_steps),
                    lambda: new_baseline, lambda: selected_baseline)
            ctr["baseline_update"] = state_ops.assign(ctr["baseline"],
                                                      new_baseline,
                                                      use_locking=True)

            ctr["y_preds"], ctr["log_probs"] = self.get_placements()
            summary.histogram("actions", ctr["y_preds"]["sample"])
            mask = math_ops.less(ctr["reward"]["value"], failing_signal)
            ctr["loss"] = ctr["reward"]["value"] - ctr["baseline"]
            ctr["loss"] *= (ctr["log_probs"]["sample"] +
                            ctr["grouping_log_probs"]["sample"])

            selected_loss = array_ops.boolean_mask(ctr["loss"], mask)
            selected_loss = control_flow_ops.cond(
                math_ops.reduce_any(mask),
                lambda: math_ops.reduce_mean(-selected_loss),
                lambda: constant_op.constant(0, dtype=dtypes.float32))

            ctr["loss"] = control_flow_ops.cond(
                math_ops.less(self.global_step,
                              self.hparams.stop_updating_after_steps),
                lambda: math_ops.reduce_mean(-ctr["loss"]),
                lambda: selected_loss)

            ctr["reward_s"] = math_ops.reduce_mean(ctr["reward"]["value"])
            summary.scalar("loss", ctr["loss"])
            summary.scalar("avg_reward", ctr["reward_s"])
            summary.scalar("best_reward_so_far", best_reward)
            summary.scalar(
                "advantage",
                math_ops.reduce_mean(ctr["reward"]["value"] - ctr["baseline"]))

        with variable_scope.variable_scope("optimizer",
                                           reuse=variable_scope.AUTO_REUSE):
            (ctr["train_op"], ctr["lr"], ctr["grad_norm"],
             ctr["grad_norms"]) = self._get_train_ops(
                 ctr["loss"],
                 tf_ops.get_collection(tf_ops.GraphKeys.TRAINABLE_VARIABLES),
                 self.global_step,
                 grad_bound=self.hparams.grad_bound,
                 lr_init=self.hparams.lr,
                 lr_dec=self.hparams.lr_dec,
                 start_decay_step=self.hparams.start_decay_step,
                 decay_steps=self.hparams.decay_steps,
                 optimizer_type=self.hparams.optimizer_type)

        summary.scalar("gradnorm", ctr["grad_norm"])
        summary.scalar("lr", ctr["lr"])
        ctr["summary"] = summary.merge_all()
        ops["controller"] = ctr

        self.ops = ops
        return ops
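
The controller loss is a REINFORCE-style objective: the advantage `reward - baseline` weights the sampled log-probabilities, and the baseline is an exponential moving average of rewards. A minimal NumPy sketch of that computation with placeholder values (it ignores the failing-signal masking and the grouping log-probs):

# NumPy sketch of the baseline-subtracted policy-gradient loss used by the
# controller. Rewards and log-probs are placeholder values.
import numpy as np

bl_dec = 0.9
baseline = 0.5
rewards = np.array([0.4, 0.7, 0.3])       # one reward per child
log_probs = np.array([-1.2, -0.8, -2.0])  # sampled placement log-probabilities

# Advantage-weighted loss, averaged and negated as in the graph above.
loss = -np.mean((rewards - baseline) * log_probs)

# Exponential moving average baseline update (as in baseline_update above).
baseline = bl_dec * baseline + (1 - bl_dec) * rewards.mean()
print(loss, baseline)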
def queue_parsed_features(parsed_features,
                          keys=None,
                          feature_queue_capacity=100,
                          num_enqueue_threads=2,
                          name=None):
  """Speeds up parsing by using queues to do it asynchronously.

  This function adds the tensors in `parsed_features` to a queue, which allows
  the parsing (or any other expensive op before this) to be asynchronous wrt the
  rest of the training graph. This greatly improves read latency and speeds up
  training since the data will already be parsed and ready when each step of
  training needs it.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Args:
    parsed_features: A dict of string key to `Tensor` or `SparseTensor` objects.
    keys: `Tensor` of string keys.
    feature_queue_capacity: Capacity of the parsed features queue.
    num_enqueue_threads: Number of threads to enqueue the parsed example queue.
      Using multiple threads to enqueue the parsed example queue helps maintain
      a full queue when the subsequent computations overall are cheaper than
      parsing. In order to have predictable and repeatable order of reading and
      enqueueing, such as in prediction and evaluation mode,
      `num_enqueue_threads` should be 1.
    name: Name of resulting op.

  Returns:
    Returns tuple of:
    - `Tensor` corresponding to `keys` if provided, otherwise `None`.
    -  A dict of string key to `Tensor` or `SparseTensor` objects corresponding
       to `parsed_features`.
  Raises:
    ValueError: for invalid inputs.
  """

  args = list(parsed_features.values())
  if keys is not None:
    args += [keys]

  with ops.name_scope(name, 'queue_parsed_features', args):
    # Collect the tensors to enqueue; the queue dtypes for each item are
    # derived from them below.
    tensors_to_enqueue = []
    # Each entry contains the key, and a boolean which indicates whether the
    # tensor was a sparse tensor.
    tensors_mapping = []
    # TODO(sibyl-Aix6ihai): Most of the functionality here is about pushing sparse
    # tensors into a queue. This could be taken care of somewhere else so others
    # can reuse it. Also, QueueBase may be extended to handle sparse tensors
    # directly.
    for key in sorted(parsed_features.keys()):
      tensor = parsed_features[key]
      if isinstance(tensor, sparse_tensor.SparseTensor):
        tensors_mapping.append((key, True))
        tensors_to_enqueue.extend(
            [tensor.indices, tensor.values, tensor.dense_shape])
      else:
        tensors_mapping.append((key, False))
        tensors_to_enqueue.append(tensor)

    if keys is not None:
      tensors_to_enqueue.append(keys)

    queue_dtypes = [x.dtype for x in tensors_to_enqueue]
    input_queue = data_flow_ops.FIFOQueue(feature_queue_capacity, queue_dtypes)

    # Add a summary op to debug if our feature queue is full or not.
    summary.scalar('queue/parsed_features/%s/fraction_of_%d_full' %
                   (input_queue.name, feature_queue_capacity),
                   math_ops.cast(input_queue.size(), dtypes.float32) *
                   (1. / feature_queue_capacity))

    # Use a single QueueRunner with multiple threads to enqueue so the queue is
    # always full. The threads are coordinated so the last batch will not be
    # lost.
    enqueue_ops = [
        input_queue.enqueue(tensors_to_enqueue)
        for _ in range(num_enqueue_threads)
    ]
    queue_runner.add_queue_runner(
        queue_runner.QueueRunner(
            input_queue,
            enqueue_ops,
            queue_closed_exception_types=(errors.OutOfRangeError,
                                          errors.CancelledError)))

    dequeued_tensors = input_queue.dequeue()
    if not isinstance(dequeued_tensors, list):
      # input_queue.dequeue() returns a single tensor instead of a list of
      # tensors if there is only one tensor to dequeue, which breaks the
      # assumption of a list below.
      dequeued_tensors = [dequeued_tensors]

    # Reset shapes on dequeued tensors.
    for i in range(len(tensors_to_enqueue)):
      dequeued_tensors[i].set_shape(tensors_to_enqueue[i].get_shape())

    # Recreate feature mapping according to the original dictionary.
    dequeued_parsed_features = {}
    index = 0
    for key, is_sparse_tensor in tensors_mapping:
      if is_sparse_tensor:
        # Three tensors are (indices, values, shape).
        dequeued_parsed_features[key] = sparse_tensor.SparseTensor(
            dequeued_tensors[index], dequeued_tensors[index + 1],
            dequeued_tensors[index + 2])
        index += 3
      else:
        dequeued_parsed_features[key] = dequeued_tensors[index]
        index += 1

    dequeued_keys = None
    if keys is not None:
      dequeued_keys = dequeued_tensors[-1]

    return dequeued_keys, dequeued_parsed_features
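
The sparse handling above flattens each `SparseTensor` into its `(indices, values, dense_shape)` components before enqueueing and rebuilds the dictionary after dequeue, using `tensors_mapping` to remember which keys were sparse. A pure-Python sketch of that bookkeeping, with strings standing in for tensors:

# Pure-Python sketch of the flatten/rebuild bookkeeping: sparse entries
# contribute three tensors, dense entries contribute one, and
# tensors_mapping records which is which.
parsed_features = {
    'dense_feat': 'DENSE_TENSOR',
    'sparse_feat': ('INDICES', 'VALUES', 'DENSE_SHAPE'),  # stands in for a SparseTensor
}

tensors_to_enqueue, tensors_mapping = [], []
for key in sorted(parsed_features):
    value = parsed_features[key]
    if isinstance(value, tuple):                 # "is a SparseTensor"
        tensors_mapping.append((key, True))
        tensors_to_enqueue.extend(value)
    else:
        tensors_mapping.append((key, False))
        tensors_to_enqueue.append(value)

# ... the flat list would be enqueued and dequeued here ...
dequeued, rebuilt, index = tensors_to_enqueue, {}, 0
for key, is_sparse in tensors_mapping:
    if is_sparse:
        rebuilt[key] = tuple(dequeued[index:index + 3])
        index += 3
    else:
        rebuilt[key] = dequeued[index]
        index += 1
print(rebuilt == parsed_features)  # True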
Example #42
def parallel_read(data_sources,
                  reader_class,
                  num_epochs=None,
                  num_readers=4,
                  reader_kwargs=None,
                  shuffle=True,
                  dtypes=None,
                  capacity=256,
                  min_after_dequeue=128,
                  seed=None,
                  scope=None):
    """Reads multiple records in parallel from data_sources using n readers.

  It uses a ParallelReader to read from multiple files in parallel using
  multiple readers created using `reader_class` with `reader_kwargs`.

  If `shuffle` is True, the common_queue will be a RandomShuffleQueue; otherwise
  it will be a FIFOQueue.

  Usage:
      data_sources = ['path_to/train*']
      key, value = parallel_read(data_sources, tf.TextLineReader, num_readers=4)

  Args:
    data_sources: a list/tuple of files or the location of the data, i.e.
      /path/to/train@128, /path/to/train* or /tmp/.../train*
    reader_class: one of the io_ops.ReaderBase subclasses ex: TFRecordReader
    num_epochs: The number of times each data source is read. If left as None,
        the data will be cycled through indefinitely.
    num_readers: an integer, number of Readers to create.
    reader_kwargs: an optional dict, of kwargs for the reader.
    shuffle: boolean, whether should shuffle the files and the records by using
      RandomShuffleQueue as common_queue.
    dtypes:  A list of types.  The length of dtypes must equal the number
        of elements in each record. If it is None it will default to
        [tf.string, tf.string] for (key, value).
    capacity: integer, capacity of the common_queue.
    min_after_dequeue: integer, minimum number of records in the common_queue
      after dequeue. Needed for a good shuffle.
    seed: A seed for RandomShuffleQueue.
    scope: Optional name scope for the ops.

  Returns:
    key, value: a tuple of keys and values from the data_source.
  """
    data_files = get_data_files(data_sources)
    with ops.name_scope(scope, 'parallel_read'):
        filename_queue = tf_input.string_input_producer(data_files,
                                                        num_epochs=num_epochs,
                                                        shuffle=shuffle,
                                                        seed=seed,
                                                        name='filenames')
        dtypes = dtypes or [tf_dtypes.string, tf_dtypes.string]
        if shuffle:
            common_queue = data_flow_ops.RandomShuffleQueue(
                capacity=capacity,
                min_after_dequeue=min_after_dequeue,
                dtypes=dtypes,
                seed=seed,
                name='common_queue')
        else:
            common_queue = data_flow_ops.FIFOQueue(capacity=capacity,
                                                   dtypes=dtypes,
                                                   name='common_queue')

        summary.scalar(
            'fraction_of_%d_full' % capacity,
            math_ops.to_float(common_queue.size()) * (1. / capacity))

        return ParallelReader(reader_class,
                              common_queue,
                              num_readers=num_readers,
                              reader_kwargs=reader_kwargs).read(filename_queue)
def bucket(tensors,
           which_bucket,
           batch_size,
           num_buckets,
           num_threads=1,
           capacity=32,
           shapes=None,
           dynamic_pad=False,
           allow_smaller_final_batch=False,
           keep_input=None,
           shared_name=None,
           name=None):
    """Lazy bucketing of input tensors according to `which_bucket`.

  The argument `tensors` can be a list or a dictionary of tensors.
  The value returned by the function will be of the same type
  as `tensors`.

  The tensors entering this function are put into the bucket given by
  `which_bucket`.  Each bucket has its own queue.  When a bucket contains
  `batch_size` elements, this minibatch is pushed onto a top queue.  The
  tensors returned from this function are the result of dequeueing the
  next minibatch from this top queue.

  This function is implemented using several queues. A `QueueRunner` for the
  queues is added to the current `Graph`'s `QUEUE_RUNNER` collection.

  As the returned tensors are the result of a dequeue operation, evaluating
  them will throw a `tf.errors.OutOfRangeError` when the input queue is
  exhausted.  If these tensors are feeding another input queue, its queue runner
  will catch this exception; however, if they are used in your main thread,
  you are responsible for catching this yourself.

  *N.B.:* If `dynamic_pad` is `False`, you must ensure that either
  (i) the `shapes` argument is passed, or (ii) all of the tensors in
  `tensors` have fully-defined shapes. A `ValueError` will be
  raised if neither of these conditions holds.

  If `dynamic_pad` is `True`, it is sufficient that the *rank* of the
  tensors is known, but individual dimensions may have shape `None`.
  In this case, for each enqueue the dimensions with value `None`
  may have a variable length; upon dequeue, the output tensors will be padded
  on the right to the maximum shape of the tensors in the current minibatch.
  For numbers, this padding takes value 0.  For strings, this padding is
  the empty string.  See `PaddingFIFOQueue` for more info.

  If `allow_smaller_final_batch` is `True`, a smaller batch value than
  `batch_size` is returned when the queues are closed and there are not enough
  elements to fill the batch, otherwise the pending elements are discarded.
  In addition, all output tensors' static shapes, as accessed via the
  `get_shape()` method, will have a 0th `Dimension` value of `None`, and
  operations that depend on a fixed batch_size will fail.

  Args:
    tensors: The list or dictionary of tensors, representing a single element,
      to bucket.  Nested lists are not supported.
    which_bucket: An `int32` scalar Tensor taking a value in `[0, num_buckets)`.
    batch_size: The new batch size pulled from the queue
      (python int or int32 scalar).
    num_buckets: A python integer, the number of buckets.
    num_threads: An integer.  The number of threads enqueuing `tensors`.
    capacity: An integer. The maximum number of minibatches in the top queue,
      and also the maximum number of elements within each bucket.
    shapes: (Optional) The shapes for each example.  Defaults to the
      inferred shapes for `tensors`.
    dynamic_pad: Boolean.  Allow variable dimensions in input shapes.
      The given dimensions are padded upon dequeue so that tensors within a
      batch have the same shapes.
    allow_smaller_final_batch: (Optional) Boolean. If `True`, allow the final
      batches to be smaller if there are insufficient items left in the queues.
    keep_input: (Optional).  A `bool` scalar Tensor.  If provided, this tensor
      controls whether the input is added to the queue or not.  If it evaluates
      `True`, then `tensors` are added to the bucket; otherwise they are
      dropped.  This tensor essentially acts as a filtering mechanism.
      The default behavior is to assume `keep_input=True`.
    shared_name: (Optional). If set, the queues will be shared under the given
      name across multiple sessions.
    name: (Optional) A name for the operations.

  Returns:
    A tuple `(bucket, outputs)` where `bucket` is
    an `int32` scalar tensor and `outputs` is a list or
    dictionary of batched outputs corresponding to elements of `tensors`.
    Every step will receive a new bucket of outputs.

  Raises:
    ValueError: If the `shapes` are not specified, and cannot be
      inferred from the elements of `tensors`.
  """
    tensor_list = _as_tensor_list(tensors)
    with ops.name_scope(name, "bucket", tensor_list) as name:
        tensor_list = _validate_bucket(tensor_list)
        (tensor_list, sparse_info) = _store_sparse_tensors(
            tensor_list,
            enqueue_many=False,
            keep_input=constant_op.constant(True))

        # Round-trip batch_size to a tensor, and possibly back
        batch_size = ops.convert_to_tensor(batch_size,
                                           dtype=dtypes.int32,
                                           name="batch_size")
        static_batch_size = tensor_util.constant_value(batch_size)
        batch_size = (static_batch_size
                      if static_batch_size is not None else batch_size)

        types = _dtypes([tensor_list])
        shapes = _shapes([tensor_list], shapes, enqueue_many=False)

        which_bucket = ops.convert_to_tensor(which_bucket,
                                             dtype=dtypes.int32,
                                             name="which_bucket")

        queue_creator = _which_queue(dynamic_pad)
        bucket_queues = []
        for i in range(num_buckets):
            shared_name_i = ("%s_%d" % (shared_name, i)
                             if shared_name is not None else None)
            bucket_queues.append(
                queue_creator(capacity=capacity,
                              dtypes=types,
                              shapes=shapes,
                              shared_name=shared_name_i,
                              name="bucket_queue_%d" % i))

        maybe_static_batch_size = (None if allow_smaller_final_batch else
                                   static_batch_size)

        bucket_shapes = [
            tensor_shape.vector(maybe_static_batch_size).concatenate(s)
            for s in bucket_queues[0].shapes
        ]
        # top_queue is a PaddingFIFOQueue even if the bucket queues are regular FIFO
        # queues because if we use allow_smaller_final_batch, shapes will
        # contain Nones in their first entry; as a result, a regular
        # FIFOQueue would die when being passed shapes that are not fully defined.
        top_queue = data_flow_ops.PaddingFIFOQueue(
            capacity=capacity,
            dtypes=[dtypes.int32] + types,
            shapes=[tensor_shape.scalar()] + bucket_shapes,
            shared_name=shared_name,
            name="top_queue")

        def enqueue_which():
            def enqueue_single(i):
                return bucket_queues[i].enqueue(tensor_list)

            enqueues = [
                control_flow_ops.cond(math_ops.equal(which_bucket, i),
                                      functools.partial(enqueue_single, i),
                                      control_flow_ops.no_op)
                for i in range(num_buckets)
            ]
            return control_flow_ops.group(*enqueues, name="group_enqueues")

        if keep_input is not None:
            # TODO(ebrevdo): Expand keep_input param to core training
            # methods, and pipe through to _store_sparse_tensors; so
            # that expensive serialization is guarded by keep_input.
            maybe_enqueue = control_flow_ops.cond(keep_input, enqueue_which,
                                                  control_flow_ops.no_op)
        else:
            maybe_enqueue = enqueue_which()

        bucket_enqueue_ops = [maybe_enqueue] * num_threads

        if allow_smaller_final_batch:
            which_dequeue = lambda q: q.dequeue_up_to
        else:
            which_dequeue = lambda q: q.dequeue_many

        enqueues_to_top = [
            top_queue.enqueue(
                [constant_op.constant(i)] +
                which_dequeue(q)(batch_size, name="read_bucket_%d" % i),
                name="enqueue_from_bucket_%d" % i)
            for i, q in enumerate(bucket_queues)
        ]

        for i, q in enumerate(bucket_queues):
            queue_runner.add_queue_runner(
                queue_runner.QueueRunner(
                    q, [enqueues_to_top[i]],
                    queue_closed_exception_types=(errors.OutOfRangeError,
                                                  errors.CancelledError)))
        queue_runner.add_queue_runner(
            queue_runner.QueueRunner(
                top_queue,
                bucket_enqueue_ops,
                queue_closed_exception_types=(errors.OutOfRangeError,
                                              errors.CancelledError)))

        for q in bucket_queues:
            summary.scalar("bucket/%s/size" % q.name,
                           math_ops.cast(top_queue.size(), dtypes.float32))
        summary.scalar(
            "bucket/%s/fraction_of_%d_full" % (top_queue.name, capacity),
            math_ops.cast(top_queue.size(), dtypes.float32) * (1. / capacity))

        dequeued = top_queue.dequeue(name="dequeue_top")
        which_bucket_dequeued = dequeued[0]
        dequeued = dequeued[1:]
        dequeued = _restore_sparse_tensors(dequeued, sparse_info)
        return (which_bucket_dequeued, _as_original_type(tensors, dequeued))
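
`which_bucket` is typically derived from a property of the example, most commonly its sequence length, as the companion `bucket_by_sequence_length` helper does. A pure-Python sketch of mapping a length to a bucket id given a list of boundaries (names and boundaries are illustrative):

# Pure-Python sketch of the usual way which_bucket is chosen: find the first
# length boundary that the example's sequence length does not exceed.
def length_to_bucket(seq_len, bucket_boundaries):
    """Returns a bucket id in [0, len(bucket_boundaries)] for `seq_len`."""
    for i, boundary in enumerate(sorted(bucket_boundaries)):
        if seq_len <= boundary:
            return i
    return len(bucket_boundaries)  # overflow bucket for very long sequences

boundaries = [10, 20, 40]
print([length_to_bucket(n, boundaries) for n in (3, 10, 15, 39, 100)])
# [0, 0, 1, 2, 3]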
Example #44
def zero_fraction(name, value):
    summary.scalar("ZeroFraction_{}".format(name), tf.nn.zero_fraction(value))
Example #45
def _add_hidden_layer_summary(value, tag):
    summary.scalar("%s_fraction_of_zero_values" % tag, nn.zero_fraction(value))
    summary.histogram("%s_activation" % tag, value)
Example #46
def _add_layer_summary(value, tag):
  summary.scalar('%s/fraction_of_zero_values' % tag, nn.zero_fraction(value))
  summary.histogram('%s/activation' % tag, value)
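
The helpers above all log the fraction of activations that are exactly zero, a cheap proxy for how many ReLU units are inactive. An equivalent NumPy computation, for reference:

# NumPy equivalent of nn.zero_fraction: the share of entries that are exactly 0.
import numpy as np

activations = np.array([[0.0, 1.3, 0.0, 2.1],
                        [0.0, 0.0, 0.7, 0.0]])
zero_fraction = np.mean(activations == 0.0)
print(zero_fraction)  # 0.625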
def _enqueue_data(data,
                  capacity,
                  shuffle=False,
                  min_after_dequeue=None,
                  num_threads=1,
                  seed=None,
                  name="enqueue_input",
                  enqueue_size=1,
                  num_epochs=None,
                  pad_value=None):
    """Creates a queue filled from a numpy array or pandas `DataFrame`.

    Returns a queue filled with the rows of the given (`OrderedDict` of) array
    or `DataFrame`. In the case of a pandas `DataFrame`, the first enqueued
    `Tensor` corresponds to the index of the `DataFrame`. For (`OrderedDict` of)
    numpy arrays, the first enqueued `Tensor` contains the row number.

  Args:
    data: a numpy `ndarray`, `OrderedDict` of numpy arrays, or a generator
       yielding `dict`s of numpy arrays or pandas `DataFrame` that will be read
       into the queue.
    capacity: the capacity of the queue.
    shuffle: whether or not to shuffle the rows of the array.
    min_after_dequeue: minimum number of elements that can remain in the queue
      after a dequeue operation. Only used when `shuffle` is true. If not set,
      defaults to `capacity` / 4.
    num_threads: number of threads used for reading and enqueueing.
    seed: used to seed shuffling and reader starting points.
    name: a scope name identifying the data.
    enqueue_size: the number of rows to enqueue per step.
    num_epochs: limit enqueuing to a specified number of epochs, if provided.
    pad_value: default value for dynamic padding of data samples, if provided.

  Returns:
    A queue filled with the rows of the given (`OrderedDict` of) array or
      `DataFrame`.

  Raises:
    TypeError: `data` is not a Pandas `DataFrame`, an `OrderedDict` of numpy
      arrays, a numpy `ndarray`, or a generator producing these.
    NotImplementedError: padding and shuffling data at the same time.
    NotImplementedError: padding usage with non generator data type.
  """
    with ops.name_scope(name):
        if isinstance(data, np.ndarray):
            types = [dtypes.int64, dtypes.as_dtype(data.dtype)]
            queue_shapes = [(), data.shape[1:]]
            get_feed_fn = _ArrayFeedFn
        elif isinstance(data, collections.OrderedDict):
            types = [dtypes.int64
                     ] + [dtypes.as_dtype(col.dtype) for col in data.values()]
            queue_shapes = [()] + [col.shape[1:] for col in data.values()]
            get_feed_fn = _OrderedDictNumpyFeedFn
        elif isinstance(data, tp.FunctionType):
            x_first_el = six.next(data())
            x_first_keys = sorted(x_first_el.keys())
            x_first_values = [x_first_el[key] for key in x_first_keys]
            types = [dtypes.as_dtype(col.dtype) for col in x_first_values]
            queue_shapes = [col.shape for col in x_first_values]
            get_feed_fn = _GeneratorFeedFn
        elif HAS_PANDAS and isinstance(data, pd.DataFrame):
            types = [
                dtypes.as_dtype(dt)
                for dt in [data.index.dtype] + list(data.dtypes)
            ]
            queue_shapes = [() for _ in types]
            get_feed_fn = _PandasFeedFn
        else:
            raise TypeError(
                "data must be a numpy array, an OrderedDict of numpy arrays, a "
                "generator, or a pandas DataFrame (if pandas is installed); got "
                "{}".format(type(data).__name__))

        pad_data = pad_value is not None
        if pad_data and get_feed_fn is not _GeneratorFeedFn:
            raise NotImplementedError(
                "padding is only available with generator usage")
        if shuffle and pad_data:
            raise NotImplementedError(
                "padding and shuffling data at the same time is not implemented"
            )

        # TODO(jamieas): TensorBoard warnings for all warnings below once available.

        if num_threads > 1 and num_epochs is not None:
            logging.warning(
                "enqueue_data was called with num_epochs and num_threads > 1. "
                "num_epochs is applied per thread, so this will produce more "
                "epochs than you probably intend. "
                "If you want to limit epochs, use one thread.")

        if shuffle and num_threads > 1 and num_epochs is not None:
            logging.warning(
                "enqueue_data was called with shuffle=True, num_threads > 1, and "
                "num_epochs. This will create multiple threads, all reading the "
                "array/dataframe in order adding to the same shuffling queue; the "
                "results will likely not be sufficiently shuffled.")

        if not shuffle and num_threads > 1:
            logging.warning(
                "enqueue_data was called with shuffle=False and num_threads > 1. "
                "This will create multiple threads, all reading the "
                "array/dataframe in order. If you want examples read in order, use"
                " one thread; if you want multiple threads, enable shuffling.")

        if shuffle:
            min_after_dequeue = int(capacity / 4 if min_after_dequeue is None
                                    else min_after_dequeue)
            queue = data_flow_ops.RandomShuffleQueue(capacity,
                                                     min_after_dequeue,
                                                     dtypes=types,
                                                     shapes=queue_shapes,
                                                     seed=seed)
        elif pad_data:
            min_after_dequeue = 0  # just for the summary text
            queue_shapes = list(
                map(
                    lambda x: tuple(list(x[:-1]) + [None])
                    if len(x) > 0 else x, queue_shapes))
            queue = data_flow_ops.PaddingFIFOQueue(capacity,
                                                   dtypes=types,
                                                   shapes=queue_shapes)
        else:
            min_after_dequeue = 0  # just for the summary text
            queue = data_flow_ops.FIFOQueue(capacity,
                                            dtypes=types,
                                            shapes=queue_shapes)

        enqueue_ops = []
        feed_fns = []

        for i in range(num_threads):
            # Note the placeholders have no shapes, so they will accept any
            # enqueue_size.  enqueue_many below will break them up.
            placeholders = [array_ops.placeholder(t) for t in types]

            enqueue_ops.append(queue.enqueue_many(placeholders))
            seed_i = None if seed is None else (i + 1) * seed

            if not pad_data:
                feed_fns.append(
                    get_feed_fn(placeholders,
                                data,
                                enqueue_size,
                                random_start=shuffle,
                                seed=seed_i,
                                num_epochs=num_epochs))
            else:
                feed_fns.append(
                    get_feed_fn(placeholders,
                                data,
                                enqueue_size,
                                random_start=shuffle,
                                seed=seed_i,
                                num_epochs=num_epochs,
                                pad_value=pad_value))

        runner = fqr._FeedingQueueRunner(  # pylint: disable=protected-access
            queue=queue,
            enqueue_ops=enqueue_ops,
            feed_fns=feed_fns)
        queue_runner.add_queue_runner(runner)

        full = (math_ops.cast(
            math_ops.maximum(0,
                             queue.size() - min_after_dequeue), dtypes.float32)
                * (1. / (capacity - min_after_dequeue)))
        # Note that name contains a '/' at the end so we intentionally do not place
        # a '/' after %s below.
        summary_name = (
            "queue/%sfraction_over_%d_of_%d_full" %
            (queue.name, min_after_dequeue, capacity - min_after_dequeue))
        summary.scalar(summary_name, full)
        return queue
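
The summary emitted at the end measures how far the queue sits above `min_after_dequeue`, as a fraction of the usable headroom `capacity - min_after_dequeue`. A small pure-Python sketch of that formula with illustrative numbers:

# Sketch of the "fraction over min_after_dequeue of capacity full" summary:
# only the headroom above min_after_dequeue counts toward fullness.
def fraction_over(queue_size, capacity, min_after_dequeue):
    return max(0, queue_size - min_after_dequeue) / float(
        capacity - min_after_dequeue)

print(fraction_over(250, capacity=1000, min_after_dequeue=250))   # 0.0
print(fraction_over(625, capacity=1000, min_after_dequeue=250))   # 0.5
print(fraction_over(1000, capacity=1000, min_after_dequeue=250))  # 1.0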
Example #48
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None,
                  colocate_gradients_with_ops=False,
                  increment_global_step=True):
    """Given loss and parameters for optimizer, returns a training op.

  Various ways of passing optimizers include:

  - by string specifying the name of the optimizer. See OPTIMIZER_CLS_NAMES
      for full list. E.g. `optimize_loss(..., optimizer='Adam')`.
  - by function taking learning rate `Tensor` as argument and returning an
      `Optimizer` instance. E.g. `optimize_loss(...,
      optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5))`.
    Alternatively, if `learning_rate` is `None`, the function takes no
    arguments. E.g. `optimize_loss(..., learning_rate=None,
      optimizer=lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5))`.
  - by a subclass of `Optimizer` having a single-argument constructor
      (the argument is the learning rate), such as AdamOptimizer or
      AdagradOptimizer. E.g. `optimize_loss(...,
      optimizer=tf.train.AdagradOptimizer)`.
  - by an instance of a subclass of `Optimizer`.
      E.g., `optimize_loss(..., optimizer=tf.train.AdagradOptimizer(0.5))`.

  Args:
    loss: Scalar `Tensor`.
    global_step: Scalar int `Tensor`, step counter to update on each step
                 unless `increment_global_step` is `False`. If not supplied,
                 it will be fetched from the default graph (see
                 `tf.train.get_global_step` for details). If it has
                 not been created, no step will be incremented with each weight
                 update. `learning_rate_decay_fn` requires `global_step`.
    learning_rate: float or `Tensor`, magnitude of update per each training
                   step. Can be `None`.
    optimizer: string, class or optimizer instance, used as trainer.
               string should be name of optimizer, like 'SGD',
                 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant.
               class should be sub-class of `tf.Optimizer` that implements
                 `compute_gradients` and `apply_gradients` functions.
               optimizer instance should be instantiation of `tf.Optimizer`
                 sub-class and have `compute_gradients` and `apply_gradients`
                 functions.
    gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this
                          value.
    gradient_multipliers: dict of variables or variable names to floats.
                          If present, gradients for specified
                          variables will be multiplied by given constant.
    clip_gradients: float, callable or `None`. If a float is provided, a global
      clipping is applied to prevent the norm of the gradient from exceeding
      this value. Alternatively, a callable can be provided, e.g.,
      `adaptive_clipping_fn()`.  This callable takes a list of 
      `(gradients, variables)` tuples and returns the same thing with the 
      gradients modified.
    learning_rate_decay_fn: function, takes `learning_rate` and `global_step`
                            `Tensor`s, returns `Tensor`.
                            Can be used to implement any learning rate decay
                            functions.
                            For example: `tf.train.exponential_decay`.
                            Ignored if `learning_rate` is not supplied.
    update_ops: list of update `Operation`s to execute at each step. If `None`,
                uses elements of UPDATE_OPS collection. The order of execution
                between `update_ops` and `loss` is non-deterministic.
    variables: list of variables to optimize or
               `None` to use all trainable variables.
    name: The name for this operation, used to scope operations and summaries.
    summaries: List of internal quantities to visualize on tensorboard. If not
               set, the loss, the learning rate, and the global norm of the
               gradients will be reported. The complete list of possible values
               is in OPTIMIZER_SUMMARIES.
    colocate_gradients_with_ops: If True, try colocating gradients with the
                                 corresponding op.
    increment_global_step: Whether to increment `global_step`. If your model
      calls `optimize_loss` multiple times per training step (e.g. to optimize
      different parts of the model), use this arg to avoid incrementing
      `global_step` more times than necessary.

  Returns:
    Training op.

  Raises:
    ValueError: if:
        * `loss` is an invalid type or shape.
        * `global_step` is an invalid type or shape.
        * `learning_rate` is an invalid type or value.
        * `optimizer` has the wrong type.
        * `clip_gradients` is neither float nor callable.
        * `learning_rate` and `learning_rate_decay_fn` are supplied, but no
          `global_step` is available.
        * `gradients` is empty.
  """
    loss = ops.convert_to_tensor(loss)
    contrib_framework.assert_scalar(loss)
    if global_step is None:
        global_step = train.get_global_step()
    else:
        train.assert_global_step(global_step)
    with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
        # Update ops take UPDATE_OPS collection if not provided.
        if update_ops is None:
            update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
        # Make sure update ops are run before computing the loss.
        if update_ops:
            loss = control_flow_ops.with_dependencies(list(update_ops), loss)

        # Learning rate variable, with possible decay.
        lr = None
        if learning_rate is not None:
            if (isinstance(learning_rate, ops.Tensor)
                    and learning_rate.get_shape().ndims == 0):
                lr = learning_rate
            elif isinstance(learning_rate, float):
                if learning_rate < 0.0:
                    raise ValueError("Invalid learning_rate %s.",
                                     learning_rate)
                lr = vs.get_variable(
                    "learning_rate", [],
                    trainable=False,
                    initializer=init_ops.constant_initializer(learning_rate))
            else:
                raise ValueError(
                    "Learning rate should be 0d Tensor or float. "
                    "Got %s of type %s" %
                    (str(learning_rate), str(type(learning_rate))))
        if summaries is None:
            summaries = ["loss", "learning_rate", "global_gradient_norm"]
        else:
            for summ in summaries:
                if summ not in OPTIMIZER_SUMMARIES:
                    raise ValueError(
                        "Summaries should be one of [%s], you provided %s." %
                        (", ".join(OPTIMIZER_SUMMARIES), summ))
        if learning_rate is not None and learning_rate_decay_fn is not None:
            if global_step is None:
                raise ValueError(
                    "global_step is required for learning_rate_decay_fn.")
            lr = learning_rate_decay_fn(lr, global_step)
            if "learning_rate" in summaries:
                summary.scalar("learning_rate", lr)

        # Create optimizer, given specified parameters.
        if isinstance(optimizer, six.string_types):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is string (%s)." % optimizer)
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s." %
                    (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
            opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
        elif (isinstance(optimizer, type)
              and issubclass(optimizer, optimizer_.Optimizer)):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is class (%s)." % optimizer)
            opt = optimizer(learning_rate=lr)
        elif isinstance(optimizer, optimizer_.Optimizer):
            opt = optimizer
        elif callable(optimizer):
            if learning_rate is not None:
                opt = optimizer(lr)
            else:
                opt = optimizer()
            if not isinstance(opt, optimizer_.Optimizer):
                raise ValueError(
                    "Unrecognized optimizer: function should return "
                    "subclass of Optimizer. Got %s." % str(opt))
        else:
            raise ValueError(
                "Unrecognized optimizer: should be string, "
                "subclass of Optimizer, instance of "
                "subclass of Optimizer or function with one argument. "
                "Got %s." % str(optimizer))

        # All trainable variables, if specific variables are not specified.
        if variables is None:
            variables = vars_.trainable_variables()

        # Compute gradients.
        gradients = opt.compute_gradients(
            loss,
            variables,
            colocate_gradients_with_ops=colocate_gradients_with_ops)

        # Optionally add gradient noise.
        if gradient_noise_scale is not None:
            gradients = _add_scaled_noise_to_gradients(gradients,
                                                       gradient_noise_scale)

        # Multiply some gradients.
        if gradient_multipliers is not None:
            gradients = _multiply_gradients(gradients, gradient_multipliers)
            if not gradients:
                raise ValueError(
                    "Empty list of (gradient, var) pairs encountered. This is most "
                    "likely to be caused by an improper value of gradient_multipliers."
                )

        if "global_gradient_norm" in summaries or "gradient_norm" in summaries:
            summary.scalar("global_norm/gradient_norm",
                           clip_ops.global_norm(list(zip(*gradients))[0]))

        # Optionally clip gradients by global norm.
        if isinstance(clip_gradients, float):
            gradients = _clip_gradients_by_norm(gradients, clip_gradients)
        elif callable(clip_gradients):
            gradients = clip_gradients(gradients)
        elif clip_gradients is not None:
            raise ValueError("Unknown type %s for clip_gradients" %
                             type(clip_gradients))

        # Add scalar summary for loss.
        if "loss" in summaries:
            summary.scalar("loss", loss)

        # Add histograms for variables, gradients and gradient norms.
        for gradient, variable in gradients:
            if isinstance(gradient, ops.IndexedSlices):
                grad_values = gradient.values
            else:
                grad_values = gradient

            if grad_values is not None:
                var_name = variable.name.replace(":", "_")
                if "gradients" in summaries:
                    summary.histogram("gradients/%s" % var_name, grad_values)
                if "gradient_norm" in summaries:
                    summary.scalar("gradient_norm/%s" % var_name,
                                   clip_ops.global_norm([grad_values]))

        if clip_gradients is not None and ("global_gradient_norm" in summaries
                                           or "gradient_norm" in summaries):
            summary.scalar("global_norm/clipped_gradient_norm",
                           clip_ops.global_norm(list(zip(*gradients))[0]))

        # Create gradient updates.
        grad_updates = opt.apply_gradients(
            gradients,
            global_step=global_step if increment_global_step else None,
            name="train")

        # Ensure the train_tensor computes grad_updates.
        train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)

        return train_tensor
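The clipping branch above treats a float `clip_gradients` as a global-norm threshold. A minimal standalone sketch of that case, assuming plain `tf.compat.v1` and a hypothetical two-variable model; it mirrors what a `_clip_gradients_by_norm` helper is expected to do, not the library's internals.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Hypothetical two-variable model; the values are arbitrary.
w = tf.Variable([1.0, 2.0], name="w")
b = tf.Variable(0.5, name="b")
loss = tf.reduce_sum(tf.square(w)) + tf.square(b)

opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)
grads_and_vars = opt.compute_gradients(loss, [w, b])
grads, variables = zip(*grads_and_vars)
# Clip by global norm, the same policy as the float branch above.
clipped, global_norm = tf.clip_by_global_norm(grads, clip_norm=1.0)
train_op = opt.apply_gradients(list(zip(clipped, variables)))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([global_norm, train_op])[0])  # norm before clipping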
Example #49
0
 def add_summary_ops(self, name, value):
     with ops.name_scope(self.name):
         summary_op = summary.scalar(name=name, tensor=value)
         fops.add_to_collection(fops.GraphKeys.SUMMARIES, summary_op)
Example #50
0
    def train(self, loss, predictions_dict, labels):
        """Grows a new tree and adds it to the ensemble.

    Args:
      loss: A scalar tensor representing average loss of examples.
      predictions_dict: Dictionary of Rank 2 `Tensor` representing information
          about predictions per example.
      labels: Rank 2 `Tensor` representing labels per example.

    Returns:
      An op that adds a new tree to the ensemble.

    Raises:
      ValueError: if inputs are not valid.
    """
        # Get the worker device from input dependencies.
        input_deps = (self._dense_floats + self._sparse_float_indices +
                      self._sparse_int_indices)
        worker_device = input_deps[0].device

        # Get tensors relevant for training and form the loss.
        predictions = predictions_dict[PREDICTIONS]
        partition_ids = predictions_dict[PARTITION_IDS]
        ensemble_stamp = predictions_dict[ENSEMBLE_STAMP]
        gradients = gradients_impl.gradients(loss,
                                             predictions,
                                             name="Gradients",
                                             colocate_gradients_with_ops=False,
                                             gate_gradients=0,
                                             aggregation_method=None)[0]
        strategy = self._learner_config.multi_class_strategy

        class_id = -1
        # Handle different multiclass strategies.
        if strategy == learner_pb2.LearnerConfig.TREE_PER_CLASS:
            # We build one vs rest trees.
            gradient_shape = tensor_shape.scalar()
            hessian_shape = tensor_shape.scalar()

            if self._logits_dimension == 1:
                # We have only 1 score, gradients is of shape [batch, 1].
                hessians = gradients_impl.gradients(
                    gradients,
                    predictions,
                    name="Hessian",
                    colocate_gradients_with_ops=False,
                    gate_gradients=0,
                    aggregation_method=None)[0]

                squeezed_gradients = array_ops.squeeze(gradients, axis=[1])
                squeezed_hessians = array_ops.squeeze(hessians, axis=[1])
            else:
                hessian_list = self._diagonal_hessian(gradients, predictions)
                # Assemble hessian list into a tensor.
                hessians = array_ops.stack(hessian_list, axis=1)

                # Choose the class for which the tree is built (one vs rest).
                class_id = math_ops.to_int32(
                    predictions_dict[NUM_TREES_ATTEMPTED] %
                    self._logits_dimension)

                # Use class id tensor to get the column with that index from gradients
                # and hessians.
                squeezed_gradients = array_ops.squeeze(
                    _get_column_by_index(gradients, class_id))
                squeezed_hessians = array_ops.squeeze(
                    _get_column_by_index(hessians, class_id))
        else:
            # Other multiclass strategies.
            gradient_shape = tensor_shape.TensorShape([self._logits_dimension])

            if strategy == learner_pb2.LearnerConfig.FULL_HESSIAN:
                hessian_shape = tensor_shape.TensorShape(
                    ([self._logits_dimension, self._logits_dimension]))
                hessian_list = self._full_hessian(gradients, predictions)
            else:
                # Diagonal hessian strategy.
                hessian_shape = tensor_shape.TensorShape(
                    ([self._logits_dimension]))
                hessian_list = self._diagonal_hessian(gradients, predictions)

            squeezed_gradients = gradients
            hessians = array_ops.stack(hessian_list, axis=1)
            squeezed_hessians = hessians

        # Get the per-example weights used for the quantile calculation.
        weights = self._get_weights(hessian_shape, squeezed_hessians)

        regularization_config = self._learner_config.regularization
        min_node_weight = self._learner_config.constraints.min_node_weight
        # Create all handlers ensuring resources are evenly allocated across PS.
        fc_name_idx = 0
        handlers = []
        init_stamp_token = constant_op.constant(0, dtype=dtypes.int64)
        with ops.device(self._get_replica_device_setter(worker_device)):
            # Create handlers for dense float columns
            for dense_float_column_idx in range(len(self._dense_floats)):
                fc_name = self._fc_names[fc_name_idx]
                handlers.append(
                    ordinal_split_handler.DenseSplitHandler(
                        l1_regularization=regularization_config.l1,
                        l2_regularization=regularization_config.l2,
                        tree_complexity_regularization=(
                            regularization_config.tree_complexity),
                        min_node_weight=min_node_weight,
                        feature_column_group_id=dense_float_column_idx,
                        epsilon=0.01,
                        num_quantiles=100,
                        dense_float_column=self._dense_floats[
                            dense_float_column_idx],
                        name=fc_name,
                        gradient_shape=gradient_shape,
                        hessian_shape=hessian_shape,
                        multiclass_strategy=strategy,
                        init_stamp_token=init_stamp_token))
                fc_name_idx += 1

            # Create handlers for sparse float columns.
            for sparse_float_column_idx in range(
                    len(self._sparse_float_indices)):
                fc_name = self._fc_names[fc_name_idx]
                handlers.append(
                    ordinal_split_handler.SparseSplitHandler(
                        l1_regularization=regularization_config.l1,
                        l2_regularization=regularization_config.l2,
                        tree_complexity_regularization=(
                            regularization_config.tree_complexity),
                        min_node_weight=min_node_weight,
                        feature_column_group_id=sparse_float_column_idx,
                        epsilon=0.01,
                        num_quantiles=100,
                        sparse_float_column=sparse_tensor.SparseTensor(
                            self._sparse_float_indices[sparse_float_column_idx],
                            self._sparse_float_values[sparse_float_column_idx],
                            self._sparse_float_shapes[sparse_float_column_idx]),
                        name=fc_name,
                        gradient_shape=gradient_shape,
                        hessian_shape=hessian_shape,
                        multiclass_strategy=strategy,
                        init_stamp_token=init_stamp_token))
                fc_name_idx += 1

            # Create handlers for sparse int columns.
            for sparse_int_column_idx in range(len(self._sparse_int_indices)):
                fc_name = self._fc_names[fc_name_idx]
                handlers.append(
                    categorical_split_handler.EqualitySplitHandler(
                        l1_regularization=regularization_config.l1,
                        l2_regularization=regularization_config.l2,
                        tree_complexity_regularization=(
                            regularization_config.tree_complexity),
                        min_node_weight=min_node_weight,
                        feature_column_group_id=sparse_int_column_idx,
                        sparse_int_column=sparse_tensor.SparseTensor(
                            self._sparse_int_indices[sparse_int_column_idx],
                            self._sparse_int_values[sparse_int_column_idx],
                            self._sparse_int_shapes[sparse_int_column_idx]),
                        name=fc_name,
                        gradient_shape=gradient_shape,
                        hessian_shape=hessian_shape,
                        multiclass_strategy=strategy,
                        init_stamp_token=init_stamp_token))
                fc_name_idx += 1

            # Create steps accumulator.
            steps_accumulator = stats_accumulator_ops.StatsAccumulator(
                stamp_token=0,
                gradient_shape=tensor_shape.scalar(),
                hessian_shape=tensor_shape.scalar(),
                name="StepsAccumulator")

            # Create bias stats accumulator.
            bias_stats_accumulator = stats_accumulator_ops.StatsAccumulator(
                stamp_token=0,
                gradient_shape=gradient_shape,
                hessian_shape=hessian_shape,
                name="BiasAccumulator")

            # Create ensemble stats variables.
            num_layer_examples = variables.Variable(
                initial_value=array_ops.zeros([], dtypes.int64),
                name="num_layer_examples",
                trainable=False)
            num_layer_steps = variables.Variable(
                initial_value=array_ops.zeros([], dtypes.int64),
                name="num_layer_steps",
                trainable=False)
            num_layers = variables.Variable(
                initial_value=array_ops.zeros([], dtypes.int64),
                name="num_layers",
                trainable=False)
            active_tree = variables.Variable(
                initial_value=array_ops.zeros([], dtypes.int64),
                name="active_tree",
                trainable=False)
            active_layer = variables.Variable(
                initial_value=array_ops.zeros([], dtypes.int64),
                name="active_layer",
                trainable=False)

        # Create ensemble stats summaries.
        summary.scalar("layer_stats/num_examples", num_layer_examples)
        summary.scalar("layer_stats/num_steps", num_layer_steps)
        summary.scalar("ensemble_stats/active_tree", active_tree)
        summary.scalar("ensemble_stats/active_layer", active_layer)

        # Update bias stats.
        stats_update_ops = []
        continue_centering = variables.Variable(
            initial_value=self._center_bias,
            name="continue_centering",
            trainable=False)
        stats_update_ops.append(
            control_flow_ops.cond(
                continue_centering,
                self._make_update_bias_stats_fn(ensemble_stamp, predictions,
                                                gradients,
                                                bias_stats_accumulator),
                control_flow_ops.no_op))

        # Update handler stats.
        handler_reads = {}
        for handler in handlers:
            handler_reads[handler] = handler.scheduled_reads()

        handler_results = batch_ops_utils.run_handler_scheduled_ops(
            handler_reads, ensemble_stamp, worker_device)
        per_handler_updates = {}
        # Each handler maps to two boolean values: whether the handler is
        # active for the current layer, and whether it will be active for the
        # next layer.
        subsampling_type = self._learner_config.WhichOneof("feature_fraction")
        if subsampling_type == "feature_fraction_per_level":
            seed = predictions_dict[NUM_LAYERS_ATTEMPTED]
            active_handlers_current_layer = stateless.stateless_random_uniform(
                shape=[len(handlers)], seed=[seed, 1])
            active_handlers_next_layer = stateless.stateless_random_uniform(
                shape=[len(handlers)], seed=[seed + 1, 1])
            active_handlers = array_ops.stack(
                [active_handlers_current_layer, active_handlers_next_layer],
                axis=1)
            active_handlers = (active_handlers <
                               self._learner_config.feature_fraction_per_level)
        elif subsampling_type == "feature_fraction_per_tree":
            seed = predictions_dict[NUM_TREES_ATTEMPTED]
            active_handlers_current_layer = stateless.stateless_random_uniform(
                shape=[len(handlers)], seed=[seed, 2])
            active_handlers_current_layer = (
                active_handlers_current_layer <
                self._learner_config.feature_fraction_per_tree)
            active_handlers = array_ops.stack(
                [active_handlers_current_layer,
                 array_ops.ones([len(handlers)], dtype=dtypes.bool)],
                axis=1)
        else:
            active_handlers = array_ops.ones([len(handlers), 2],
                                             dtype=dtypes.bool)

        # Prepare empty gradients and hessians when handlers are not ready.
        empty_hess_shape = [1] + hessian_shape.as_list()
        empty_grad_shape = [1] + gradient_shape.as_list()

        empty_gradients = constant_op.constant([],
                                               dtype=dtypes.float32,
                                               shape=empty_grad_shape)
        empty_hessians = constant_op.constant([],
                                              dtype=dtypes.float32,
                                              shape=empty_hess_shape)

        for handler_idx in range(len(handlers)):
            handler = handlers[handler_idx]
            is_active = active_handlers[handler_idx]
            updates, scheduled_updates = handler.update_stats(
                ensemble_stamp, partition_ids, squeezed_gradients,
                squeezed_hessians, empty_gradients, empty_hessians, weights,
                is_active, handler_results[handler])
            stats_update_ops.append(updates)
            per_handler_updates[handler] = scheduled_updates

        update_results = batch_ops_utils.run_handler_scheduled_ops(
            per_handler_updates, ensemble_stamp, worker_device)
        for update in update_results.values():
            stats_update_ops += update
        # Accumulate a step after updating stats.
        batch_size = math_ops.cast(array_ops.shape(labels)[0], dtypes.float32)
        with ops.control_dependencies(stats_update_ops):
            add_step_op = steps_accumulator.add(ensemble_stamp, [0], [[0, 0]],
                                                [batch_size], [1.0])

        # Determine learning rate.
        learning_rate_tuner = self._learner_config.learning_rate_tuner.WhichOneof(
            "tuner")
        if learning_rate_tuner == "fixed" or learning_rate_tuner == "dropout":
            tuner = getattr(self._learner_config.learning_rate_tuner,
                            learning_rate_tuner)
            learning_rate = tuner.learning_rate
        else:
            # TODO (nponomareva, soroush) do the line search. id:498 gh:499
            raise ValueError("Line search learning rate is not yet supported.")

        # After adding the step, decide if further processing is needed.
        ensemble_update_ops = [add_step_op]
        with ops.control_dependencies([add_step_op]):
            if self._is_chief:
                dropout_seed = predictions_dict[NUM_TREES_ATTEMPTED]

                # Get accumulated steps and examples for the current layer.
                (_, _, _, _, acc_examples,
                 acc_steps) = steps_accumulator.serialize()
                acc_examples = math_ops.cast(acc_examples[0], dtypes.int64)
                acc_steps = math_ops.cast(acc_steps[0], dtypes.int64)
                ensemble_update_ops.append(
                    num_layer_examples.assign(acc_examples))
                ensemble_update_ops.append(num_layer_steps.assign(acc_steps))
                # Determine whether we need to update tree ensemble.
                examples_per_layer = self._examples_per_layer
                if callable(examples_per_layer):
                    examples_per_layer = examples_per_layer(active_layer)
                ensemble_update_ops.append(
                    control_flow_ops.cond(
                        acc_examples >= examples_per_layer,
                        self._make_update_ensemble_fn(
                            ensemble_stamp, steps_accumulator,
                            bias_stats_accumulator, continue_centering,
                            learning_rate, handlers, num_layers, active_tree,
                            active_layer, dropout_seed, class_id),
                        control_flow_ops.no_op))

        # Calculate the loss to be reported.
        # Note that the loss is computed from predictions that include dropout,
        # so its value can look noisy across steps when the dropout ratio is
        # high. Refer to eval_loss instead when judging convergence.
        return control_flow_ops.group(*ensemble_update_ops)
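The hessian computation above relies on differentiating the gradients a second time. A minimal sketch of that double-`tf.gradients` pattern on a toy logistic loss, assuming `tf.compat.v1`; the handler and accumulator machinery is omitted.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

predictions = tf.constant([[0.3], [-1.2], [2.0]])  # [batch, 1] scores
labels = tf.constant([[1.0], [0.0], [1.0]])
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=predictions))

# First derivative of the loss with respect to the predictions.
gradients = tf.gradients(loss, predictions)[0]
# Because the loss decomposes per example, differentiating the gradients again
# yields the diagonal of the Hessian with respect to the predictions.
hessians = tf.gradients(gradients, predictions)[0]

with tf.Session() as sess:
    grad_vals, hess_vals = sess.run([gradients, hessians])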
Example #51
0
def prefetch_queue(tensors,
                   capacity=8,
                   num_threads=1,
                   dynamic_pad=False,
                   shared_name=None,
                   name=None):
    """Creates a queue to prefetch tensors from `tensors`.

  A queue runner for enqueuing tensors into the prefetch_queue is automatically
  added to the TF QueueRunners collection.

  Example:
  This is useful, for example, to pre-assemble input batches read with
  `tf.compat.v1.train.batch()` and enqueue the pre-assembled batches. Ops that
  dequeue from the pre-assembled queue will not pay the cost of assembling the
  batch.

  images, labels = tf.compat.v1.train.batch([image, label], batch_size=32,
  num_threads=4)
  batch_queue = prefetch_queue([images, labels])
  images, labels = batch_queue.dequeue()
  logits = Net(images)
  loss = Loss(logits, labels)

  Args:
    tensors: A list or dictionary of `Tensors` to enqueue in the buffer.
    capacity: An integer. The maximum number of elements in the queue.
    num_threads: An integer.  Number of threads running the enqueue op.
    dynamic_pad: Boolean.  Whether to allow variable dimensions in input shapes.
    shared_name: (optional). If set, this queue will be shared under the given
      name across multiple sessions.
    name: (Optional) A name for the operations.

  Returns:
    A queue from which you can dequeue tensors with the same type and shape
    as `tensors`.
  """
    if isinstance(tensors, dict):
        # Need to wrap the keys and values in list() since Python3 returns views.
        # We sort the keys so the order is consistent across runs.
        names = list(sorted(tensors.keys()))
        tensor_list = list([tensors[n] for n in names])
    else:
        names = None
        tensor_list = tensors

    with ops.name_scope(name, "prefetch_queue", tensor_list) as name:
        dtypes = [t.dtype for t in tensor_list]
        shapes = [t.get_shape() for t in tensor_list]
        queue = _which_queue(dynamic_pad)(capacity=capacity,
                                          dtypes=dtypes,
                                          shapes=shapes,
                                          names=names,
                                          shared_name=shared_name)
        enqueue_op = queue.enqueue(tensors)
        queue_runner.add_queue_runner(
            queue_runner.QueueRunner(queue, [enqueue_op] * num_threads))
        summary.scalar(
            "fraction_of_%d_full" % capacity,
            math_ops.cast(queue.size(), _dtypes.float32) * (1. / capacity))
        return queue
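When `tensors` is a dict, the queue is built with `names`, so dequeued elements come back as a dict keyed the same way. A hedged usage sketch, assuming `prefetch_queue` is in scope; `images`, `labels`, `my_network`, and `my_loss` are placeholders for user code.

# images and labels are assumed to come from an upstream batching op, e.g.
# tf.compat.v1.train.batch; my_network and my_loss are hypothetical callables.
batch_queue = prefetch_queue({'images': images, 'labels': labels}, capacity=4)
dequeued = batch_queue.dequeue()            # a dict with the same keys
logits = my_network(dequeued['images'])
loss = my_loss(logits, dequeued['labels'])

# The enqueue threads are started by the usual queue-runner machinery, e.g.
# tf.compat.v1.train.start_queue_runners(sess=sess, coord=coord).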
Example #52
0
    def _model_fn(features, labels, mode):
        """Function that returns predictions, training loss, and training op."""

        if (isinstance(features, ops.Tensor)
                or isinstance(features, sparse_tensor.SparseTensor)):
            features = {'features': features}
        if feature_columns:
            features = features.copy()

            if output_type == ModelBuilderOutputType.MODEL_FN_OPS:
                features.update(
                    layers.transform_features(features, feature_columns))
            else:
                for fc in feature_columns:
                    tensor = fc_core._transform_features(features, [fc])[fc]  # pylint: disable=protected-access
                    features[fc.name] = tensor

        weights = None
        if weights_name and weights_name in features:
            weights = features.pop(weights_name)

        keys = None
        if keys_name and keys_name in features:
            keys = features.pop(keys_name)

        # If we're doing eval, optionally ignore device_assigner.
        # Also ignore device assigner if we're exporting (mode == INFER)
        dev_assn = device_assigner
        if (mode == model_fn_lib.ModeKeys.INFER
                or (local_eval and mode == model_fn_lib.ModeKeys.EVAL)):
            dev_assn = None

        graph_builder = graph_builder_class(params, device_assigner=dev_assn)

        logits, tree_paths, regression_variance = graph_builder.inference_graph(
            features)

        summary.scalar('average_tree_size', graph_builder.average_size())
        # For binary classification problems, convert probabilities to logits.
        # Includes hack to get around the fact that a probability might be 0 or 1.
        if not params.regression and params.num_classes == 2:
            class_1_probs = array_ops.slice(logits, [0, 1], [-1, 1])
            logits = math_ops.log(
                math_ops.maximum(
                    class_1_probs /
                    math_ops.maximum(1.0 - class_1_probs, EPSILON), EPSILON))

        # labels might be None if we're doing prediction (which brings up the
        # question of why we force everything to adhere to a single model_fn).
        training_graph = None
        training_hooks = []
        if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
            with ops.control_dependencies([logits.op]):
                training_graph = control_flow_ops.group(
                    graph_builder.training_graph(features,
                                                 labels,
                                                 input_weights=weights,
                                                 num_trainers=num_trainers,
                                                 trainer_id=trainer_id),
                    state_ops.assign_add(training_util.get_global_step(), 1))

        # Put weights back in
        if weights is not None:
            features[weights_name] = weights

        # TensorForest's training graph isn't calculated directly from the loss
        # like many other models.
        def _train_fn(unused_loss):
            return training_graph

        # Ops are run in lexicographic order of their keys. Run the resource
        # clean-up op last.
        all_handles = graph_builder.get_all_resource_handles()
        ops_at_end = {
            '9: clean up resources':
            control_flow_ops.group(*[
                resource_variable_ops.destroy_resource_op(handle)
                for handle in all_handles
            ])
        }

        if report_feature_importances:
            ops_at_end['1: feature_importances'] = (
                graph_builder.feature_importances())

        training_hooks = [TensorForestRunOpAtEndHook(ops_at_end)]

        if output_type == ModelBuilderOutputType.MODEL_FN_OPS:
            model_ops = model_head.create_model_fn_ops(features=features,
                                                       labels=labels,
                                                       mode=mode,
                                                       train_op_fn=_train_fn,
                                                       logits=logits,
                                                       scope=head_scope)

            if early_stopping_rounds:
                training_hooks.append(
                    TensorForestLossHook(
                        early_stopping_rounds,
                        early_stopping_loss_threshold=(
                            early_stopping_loss_threshold),
                        loss_op=model_ops.loss))

            model_ops.training_hooks.extend(training_hooks)

            if keys is not None:
                model_ops.predictions[keys_name] = keys

            if params.inference_tree_paths:
                model_ops.predictions[TREE_PATHS_PREDICTION_KEY] = tree_paths

            model_ops.predictions[
                VARIANCE_PREDICTION_KEY] = regression_variance

            if include_all_in_serving:
                # In order to serve the variance we need to add the prediction dict
                # to output_alternatives dict.
                if not model_ops.output_alternatives:
                    model_ops.output_alternatives = {}
                model_ops.output_alternatives[ALL_SERVING_KEY] = (
                    constants.ProblemType.UNSPECIFIED, model_ops.predictions)

            return model_ops

        else:
            # Estimator spec
            estimator_spec = model_head.create_estimator_spec(
                features=features,
                mode=mode,
                labels=labels,
                train_op_fn=_train_fn,
                logits=logits)

            if early_stopping_rounds:
                training_hooks.append(
                    TensorForestLossHook(
                        early_stopping_rounds,
                        early_stopping_loss_threshold=(
                            early_stopping_loss_threshold),
                        loss_op=estimator_spec.loss))

            estimator_spec = estimator_spec._replace(
                training_hooks=training_hooks +
                list(estimator_spec.training_hooks))
            if keys is not None:
                estimator_spec.predictions[keys_name] = keys
            if params.inference_tree_paths:
                estimator_spec.predictions[
                    TREE_PATHS_PREDICTION_KEY] = tree_paths
            estimator_spec.predictions[
                VARIANCE_PREDICTION_KEY] = regression_variance

            if include_all_in_serving:
                # In order to serve the variance we need to add the prediction
                # dict to the export_outputs dict.
                outputs = estimator_spec.export_outputs
                if not outputs:
                    outputs = {}
                outputs[ALL_SERVING_KEY] = PredictOutput(
                    estimator_spec.predictions)
                estimator_spec = estimator_spec._replace(
                    export_outputs=outputs)

            return estimator_spec
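The '9: clean up resources' key above depends on the end-of-session hook running its ops in sorted key order. A minimal sketch of a hook with that behavior, assuming `tf.compat.v1`; it is a stand-in for the internal `TensorForestRunOpAtEndHook`, not its actual implementation.

import tensorflow.compat.v1 as tf

class RunOpsAtEndHook(tf.train.SessionRunHook):
    """Runs a dict of ops once at session end, in lexicographic key order."""

    def __init__(self, ops_by_name):
        self._ops_by_name = ops_by_name

    def end(self, session):
        for name in sorted(self._ops_by_name):
            session.run(self._ops_by_name[name])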
Example #53
0
def acgan_discriminator_loss(discriminator_real_classification_logits,
                             discriminator_gen_classification_logits,
                             one_hot_labels,
                             label_smoothing=0.0,
                             real_weights=1.0,
                             generated_weights=1.0,
                             scope=None,
                             loss_collection=ops.GraphKeys.LOSSES,
                             reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
                             add_summaries=False):
    """ACGAN loss for the discriminator.

  The ACGAN loss adds a classification loss to the conditional discriminator.
  Therefore, the discriminator must output a tuple consisting of
    (1) the real/fake prediction and
    (2) the logits for the classification (usually the last conv layer,
        flattened).

  For more details:
    ACGAN: https://arxiv.org/abs/1610.09585

  Args:
    discriminator_real_classification_logits: Classification logits for real
      data.
    discriminator_gen_classification_logits: Classification logits for generated
      data.
    one_hot_labels: A Tensor holding one-hot labels for the batch.
    label_smoothing: A float in [0, 1]. If greater than 0, smooth the labels for
      "discriminator on real data" as suggested in
      https://arxiv.org/pdf/1701.00160
    real_weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `discriminator_real_outputs`, and must be broadcastable to
      `discriminator_real_outputs` (i.e., all dimensions must be either `1`, or
      the same as the corresponding dimension).
    generated_weights: Same as `real_weights`, but for
      `discriminator_gen_classification_logits`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: A `tf.compat.v1.losses.Reduction` to apply to loss.
    add_summaries: Whether or not to add summaries for the loss.

  Returns:
    A loss Tensor. Shape depends on `reduction`.

  Raises:
    TypeError: If the discriminator does not output a tuple.
  """
    with ops.name_scope(
            scope, 'acgan_discriminator_loss',
        (discriminator_real_classification_logits,
         discriminator_gen_classification_logits, one_hot_labels)) as scope:
        loss_on_generated = losses.softmax_cross_entropy(
            one_hot_labels,
            discriminator_gen_classification_logits,
            weights=generated_weights,
            scope=scope,
            loss_collection=None,
            reduction=reduction)
        loss_on_real = losses.softmax_cross_entropy(
            one_hot_labels,
            discriminator_real_classification_logits,
            weights=real_weights,
            label_smoothing=label_smoothing,
            scope=scope,
            loss_collection=None,
            reduction=reduction)
        loss = loss_on_generated + loss_on_real
        util.add_loss(loss, loss_collection)

        if add_summaries:
            summary.scalar('discriminator_gen_ac_loss', loss_on_generated)
            summary.scalar('discriminator_real_ac_loss', loss_on_real)
            summary.scalar('discriminator_ac_loss', loss)

    return loss
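A hedged usage sketch with toy tensors, assuming the function above is in scope along with its module-level imports; the three-class logits and labels below are arbitrary illustrative values.

import tensorflow.compat.v1 as tf

one_hot_labels = tf.constant([[1.0, 0.0, 0.0],
                              [0.0, 0.0, 1.0]])
real_class_logits = tf.constant([[2.0, -1.0, 0.3],
                                 [0.1, 0.2, 1.5]])
gen_class_logits = tf.constant([[0.4, 0.4, 0.2],
                                [-0.3, 0.0, 0.8]])

disc_ac_loss = acgan_discriminator_loss(
    real_class_logits, gen_class_logits, one_hot_labels, add_summaries=True)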
Example #54
0
def wasserstein_gradient_penalty(
        real_data,
        generated_data,
        generator_inputs,
        discriminator_fn,
        discriminator_scope,
        epsilon=1e-10,
        target=1.0,
        one_sided=False,
        weights=1.0,
        scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
        add_summaries=False):
    """The gradient penalty for the Wasserstein discriminator loss.

  See `Improved Training of Wasserstein GANs`
  (https://arxiv.org/abs/1704.00028) for more details.

  Args:
    real_data: Real data.
    generated_data: Output of the generator.
    generator_inputs: Exact argument to pass to the generator, which is used as
      optional conditioning to the discriminator.
    discriminator_fn: A discriminator function that conforms to TF-GAN API.
    discriminator_scope: If not `None`, reuse discriminators from this scope.
    epsilon: A small positive number added for numerical stability when
      computing the gradient norm.
    target: Optional Python number or `Tensor` indicating the target value of
      gradient norm. Defaults to 1.0.
    one_sided: If `True`, penalty proposed in https://arxiv.org/abs/1709.08894
      is used. Defaults to `False`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `real_data` and `generated_data`, and must be broadcastable to them (i.e.,
      all dimensions must be either `1`, or the same as the corresponding
      dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: A `tf.compat.v1.losses.Reduction` to apply to loss.
    add_summaries: Whether or not to add summaries for the loss.

  Returns:
    A loss Tensor. The shape depends on `reduction`.

  Raises:
    ValueError: If the rank of data Tensors is unknown.
  """
    with ops.name_scope(scope, 'wasserstein_gradient_penalty',
                        (real_data, generated_data)) as scope:
        real_data = ops.convert_to_tensor(real_data)
        generated_data = ops.convert_to_tensor(generated_data)
        if real_data.shape.ndims is None:
            raise ValueError('`real_data` can\'t have unknown rank.')
        if generated_data.shape.ndims is None:
            raise ValueError('`generated_data` can\'t have unknown rank.')

        differences = generated_data - real_data
        batch_size = differences.shape.dims[0].value or array_ops.shape(
            differences)[0]
        alpha_shape = [batch_size] + [1] * (differences.shape.ndims - 1)
        alpha = random_ops.random_uniform(shape=alpha_shape)
        interpolates = real_data + (alpha * differences)

        with ops.name_scope(
                None):  # Clear scope so update ops are added properly.
            # Reuse variables if variables already exists.
            with variable_scope.variable_scope(
                    discriminator_scope,
                    'gpenalty_dscope',
                    reuse=variable_scope.AUTO_REUSE):
                disc_interpolates = discriminator_fn(interpolates,
                                                     generator_inputs)

        if isinstance(disc_interpolates, tuple):
            # ACGAN case: disc outputs more than one tensor
            disc_interpolates = disc_interpolates[0]

        gradients = gradients_impl.gradients(disc_interpolates,
                                             interpolates)[0]
        gradient_squares = math_ops.reduce_sum(
            math_ops.square(gradients),
            axis=list(range(1, gradients.shape.ndims)))
        # Propagate shape information, if possible.
        if isinstance(batch_size, int):
            gradient_squares.set_shape([batch_size] +
                                       gradient_squares.shape.as_list()[1:])
        # For numerical stability, add epsilon to the sum before taking the square
        # root. Note tf.norm does not add epsilon.
        slopes = math_ops.sqrt(gradient_squares + epsilon)
        penalties = slopes / target - 1.0
        if one_sided:
            penalties = math_ops.maximum(0., penalties)
        penalties_squared = math_ops.square(penalties)
        penalty = losses.compute_weighted_loss(penalties_squared,
                                               weights,
                                               scope=scope,
                                               loss_collection=loss_collection,
                                               reduction=reduction)

        if add_summaries:
            summary.scalar('gradient_penalty_loss', penalty)

        return penalty
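A hedged sketch of pairing the penalty with a Wasserstein critic loss, assuming the function above is in scope, `tf.compat.v1`, and a toy linear critic; the names below are illustrative and not part of the library.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def toy_discriminator(data, unused_conditioning):
    # Variables are created with get_variable so the AUTO_REUSE scope opened
    # inside wasserstein_gradient_penalty can reuse them.
    kernel = tf.get_variable('disc_kernel', shape=[4, 1])
    return tf.matmul(data, kernel)

real_data = tf.random_normal([8, 4])
generated_data = tf.random_normal([8, 4])

with tf.variable_scope('discriminator') as disc_scope:
    toy_discriminator(real_data, None)  # build the critic variables once

gp = wasserstein_gradient_penalty(
    real_data,
    generated_data,
    generator_inputs=None,
    discriminator_fn=toy_discriminator,
    discriminator_scope=disc_scope,
    one_sided=True)
# A typical WGAN-GP discriminator objective would then be roughly:
#   disc_loss = wasserstein_discriminator_loss(...) + 10.0 * gp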
Example #55
0
  def _train_model(self, input_fn, hooks, saving_listeners):
    worker_hooks = []
    with ops.Graph().as_default() as g, g.device(self._device_fn):
      random_seed.set_random_seed(self._config.tf_random_seed)
      global_step_tensor = self._create_and_assert_global_step(g)
      global_step_read_tensor = training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
      features, labels = self._get_features_and_labels_from_input_fn(
          input_fn, model_fn_lib.ModeKeys.TRAIN)
      with ops.control_dependencies([global_step_read_tensor]):
        estimator_spec = self._call_model_fn(
            features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
      # Check if the user created a loss summary, and add one if they didn't.
      # We assume here that the summary is called 'loss'. If it is not, we will
      # make another one with the name 'loss' to ensure it shows up in the right
      # graph in TensorBoard.
      if not any([x.op.name == 'loss'
                  for x in ops.get_collection(ops.GraphKeys.SUMMARIES)]):
        summary.scalar('loss', estimator_spec.loss)
      ops.add_to_collection(ops.GraphKeys.LOSSES, estimator_spec.loss)
      worker_hooks.extend(hooks)
      worker_hooks.extend([
          training.NanTensorHook(estimator_spec.loss),
          training.LoggingTensorHook(
              {
                  'loss': estimator_spec.loss,
                  'step': global_step_tensor
              },
              every_n_iter=100)
      ])
      worker_hooks.extend(estimator_spec.training_hooks)

      if not (estimator_spec.scaffold.saver or
              ops.get_collection(ops.GraphKeys.SAVERS)):
        ops.add_to_collection(
            ops.GraphKeys.SAVERS,
            training.Saver(
                sharded=True,
                max_to_keep=self._config.keep_checkpoint_max,
                keep_checkpoint_every_n_hours=(
                    self._config.keep_checkpoint_every_n_hours),
                defer_build=True,
                save_relative_paths=True))

      chief_hooks = []
      all_hooks = worker_hooks + list(estimator_spec.training_chief_hooks)
      saver_hooks = [
          h for h in all_hooks if isinstance(h, training.CheckpointSaverHook)]
      if (self._config.save_checkpoints_secs or
          self._config.save_checkpoints_steps):
        if not saver_hooks:
          chief_hooks = [
              training.CheckpointSaverHook(
                  self._model_dir,
                  save_secs=self._config.save_checkpoints_secs,
                  save_steps=self._config.save_checkpoints_steps,
                  scaffold=estimator_spec.scaffold)
          ]
          saver_hooks = [chief_hooks[0]]
      if saving_listeners:
        if not saver_hooks:
          raise ValueError(
              'There should be a CheckpointSaverHook to use saving_listeners. '
              'Please set one of the RunConfig.save_checkpoints_steps or '
              'RunConfig.save_checkpoints_secs.')
        else:
          # It is expected to have one CheckpointSaverHook. If multiple, we pick
          # up the first one to add listener.
          saver_hooks[0]._listeners.extend(saving_listeners)  # pylint: disable=protected-access
      with training.MonitoredTrainingSession(
          master=self._config.master,
          is_chief=self._config.is_chief,
          checkpoint_dir=self._model_dir,
          scaffold=estimator_spec.scaffold,
          hooks=worker_hooks,
          chief_only_hooks=(
              tuple(chief_hooks) + tuple(estimator_spec.training_chief_hooks)),
          save_checkpoint_secs=0,  # Saving is handled by a hook.
          save_summaries_steps=self._config.save_summary_steps,
          config=self._session_config,
          log_step_count_steps=self._config.log_step_count_steps) as mon_sess:
        loss = None
        while not mon_sess.should_stop():
          _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
      return loss
Example #56
0
def minimax_discriminator_loss(
        discriminator_real_outputs,
        discriminator_gen_outputs,
        label_smoothing=0.25,
        real_weights=1.0,
        generated_weights=1.0,
        scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
        add_summaries=False):
    """Original minimax discriminator loss for GANs, with label smoothing.

  Note that the authors don't recommend using this loss. A more practically
  useful loss is `modified_discriminator_loss`.

  L = - real_weights * log(sigmoid(D(x)))
      - generated_weights * log(1 - sigmoid(D(G(z))))

  See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661) for more
  details.

  Args:
    discriminator_real_outputs: Discriminator output on real data.
    discriminator_gen_outputs: Discriminator output on generated data. Expected
      to be in the range of (-inf, inf).
    label_smoothing: The amount of smoothing for positive labels. This technique
      is taken from `Improved Techniques for Training GANs`
      (https://arxiv.org/abs/1606.03498). `0.0` means no smoothing.
    real_weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `real_data`, and must be broadcastable to `real_data` (i.e., all
      dimensions must be either `1`, or the same as the corresponding
      dimension).
    generated_weights: Same as `real_weights`, but for `generated_data`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: A `tf.compat.v1.losses.Reduction` to apply to loss.
    add_summaries: Whether or not to add summaries for the loss.

  Returns:
    A loss Tensor. The shape depends on `reduction`.
  """
    with ops.name_scope(
            scope, 'discriminator_minimax_loss',
        (discriminator_real_outputs, discriminator_gen_outputs, real_weights,
         generated_weights, label_smoothing)) as scope:

        # -log(sigmoid(D(x))), with the positive labels smoothed by
        # label_smoothing.
        loss_on_real = losses.sigmoid_cross_entropy(
            array_ops.ones_like(discriminator_real_outputs),
            discriminator_real_outputs,
            real_weights,
            label_smoothing,
            scope,
            loss_collection=None,
            reduction=reduction)
        # -log(1 - sigmoid(D(G(z))))
        loss_on_generated = losses.sigmoid_cross_entropy(
            array_ops.zeros_like(discriminator_gen_outputs),
            discriminator_gen_outputs,
            generated_weights,
            scope=scope,
            loss_collection=None,
            reduction=reduction)

        loss = loss_on_real + loss_on_generated
        util.add_loss(loss, loss_collection)

        if add_summaries:
            summary.scalar('discriminator_gen_minimax_loss', loss_on_generated)
            summary.scalar('discriminator_real_minimax_loss', loss_on_real)
            summary.scalar('discriminator_minimax_loss', loss)

    return loss
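With `label_smoothing=0` and unit weights, the docstring formula reduces to a pair of softplus terms under the default `SUM_BY_NONZERO_WEIGHTS` reduction (which averages when all weights are one). A small sanity-check sketch, assuming the function above is in scope and arbitrary discriminator outputs.

import tensorflow.compat.v1 as tf

d_real = tf.constant([1.5, -0.2])   # D(x)
d_gen = tf.constant([0.7, -1.0])    # D(G(z))

loss = minimax_discriminator_loss(d_real, d_gen, label_smoothing=0.0)

# Hand-computed equivalent: mean(-log(sigmoid(D(x)))) +
# mean(-log(1 - sigmoid(D(G(z))))), using the softplus identities
# -log(sigmoid(z)) = softplus(-z) and -log(1 - sigmoid(z)) = softplus(z).
manual = (tf.reduce_mean(tf.nn.softplus(-d_real)) +
          tf.reduce_mean(tf.nn.softplus(d_gen)))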
Example #57
0
    def create_estimator_spec(self,
                              features,
                              mode,
                              logits,
                              labels=None,
                              train_op_fn=None):
        """See `Head`."""
        # Predict.
        with ops.name_scope(self._name, 'head'):
            logits = _check_logits(logits, self._logits_dimension)
            predictions = {prediction_keys.PredictionKeys.PREDICTIONS: logits}
            if mode == model_fn.ModeKeys.PREDICT:
                regression_output = export_output.RegressionOutput(
                    value=logits)
                return model_fn.EstimatorSpec(
                    mode=model_fn.ModeKeys.PREDICT,
                    predictions=predictions,
                    export_outputs={
                        _DEFAULT_SERVING_KEY:
                        regression_output,
                        _REGRESS_SERVING_KEY:
                        regression_output,
                        _PREDICT_SERVING_KEY:
                        export_output.PredictOutput(predictions)
                    })

            # Eval.
            unweighted_loss, _ = self.create_loss(features=features,
                                                  mode=mode,
                                                  logits=logits,
                                                  labels=labels)
            weights = _weights(features, self._weight_column)
            training_loss = losses.compute_weighted_loss(
                unweighted_loss,
                weights=weights,
                reduction=losses.Reduction.SUM)
            if mode == model_fn.ModeKeys.EVAL:
                # Estimator already adds a metric for loss.
                eval_metric_ops = {
                    metric_keys.MetricKeys.LOSS_MEAN:
                    metrics_lib.mean(unweighted_loss, weights=weights)
                }
                return model_fn.EstimatorSpec(mode=model_fn.ModeKeys.EVAL,
                                              predictions=predictions,
                                              loss=training_loss,
                                              eval_metric_ops=eval_metric_ops)

            # Train.
            if train_op_fn is None:
                raise ValueError('train_op_fn can not be None.')
        with ops.name_scope(''):
            summary.scalar(
                _summary_key(self._name, metric_keys.MetricKeys.LOSS),
                training_loss)
            summary.scalar(
                _summary_key(self._name, metric_keys.MetricKeys.LOSS_MEAN),
                losses.compute_weighted_loss(unweighted_loss,
                                             weights=weights,
                                             reduction=losses.Reduction.MEAN))
        return model_fn.EstimatorSpec(mode=model_fn.ModeKeys.TRAIN,
                                      predictions=predictions,
                                      loss=training_loss,
                                      train_op=train_op_fn(training_loss))
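The head above reports two numbers derived from the same unweighted loss: a `SUM`-reduced training loss and a `MEAN`-reduced summary. A small sketch of the difference, assuming `tf.compat.v1` and arbitrary per-example losses and weights.

import tensorflow.compat.v1 as tf

unweighted_loss = tf.constant([0.5, 2.0, 1.0])
weights = tf.constant([1.0, 1.0, 2.0])

# SUM: sum of weighted losses -> 0.5 + 2.0 + 2.0 = 4.5
training_loss = tf.losses.compute_weighted_loss(
    unweighted_loss, weights=weights, reduction=tf.losses.Reduction.SUM)
# MEAN: weighted sum divided by the sum of weights -> 4.5 / 4.0 = 1.125
mean_loss = tf.losses.compute_weighted_loss(
    unweighted_loss, weights=weights, reduction=tf.losses.Reduction.MEAN)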
Example #58
0
    def _model_fn(features, labels, mode):
        """Function that returns predictions, training loss, and training op."""
        if (isinstance(features, ops.Tensor)
                or isinstance(features, sparse_tensor.SparseTensor)):
            features = {'features': features}
        if feature_columns:
            features = features.copy()
            features.update(
                layers.transform_features(features, feature_columns))

        weights = None
        if weights_name and weights_name in features:
            weights = features.pop(weights_name)

        keys = None
        if keys_name and keys_name in features:
            keys = features.pop(keys_name)

        # If we're doing eval, optionally ignore device_assigner.
        # Also ignore device assigner if we're exporting (mode == INFER)
        dev_assn = device_assigner
        if (mode == model_fn_lib.ModeKeys.INFER
                or (local_eval and mode == model_fn_lib.ModeKeys.EVAL)):
            dev_assn = None

        graph_builder = graph_builder_class(params, device_assigner=dev_assn)

        logits, tree_paths, regression_variance = graph_builder.inference_graph(
            features)

        summary.scalar('average_tree_size', graph_builder.average_size())
        # For binary classification problems, convert probabilities to logits.
        # Includes hack to get around the fact that a probability might be 0 or 1.
        if not params.regression and params.num_classes == 2:
            class_1_probs = array_ops.slice(logits, [0, 1], [-1, 1])
            logits = math_ops.log(
                math_ops.maximum(
                    class_1_probs /
                    math_ops.maximum(1.0 - class_1_probs, EPSILON), EPSILON))

        # labels might be None if we're doing prediction (which brings up the
        # question of why we force everything to adhere to a single model_fn).
        training_graph = None
        training_hooks = []
        if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
            with ops.control_dependencies([logits.op]):
                training_graph = control_flow_ops.group(
                    graph_builder.training_graph(features,
                                                 labels,
                                                 input_weights=weights,
                                                 num_trainers=num_trainers,
                                                 trainer_id=trainer_id),
                    state_ops.assign_add(contrib_framework.get_global_step(),
                                         1))

        # Put weights back in
        if weights is not None:
            features[weights_name] = weights

        # TensorForest's training graph isn't calculated directly from the loss
        # like many other models.
        def _train_fn(unused_loss):
            return training_graph

        model_ops = model_head.create_model_fn_ops(features=features,
                                                   labels=labels,
                                                   mode=mode,
                                                   train_op_fn=_train_fn,
                                                   logits=logits,
                                                   scope=head_scope)

        # Ops are run in lexicographic order of their keys. Run the resource
        # clean-up op last.
        all_handles = graph_builder.get_all_resource_handles()
        ops_at_end = {
            '9: clean up resources':
            control_flow_ops.group(*[
                resource_variable_ops.destroy_resource_op(handle)
                for handle in all_handles
            ])
        }

        if report_feature_importances:
            ops_at_end['1: feature_importances'] = (
                graph_builder.feature_importances())

        training_hooks.append(TensorForestRunOpAtEndHook(ops_at_end))

        if early_stopping_rounds:
            training_hooks.append(
                TensorForestLossHook(
                    early_stopping_rounds,
                    early_stopping_loss_threshold=early_stopping_loss_threshold,
                    loss_op=model_ops.loss))

        model_ops.training_hooks.extend(training_hooks)

        if keys is not None:
            model_ops.predictions[keys_name] = keys

        if params.inference_tree_paths:
            model_ops.predictions[TREE_PATHS_PREDICTION_KEY] = tree_paths

        if params.regression:
            model_ops.predictions[
                VARIANCE_PREDICTION_KEY] = regression_variance

        return model_ops
Example #59
0
def _wals_factorization_model_function(features, labels, mode, params):
    """Model function for the WALSFactorization estimator.

  Args:
    features: Dictionary of features. See WALSMatrixFactorization.
    labels: Must be None.
    mode: A model_fn.ModeKeys object.
    params: Dictionary of parameters containing arguments passed to the
      WALSMatrixFactorization constructor.

  Returns:
    A ModelFnOps object.

  Raises:
    ValueError: If `mode` is not recognized.
  """
    assert labels is None
    use_factors_weights_cache = (
        params["use_factors_weights_cache_for_training"]
        and mode == model_fn.ModeKeys.TRAIN)
    use_gramian_cache = (params["use_gramian_cache_for_training"]
                         and mode == model_fn.ModeKeys.TRAIN)
    max_sweeps = params["max_sweeps"]
    model = factorization_ops.WALSModel(
        params["num_rows"],
        params["num_cols"],
        params["embedding_dimension"],
        unobserved_weight=params["unobserved_weight"],
        regularization=params["regularization_coeff"],
        row_init=params["row_init"],
        col_init=params["col_init"],
        num_row_shards=params["num_row_shards"],
        num_col_shards=params["num_col_shards"],
        row_weights=params["row_weights"],
        col_weights=params["col_weights"],
        use_factors_weights_cache=use_factors_weights_cache,
        use_gramian_cache=use_gramian_cache)

    # Get input rows and cols. We either update rows or columns depending on
    # the value of row_sweep, which is maintained using a session hook.
    input_rows = features[WALSMatrixFactorization.INPUT_ROWS]
    input_cols = features[WALSMatrixFactorization.INPUT_COLS]

    # TRAIN mode:
    if mode == model_fn.ModeKeys.TRAIN:
        # Training consists of the following ops (controlled using a SweepHook).
        # Before a row sweep:
        #   row_update_prep_gramian_op
        #   initialize_row_update_op
        # During a row sweep:
        #   update_row_factors_op
        # Before a col sweep:
        #   col_update_prep_gramian_op
        #   initialize_col_update_op
        # During a col sweep:
        #   update_col_factors_op

        is_row_sweep_var = variable_scope.variable(
            True,
            trainable=False,
            name="is_row_sweep",
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        is_sweep_done_var = variable_scope.variable(
            False,
            trainable=False,
            name="is_sweep_done",
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        completed_sweeps_var = variable_scope.variable(
            0,
            trainable=False,
            name=WALSMatrixFactorization.COMPLETED_SWEEPS,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        loss_var = variable_scope.variable(
            0.,
            trainable=False,
            name=WALSMatrixFactorization.LOSS,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        # The root weighted squared error =
        #   \\(\sqrt( \sum_{i,j} w_ij * (a_ij - r_ij)^2 / \sum_{i,j} w_ij )\\)
        rwse_var = variable_scope.variable(
            0.,
            trainable=False,
            name=WALSMatrixFactorization.RWSE,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])

        summary.scalar("loss", loss_var)
        summary.scalar("root_weighted_squared_error", rwse_var)
        summary.scalar("completed_sweeps", completed_sweeps_var)

        def create_axis_ops(sp_input, num_items, update_fn, axis_name):
            """Creates book-keeping and training ops for a given axis.

      Args:
        sp_input: A SparseTensor corresponding to the row or column batch.
        num_items: An integer, the total number of items of this axis.
        update_fn: A function that takes one argument (`sp_input`), and that
        returns a tuple of
          * new_factors: A float Tensor of the factor values after update.
          * update_op: a TensorFlow op which updates the factors.
          * loss: A float Tensor, the unregularized loss.
          * reg_loss: A float Tensor, the regularization loss.
          * sum_weights: A float Tensor, the sum of factor weights.
        axis_name: A string that specifies the name of the axis.

      Returns:
        A tuple consisting of:
          * reset_processed_items_op: A TensorFlow op, to be run before the
            beginning of any sweep. It marks all items as not-processed.
          * axis_train_op: A Tensorflow op, to be run during this axis' sweeps.
      """
            processed_items_init = array_ops.fill(dims=[num_items],
                                                  value=False)
            with ops.colocate_with(processed_items_init):
                processed_items = variable_scope.variable(
                    processed_items_init,
                    collections=[ops.GraphKeys.GLOBAL_VARIABLES],
                    trainable=False,
                    name="processed_" + axis_name)
            _, update_op, loss, reg, sum_weights = update_fn(sp_input)
            input_indices = sp_input.indices[:, 0]
            with ops.control_dependencies([
                    update_op,
                    state_ops.assign(loss_var, loss + reg),
                    state_ops.assign(rwse_var,
                                     math_ops.sqrt(loss / sum_weights))
            ]):
                with ops.colocate_with(processed_items):
                    update_processed_items = state_ops.scatter_update(
                        processed_items,
                        input_indices,
                        array_ops.ones_like(input_indices, dtype=dtypes.bool),
                        name="update_processed_{}_indices".format(axis_name))
                with ops.control_dependencies([update_processed_items]):
                    is_sweep_done = math_ops.reduce_all(processed_items)
                    axis_train_op = control_flow_ops.group(
                        state_ops.assign(is_sweep_done_var, is_sweep_done),
                        state_ops.assign_add(
                            completed_sweeps_var,
                            math_ops.cast(is_sweep_done, dtypes.int32)),
                        name="{}_sweep_train_op".format(axis_name))
            return processed_items.initializer, axis_train_op

        reset_processed_rows_op, row_train_op = create_axis_ops(
            input_rows, params["num_rows"],
            lambda x: model.update_row_factors(sp_input=x,
                                               transpose_input=False), "rows")
        reset_processed_cols_op, col_train_op = create_axis_ops(
            input_cols, params["num_cols"],
            lambda x: model.update_col_factors(sp_input=x,
                                               transpose_input=True), "cols")
        switch_op = control_flow_ops.group(
            state_ops.assign(is_row_sweep_var,
                             math_ops.logical_not(is_row_sweep_var)),
            reset_processed_rows_op,
            reset_processed_cols_op,
            name="sweep_switch_op")
        row_prep_ops = [
            model.row_update_prep_gramian_op, model.initialize_row_update_op
        ]
        col_prep_ops = [
            model.col_update_prep_gramian_op, model.initialize_col_update_op
        ]
        init_op = model.worker_init
        sweep_hook = _SweepHook(is_row_sweep_var, is_sweep_done_var, init_op,
                                row_prep_ops, col_prep_ops, row_train_op,
                                col_train_op, switch_op)
        global_step_hook = _IncrementGlobalStepHook()
        training_hooks = [sweep_hook, global_step_hook]
        if max_sweeps is not None:
            training_hooks.append(_StopAtSweepHook(max_sweeps))

        return model_fn.ModelFnOps(mode=model_fn.ModeKeys.TRAIN,
                                   predictions={},
                                   loss=loss_var,
                                   eval_metric_ops={},
                                   train_op=control_flow_ops.no_op(),
                                   training_hooks=training_hooks)

    # INFER mode
    elif mode == model_fn.ModeKeys.INFER:
        projection_weights = features.get(
            WALSMatrixFactorization.PROJECTION_WEIGHTS)

        def get_row_projection():
            return model.project_row_factors(
                sp_input=input_rows,
                projection_weights=projection_weights,
                transpose_input=False)

        def get_col_projection():
            return model.project_col_factors(
                sp_input=input_cols,
                projection_weights=projection_weights,
                transpose_input=True)

        predictions = {
            WALSMatrixFactorization.PROJECTION_RESULT:
            control_flow_ops.cond(
                features[WALSMatrixFactorization.PROJECT_ROW],
                get_row_projection, get_col_projection)
        }
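        # PROJECTION_RESULT therefore holds either projected row factors or
        # projected column factors, depending on the boolean PROJECT_ROW
        # input feature.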

        return model_fn.ModelFnOps(mode=model_fn.ModeKeys.INFER,
                                   predictions=predictions,
                                   loss=None,
                                   eval_metric_ops={},
                                   train_op=control_flow_ops.no_op(),
                                   training_hooks=[])

    # EVAL mode
    elif mode == model_fn.ModeKeys.EVAL:

        def get_row_loss():
            _, _, loss, reg, _ = model.update_row_factors(
                sp_input=input_rows, transpose_input=False)
            return loss + reg

        def get_col_loss():
            _, _, loss, reg, _ = model.update_col_factors(sp_input=input_cols,
                                                          transpose_input=True)
            return loss + reg

        loss = control_flow_ops.cond(
            features[WALSMatrixFactorization.PROJECT_ROW], get_row_loss,
            get_col_loss)
        return model_fn.ModelFnOps(mode=model_fn.ModeKeys.EVAL,
                                   predictions={},
                                   loss=loss,
                                   eval_metric_ops={},
                                   train_op=control_flow_ops.no_op(),
                                   training_hooks=[])

    else:
        raise ValueError("mode=%s is not recognized." % str(mode))
Example #60
0
def _bt_model_fn(
    features,
    labels,
    mode,
    head,
    feature_columns,
    tree_hparams,
    n_batches_per_layer,
    config,
    closed_form_grad_and_hess_fn=None,
    example_id_column_name=None,
    # TODO(youngheek): replace this later using other options.
    train_in_memory=False,
    name='boosted_trees'):
  """Gradient Boosted Trees model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `head_lib._Head` instance.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    tree_hparams: TODO. collections.namedtuple for hyperparameters.
    n_batches_per_layer: A `Tensor` of `int64`. Each layer is built after at
      least n_batches_per_layer accumulations.
    config: `RunConfig` object to configure the runtime settings.
    closed_form_grad_and_hess_fn: a function that accepts logits and labels
      and returns gradients and hessians. By default, they are created by
      tf.gradients() from the loss.
    example_id_column_name: Name of the feature for a unique ID per example.
      Currently experimental -- not exposed to public API.
    train_in_memory: `bool`, when true, it assumes the dataset is in memory,
      i.e., input_fn should return the entire dataset as a single batch, and
      also n_batches_per_layer should be set as 1.
    name: Name to use for the model.

  Returns:
      An `EstimatorSpec` instance.

  Raises:
    ValueError: mode or params are invalid, or features has the wrong type.
  """
  is_single_machine = (config.num_worker_replicas <= 1)

  sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name)
  if train_in_memory:
    assert n_batches_per_layer == 1, (
        'When train_in_memory is enabled, input_fn should return the entire '
        'dataset as a single batch, and n_batches_per_layer should be set as '
        '1.')
    if (not config.is_chief or config.num_worker_replicas > 1 or
        config.num_ps_replicas > 0):
      raise ValueError('train_in_memory is supported only for '
                       'non-distributed training.')
  worker_device = control_flow_ops.no_op().device
  # Maximum number of splits possible in the whole tree = 2**max_depth - 1.
  # TODO(youngheek): perhaps storage could be optimized by storing stats with
  # the dimension max_splits_per_layer, instead of max_splits (for the entire
  # tree).
  max_splits = (1 << tree_hparams.max_depth) - 1
  train_op = []
  with ops.name_scope(name) as name:
    # Prepare.
    global_step = training_util.get_or_create_global_step()
    bucket_size_list, feature_ids_list = _group_features_by_num_buckets(
        sorted_feature_columns)
    # Extract input features and set up cache for training.
    training_state_cache = None
    if mode == model_fn.ModeKeys.TRAIN and train_in_memory:
      # cache transformed features as well for in-memory training.
      batch_size = array_ops.shape(labels)[0]
      input_feature_list, input_cache_op = (
          _cache_transformed_features(features, sorted_feature_columns,
                                      batch_size))
      train_op.append(input_cache_op)
      training_state_cache = _CacheTrainingStatesUsingVariables(
          batch_size, head.logits_dimension)
    else:
      input_feature_list = _get_transformed_features(features,
                                                     sorted_feature_columns)
      if mode == model_fn.ModeKeys.TRAIN and example_id_column_name:
        example_ids = features[example_id_column_name]
        training_state_cache = _CacheTrainingStatesUsingHashTable(
            example_ids, head.logits_dimension)

    # Create Ensemble resources.
    tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name)
    # Create logits.
    if mode != model_fn.ModeKeys.TRAIN:
      logits = boosted_trees_ops.predict(
          # For non-TRAIN mode, ensemble doesn't change after initialization,
          # so no local copy is needed; using tree_ensemble directly.
          tree_ensemble_handle=tree_ensemble.resource_handle,
          bucketized_features=input_feature_list,
          logits_dimension=head.logits_dimension)
    else:
      if is_single_machine:
        local_tree_ensemble = tree_ensemble
        ensemble_reload = control_flow_ops.no_op()
      else:
        # Have a local copy of ensemble for the distributed setting.
        with ops.device(worker_device):
          local_tree_ensemble = boosted_trees_ops.TreeEnsemble(
              name=name + '_local', is_local=True)
        # TODO(soroush): Do partial updates if this becomes a bottleneck.
        ensemble_reload = local_tree_ensemble.deserialize(
            *tree_ensemble.serialize())
      if training_state_cache:
        cached_tree_ids, cached_node_ids, cached_logits = (
            training_state_cache.lookup())
      else:
        # Always start from the beginning when no cache is set up.
        batch_size = array_ops.shape(labels)[0]
        cached_tree_ids, cached_node_ids, cached_logits = (
            array_ops.zeros([batch_size], dtype=dtypes.int32),
            array_ops.zeros([batch_size], dtype=dtypes.int32),
            array_ops.zeros(
                [batch_size, head.logits_dimension], dtype=dtypes.float32))
      with ops.control_dependencies([ensemble_reload]):
        (stamp_token, num_trees, num_finalized_trees, num_attempted_layers,
         last_layer_nodes_range) = local_tree_ensemble.get_states()
        summary.scalar('ensemble/num_trees', num_trees)
        summary.scalar('ensemble/num_finalized_trees', num_finalized_trees)
        summary.scalar('ensemble/num_attempted_layers', num_attempted_layers)

        partial_logits, tree_ids, node_ids = boosted_trees_ops.training_predict(
            tree_ensemble_handle=local_tree_ensemble.resource_handle,
            cached_tree_ids=cached_tree_ids,
            cached_node_ids=cached_node_ids,
            bucketized_features=input_feature_list,
            logits_dimension=head.logits_dimension)
      logits = cached_logits + partial_logits

    # Create training graph.
    def _train_op_fn(loss):
      """Run one training iteration."""
      if training_state_cache:
        train_op.append(training_state_cache.insert(tree_ids, node_ids, logits))
      if closed_form_grad_and_hess_fn:
        gradients, hessians = closed_form_grad_and_hess_fn(logits, labels)
      else:
        gradients = gradients_impl.gradients(loss, logits, name='Gradients')[0]
        hessians = gradients_impl.gradients(
            gradients, logits, name='Hessians')[0]
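        # For losses that decompose per example, differentiating the summed
        # per-example gradients w.r.t. the logits yields the diagonal of the
        # Hessian, which is what the tree-growing ops consume.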

      stats_summaries_list = []
      for i, feature_ids in enumerate(feature_ids_list):
        num_buckets = bucket_size_list[i]
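        # Each per-feature stats summary, after the squeeze below, has shape
        # [max_splits, num_buckets, 2]: per-node, per-bucket sums of gradients
        # and hessians.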
        summaries = [
            array_ops.squeeze(
                boosted_trees_ops.make_stats_summary(
                    node_ids=node_ids,
                    gradients=gradients,
                    hessians=hessians,
                    bucketized_features_list=[input_feature_list[f]],
                    max_splits=max_splits,
                    num_buckets=num_buckets),
                axis=0) for f in feature_ids
        ]
        stats_summaries_list.append(summaries)

      accumulators = []

      def grow_tree_from_stats_summaries(stats_summaries_list,
                                         feature_ids_list):
        """Updates ensemble based on the best gains from stats summaries."""
        node_ids_per_feature = []
        gains_list = []
        thresholds_list = []
        left_node_contribs_list = []
        right_node_contribs_list = []
        all_feature_ids = []

        assert len(stats_summaries_list) == len(feature_ids_list)

        for i, feature_ids in enumerate(feature_ids_list):
          (numeric_node_ids_per_feature, numeric_gains_list,
           numeric_thresholds_list, numeric_left_node_contribs_list,
           numeric_right_node_contribs_list) = (
               boosted_trees_ops.calculate_best_gains_per_feature(
                   node_id_range=last_layer_nodes_range,
                   stats_summary_list=stats_summaries_list[i],
                   l1=tree_hparams.l1,
                   l2=tree_hparams.l2,
                   tree_complexity=tree_hparams.tree_complexity,
                   min_node_weight=tree_hparams.min_node_weight,
                   max_splits=max_splits))

          all_feature_ids += feature_ids
          node_ids_per_feature += numeric_node_ids_per_feature
          gains_list += numeric_gains_list
          thresholds_list += numeric_thresholds_list
          left_node_contribs_list += numeric_left_node_contribs_list
          right_node_contribs_list += numeric_right_node_contribs_list

        grow_op = boosted_trees_ops.update_ensemble(
            # Confirm if local_tree_ensemble or tree_ensemble should be used.
            tree_ensemble.resource_handle,
            feature_ids=all_feature_ids,
            node_ids=node_ids_per_feature,
            gains=gains_list,
            thresholds=thresholds_list,
            left_node_contribs=left_node_contribs_list,
            right_node_contribs=right_node_contribs_list,
            learning_rate=tree_hparams.learning_rate,
            max_depth=tree_hparams.max_depth,
            pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)
        return grow_op

      if train_in_memory and is_single_machine:
        train_op.append(distribute_lib.increment_var(global_step))
        train_op.append(
            grow_tree_from_stats_summaries(stats_summaries_list,
                                           feature_ids_list))
      else:
        dependencies = []

        for i, feature_ids in enumerate(feature_ids_list):
          stats_summaries = stats_summaries_list[i]
          accumulator = data_flow_ops.ConditionalAccumulator(
              dtype=dtypes.float32,
              # The stats consist of grads and hessians (the last dimension).
              shape=[len(feature_ids), max_splits, bucket_size_list[i], 2],
              shared_name='numeric_stats_summary_accumulator_' + str(i))
          accumulators.append(accumulator)

          apply_grad = accumulator.apply_grad(
              array_ops.stack(stats_summaries, axis=0), stamp_token)
          dependencies.append(apply_grad)

        def grow_tree_from_accumulated_summaries_fn():
          """Updates the tree with the best layer from accumulated summaries."""
          # Take out the accumulated summaries from the accumulator and grow.
          stats_summaries_list = [
              array_ops.unstack(accumulator.take_grad(1), axis=0)
              for accumulator in accumulators
          ]

          grow_op = grow_tree_from_stats_summaries(stats_summaries_list,
                                                   feature_ids_list)
          return grow_op

        with ops.control_dependencies(dependencies):
          train_op.append(distribute_lib.increment_var(global_step))
          if config.is_chief:
            min_accumulated = math_ops.reduce_min(
                array_ops.stack(
                    [acc.num_accumulated() for acc in accumulators]))

            train_op.append(
                control_flow_ops.cond(
                    math_ops.greater_equal(min_accumulated,
                                           n_batches_per_layer),
                    grow_tree_from_accumulated_summaries_fn,
                    control_flow_ops.no_op,
                    name='wait_until_n_batches_accumulated'))

      return control_flow_ops.group(train_op, name='train_op')

  estimator_spec = head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
  if mode == model_fn.ModeKeys.TRAIN:
    # Add an early stop hook.
    estimator_spec = estimator_spec._replace(
        training_hooks=estimator_spec.training_hooks +
        (_StopAtAttemptsHook(num_finalized_trees, num_attempted_layers,
                             tree_hparams.n_trees, tree_hparams.max_depth),))
  return estimator_spec
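
# A minimal sketch (assuming TensorFlow 1.x graph mode) of the default
# gradient/hessian computation used in _train_op_fn above when
# closed_form_grad_and_hess_fn is None: one tf.gradients call yields
# per-example gradients, and a second call on those gradients yields the
# diagonal of the Hessian. The toy sigmoid cross-entropy loss below is
# illustrative only.
import tensorflow as tf

logits = tf.constant([[0.5], [-1.0], [2.0]])
labels = tf.constant([[1.0], [0.0], [1.0]])
loss = tf.reduce_sum(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits))

gradients = tf.gradients(loss, logits, name='Gradients')[0]
hessians = tf.gradients(gradients, logits, name='Hessians')[0]

with tf.Session() as sess:
  grads, hess = sess.run([gradients, hessians])
  # grads[i] == sigmoid(logit_i) - label_i
  # hess[i]  == sigmoid(logit_i) * (1 - sigmoid(logit_i))
  print(grads)
  print(hess)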