Example #1
  def _testTypesForAdam(self, var, m, v, grad, use_gpu):
    self.setUp()
    with self.test_session(use_gpu=use_gpu):
      var_t = variables.Variable(var)
      m_t = variables.Variable(m)
      v_t = variables.Variable(v)

      t = 1
      beta1 = np.array(0.9, dtype=var.dtype)
      beta2 = np.array(0.999, dtype=var.dtype)
      beta1_power = beta1**t
      beta2_power = beta2**t
      lr = np.array(0.001, dtype=var.dtype)
      epsilon = np.array(1e-8, dtype=var.dtype)
      beta1_t = constant_op.constant(beta1, self._toType(var.dtype), [])
      beta2_t = constant_op.constant(beta2, self._toType(var.dtype), [])
      beta1_power_t = variables.Variable(beta1_power)
      beta2_power_t = variables.Variable(beta2_power)
      lr_t = constant_op.constant(lr, self._toType(var.dtype), [])
      epsilon_t = constant_op.constant(epsilon, self._toType(var.dtype), [])
      variables.initialize_all_variables().run()

      self.assertAllCloseAccordingToType(var, var_t.eval())
      new_var, _, _ = self._adamUpdateNumpy(var, grad, t, m, v,
                                            lr, beta1, beta2, epsilon)
      apply_adam = training_ops.apply_adam(var_t, m_t, v_t, beta1_power_t,
                                           beta2_power_t, lr_t,
                                           beta1_t, beta2_t, epsilon_t, grad)
      out = apply_adam.eval()
      self.assertShapeEqual(out, apply_adam)
      self.assertAllCloseAccordingToType(new_var, out)
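The `_adamUpdateNumpy` helper referenced above is not shown in this example. A minimal NumPy sketch of the standard Adam update it is presumed to compute (an assumption based on the canonical Adam formulas, not code taken from the original test) is:

  def _adamUpdateNumpy(self, param, g_t, t, m, v, lr, beta1, beta2, epsilon):
    # Bias-corrected step size for step t.
    lr_t = lr * np.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)
    m_t = beta1 * m + (1 - beta1) * g_t        # first-moment estimate
    v_t = beta2 * v + (1 - beta2) * g_t * g_t  # second-moment estimate
    param_t = param - lr_t * m_t / (np.sqrt(v_t) + epsilon)
    return param_t, m_t, v_t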
Example #2
  def test_multiple_random_3d_updates_results_in_right_dist(self):
    # Update with uniform 3-D rvs.  Resultant
    # histogram should be uniform.  Use only 3 bins because with many bins it
    # would be unlikely that all would be close to 1/n.  If someone ever wants
    # to test that, it would be better to check that the cdf was linear.
    nbins = [3]
    value_range = [1.0, 4.14159]
    with self.test_session() as sess:
      hist = variables.Variable(array_ops.zeros(nbins, dtype=dtypes.int32))
      new_values = array_ops.placeholder(dtypes.float32, shape=[4, 4, 4])
      hist_update = histogram_ops.histogram_fixed_width(hist, new_values,
                                                        value_range)
      variables.initialize_all_variables().run()

      for _ in range(100):
        # Map the rv: U[0, 1] --> U[value_range[0], value_range[1]].
        new_values_arr = (
            value_range[0] +
            (value_range[1] - value_range[0]) * self.rng.rand(4, 4, 4))

        # The new updated_hist_array is returned by the updating op.
        # hist should contain the updated values.
        updated_hist_array = sess.run(hist_update,
                                      feed_dict={new_values: new_values_arr})

    pmf = updated_hist_array / float(updated_hist_array.sum())
    np.testing.assert_allclose(1 / 3, pmf, atol=0.02)
Example #3
  def _testTypesForFtrl(self, x, y, z, lr, grad, use_gpu=None, l1=0.0,
                        l2=0.0, lr_power=-0.5):
    self.setUp()
    with self.test_session(use_gpu=use_gpu):
      var = variables.Variable(x)
      accum = variables.Variable(y)
      linear = variables.Variable(z)
      variables.initialize_all_variables().run()

      self.assertAllCloseAccordingToType(x, var.eval())
      apply_ftrl = training_ops.apply_ftrl(var, accum, linear, grad, lr, l1, l2,
                                           lr_power)
      out = apply_ftrl.eval()
      self.assertShapeEqual(out, apply_ftrl)
      accum_update = y + grad * grad
      linear_update = z + grad - (accum_update ** (-lr_power) - y ** (
          -lr_power)) / lr * x
      quadratic = 1.0 / (accum_update ** (lr_power) * lr) + 2 * l2
      expected_out = np.array([(np.sign(
          linear_update[i]) * l1 - linear_update[i]) / (
              quadratic[i]) if np.abs(
                  linear_update[i]) > l1 else 0.0 for i in range(
                      linear_update.size)])
      self.assertAllCloseAccordingToType(accum_update, accum.eval())
      if x.dtype == np.float16:
        # The calculations here really are not very precise in float16.
        self.assertAllClose(linear_update, linear.eval(), rtol=2e-2, atol=2e-2)
        self.assertAllClose(expected_out, out, rtol=2e-2, atol=2e-2)
      else:
        self.assertAllClose(linear_update, linear.eval())
        self.assertAllClose(expected_out, out)
Example #4
  def test_two_updates_on_constant_input(self):
    # Bins will be:
    #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
    nbins = [5]
    value_range = [0.0, 5.0]
    new_values_1 = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
    new_values_2 = [1.5, 4.5, 4.5, 4.5, 0.0, 0.0]
    expected_bin_counts_1 = [2, 1, 1, 0, 2]
    expected_bin_counts_2 = [4, 2, 1, 0, 5]
    with self.test_session() as sess:
      hist = variables.Variable(array_ops.zeros(nbins, dtype=dtypes.int32))
      new_values = array_ops.placeholder(dtypes.float32, shape=[6])
      hist_update = histogram_ops.histogram_fixed_width(hist, new_values,
                                                        value_range)
      variables.initialize_all_variables().run()
      updated_hist_array = sess.run(hist_update,
                                    feed_dict={new_values: new_values_1})

      # The new updated_hist_array is returned by the updating op.
      # hist should contain the updated values.
      self.assertAllClose(expected_bin_counts_1, updated_hist_array)
      self.assertAllClose(expected_bin_counts_1, hist.eval())

      updated_hist_array = sess.run(hist_update,
                                    feed_dict={new_values: new_values_2})
      self.assertAllClose(expected_bin_counts_2, updated_hist_array)
      self.assertAllClose(expected_bin_counts_2, hist.eval())
Example #5
  def _CheckDecay(self, ema, actual_decay, dim):
    tens = _Repeat(10.0, dim)
    thirties = _Repeat(30.0, dim)
    var0 = variables.Variable(tens, name="v0")
    var1 = variables.Variable(thirties, name="v1")
    variables.initialize_all_variables().run()
    # Note that tensor2 is not a Variable but just a plain Tensor resulting
    # from the sum operation.
    tensor2 = var0 + var1
    update = ema.apply([var0, var1, tensor2])
    avg0 = ema.average(var0)
    avg1 = ema.average(var1)
    avg2 = ema.average(tensor2)

    self.assertFalse(avg0 in variables.trainable_variables())
    self.assertFalse(avg1 in variables.trainable_variables())
    self.assertFalse(avg2 in variables.trainable_variables())
    variables.initialize_all_variables().run()

    self.assertEqual("v0/ExponentialMovingAverage:0", avg0.name)
    self.assertEqual("v1/ExponentialMovingAverage:0", avg1.name)
    self.assertEqual("add/ExponentialMovingAverage:0", avg2.name)

    # Check initial values.
    self.assertAllClose(tens, var0.eval())
    self.assertAllClose(thirties, var1.eval())
    self.assertAllClose(_Repeat(10.0 + 30.0, dim), tensor2.eval())

    # Check that averages are initialized correctly.
    self.assertAllClose(tens, avg0.eval())
    self.assertAllClose(thirties, avg1.eval())
    # Note that averages of Tensors initialize to zeros_like since no value
    # of the Tensor is known because the Op has not been run (yet).
    self.assertAllClose(_Repeat(0.0, dim), avg2.eval())

    # Update the averages and check.
    update.run()
    dk = actual_decay

    expected = _Repeat(10.0 * dk + 10.0 * (1 - dk), dim)
    self.assertAllClose(expected, avg0.eval())
    expected = _Repeat(30.0 * dk + 30.0 * (1 - dk), dim)
    self.assertAllClose(expected, avg1.eval())
    expected = _Repeat(0.0 * dk + (10.0 + 30.0) * (1 - dk), dim)
    self.assertAllClose(expected, avg2.eval())

    # Again, update the averages and check.
    update.run()
    expected = _Repeat((10.0 * dk + 10.0 * (1 - dk)) * dk + 10.0 * (1 - dk),
                       dim)
    self.assertAllClose(expected, avg0.eval())
    expected = _Repeat((30.0 * dk + 30.0 * (1 - dk)) * dk + 30.0 * (1 - dk),
                       dim)
    self.assertAllClose(expected, avg1.eval())
    expected = _Repeat(((0.0 * dk + (10.0 + 30.0) * (1 - dk)) * dk +
                        (10.0 + 30.0) * (1 - dk)),
                       dim)
    self.assertAllClose(expected, avg2.eval())
Example #6
  def testWithExistingEnsembleAndShrinkage(self):
    with self.test_session():
      # Add shrinkage config.
      learning_rate = 0.0001
      tree_ensemble = tree_config_pb2.DecisionTreeEnsembleConfig()
      # Add 5 trees with some weights.
      for i in range(0, 5):
        tree = tree_ensemble.trees.add()
        _append_to_leaf(tree.nodes.add().leaf, 0, -0.4)
        tree_ensemble.tree_weights.append(i + 1)
        meta = tree_ensemble.tree_metadata.add()
        meta.num_tree_weight_updates = 1
      tree_ensemble_handle = model_ops.tree_ensemble_variable(
          stamp_token=0,
          tree_ensemble_config=tree_ensemble.SerializeToString(),
          name="existing")

      # Create non-zero feature importance.
      feature_usage_counts = variables.Variable(
          initial_value=np.array([4, 7], np.int64),
          name="feature_usage_counts",
          trainable=False)
      feature_gains = variables.Variable(
          initial_value=np.array([0.2, 0.8], np.float32),
          name="feature_gains",
          trainable=False)

      resources.initialize_resources(resources.shared_resources()).run()
      variables.initialize_all_variables().run()

      output_ensemble = tree_config_pb2.DecisionTreeEnsembleConfig()
      with ops.control_dependencies([
          ensemble_optimizer_ops.add_trees_to_ensemble(
              tree_ensemble_handle,
              self._ensemble_to_add.SerializeToString(),
              feature_usage_counts, [1, 2],
              feature_gains, [0.5, 0.3], [[], []],
              learning_rate=learning_rate)
      ]):
        output_ensemble.ParseFromString(
            model_ops.tree_ensemble_serialize(tree_ensemble_handle)[1].eval())

      # The weights of the previous trees stayed the same; the new (last) tree is
      # added with the shrinkage weight.
      self.assertAllClose([1.0, 2.0, 3.0, 4.0, 5.0, learning_rate],
                          output_ensemble.tree_weights)

      # Check that the number of updates is 1 for every tree (i.e., no old tree
      # weight got adjusted).
      for i in range(0, 6):
        self.assertEqual(
            1, output_ensemble.tree_metadata[i].num_tree_weight_updates)

      # Ensure feature importance was aggregated correctly.
      self.assertAllEqual([5, 9], feature_usage_counts.eval())
      self.assertArrayNear(
          [0.2 + 0.5 * learning_rate, 0.8 + 0.3 * learning_rate],
          feature_gains.eval(), 1e-6)
Example #7
 def _testTypes(self, x, alpha, delta, use_gpu=None):
   self.setUp()
   with self.test_session(use_gpu=use_gpu):
     var = variables.Variable(x)
     variables.initialize_all_variables().run()
     self.assertAllCloseAccordingToType(x, var.eval())
     apply_sgd = training_ops.apply_gradient_descent(var, alpha, delta)
     out = apply_sgd.eval()
     self.assertShapeEqual(out, apply_sgd)
     self.assertAllCloseAccordingToType(x - alpha * delta, out)
Example #8
 def testAssignMovingAverage(self):
   with self.test_session():
     var = variables.Variable([10.0, 11.0])
     val = constant_op.constant([1.0, 2.0], types.float32)
     decay = 0.25
     assign = moving_averages.assign_moving_average(var, val, decay)
     variables.initialize_all_variables().run()
     self.assertAllClose([10.0, 11.0], var.eval())
     assign.op.run()
     self.assertAllClose([10.0 * 0.25 + 1.0 * (1.0 - 0.25),
                          11.0 * 0.25 + 2.0 * (1.0 - 0.25)],
                         var.eval())
Example #9
  def _testTypesForAdagrad(self, x, y, lr, grad, use_gpu=None):
    self.setUp()
    with self.test_session(use_gpu=use_gpu):
      var = variables.Variable(x)
      accum = variables.Variable(y)
      variables.initialize_all_variables().run()

      self.assertAllCloseAccordingToType(x, var.eval())
      apply_adagrad = training_ops.apply_adagrad(var, accum, lr, grad)
      out = apply_adagrad.eval()
      self.assertShapeEqual(out, apply_adagrad)
      self.assertAllCloseAccordingToType(
          x - lr * grad * (y + grad * grad) ** (-0.5), out)
      self.assertAllCloseAccordingToType(y + grad * grad, accum.eval())
Example #10
  def testKernelStateList(self):
    """Test that transition kernel works with list input to `state`."""
    num_chains = 2
    loc_one = variable_scope.get_variable(
        "loc_one", [num_chains],
        initializer=init_ops.zeros_initializer())
    loc_two = variable_scope.get_variable(
        "loc_two", [num_chains], initializer=init_ops.zeros_initializer())

    def target_log_prob_fn(loc_one, loc_two):
      loc = array_ops.stack([loc_one, loc_two])
      log_prob = mvn_tril_lib.MultivariateNormalTriL(
          loc=constant_op.constant([0., 0.]),
          scale_tril=constant_op.constant([[0.1, 0.1], [0.0, 0.1]])).log_prob(
              loc)
      return math_ops.reduce_sum(log_prob, 0)

    def proposal_fn(loc_one, loc_two):
      loc_one_proposal = mh.proposal_normal(scale=0.05)
      loc_two_proposal = mh.proposal_normal(scale=0.05)
      loc_one_sample, _ = loc_one_proposal(loc_one)
      loc_two_sample, _ = loc_two_proposal(loc_two)
      return [loc_one_sample, loc_two_sample], None

    new_state, _ = mh.kernel(
        target_log_prob_fn=target_log_prob_fn,
        proposal_fn=proposal_fn,
        current_state=[loc_one, loc_two],
        seed=12415)
    loc_one_update = loc_one.assign(new_state[0])
    loc_two_update = loc_two.assign(new_state[1])

    init = variables.initialize_all_variables()
    with self.test_session() as sess:
      sess.run(init)
      loc_one_samples = []
      loc_two_samples = []
      for _ in range(10000):
        loc_one_sample, loc_two_sample = sess.run(
            [loc_one_update, loc_two_update])
        loc_one_samples.append(loc_one_sample)
        loc_two_samples.append(loc_two_sample)

    loc_one_samples = np.array(loc_one_samples)
    loc_two_samples = np.array(loc_two_samples)
    loc_one_samples = loc_one_samples[1000:]  # drop samples for burn-in
    loc_two_samples = loc_two_samples[1000:]  # drop samples for burn-in

    self.assertAllClose(np.mean(loc_one_samples, 0),
                        np.array([0.] * num_chains),
                        rtol=1e-5, atol=1e-1)
    self.assertAllClose(np.mean(loc_two_samples, 0),
                        np.array([0.] * num_chains),
                        rtol=1e-5, atol=1e-1)
    self.assertAllClose(np.std(loc_one_samples, 0),
                        np.array([0.1] * num_chains),
                        rtol=1e-5, atol=1e-1)
    self.assertAllClose(np.std(loc_two_samples, 0),
                        np.array([0.1] * num_chains),
                        rtol=1e-5, atol=1e-1)
Example #11
  def testDenseLayerAutoJit(self):
    """Tests dense layer compilation in auto-jit mode.

    Dense layer should be compiled into a single XlaCompile/XlaRun op pair in
    auto-jit mode.
    """

    os.environ["TF_XLA_FLAGS"] = (
        "--tf_xla_cpu_global_jit " + os.environ.get("TF_XLA_FLAGS", ""))
    config = config_pb2.ConfigProto()
    config.graph_options.optimizer_options.global_jit_level = (
        config_pb2.OptimizerOptions.ON_1)

    with self.session(config=config) as sess:
      x = array_ops.placeholder(shape=[None, None, 3], dtype=np.float32)
      y = layers.dense(x, 3)

      self.evaluate(variables.initialize_all_variables())
      run_metadata = config_pb2.RunMetadata()
      test_utils.RunWithWarmup(
          sess,
          y, {x: np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])},
          run_metadata=run_metadata,
          options=config_pb2.RunOptions(
              trace_level=config_pb2.RunOptions.FULL_TRACE))

    labels = GetRunMetadataLabels(run_metadata)
    self.assertEqual(1, self.countXlaOps(labels))
    self.assertFalse(InLabels(labels, "MatMult"))
Example #12
  def testReadWrite(self):
    """Tests initialization, reading, and writing a resource variable."""
    with self.test_session() as session:
      with self.test_scope():
        with variable_scope.variable_scope("ascope", use_resource=True):
          x = variable_scope.get_variable(
              "x",
              shape=[],
              dtype=dtypes.float32,
              initializer=init_ops.constant_initializer(2))
          a = x.read_value()
          with ops.control_dependencies([a]):
            b = state_ops.assign(x, 47)
          with ops.control_dependencies([b]):
            c = x.read_value()
          with ops.control_dependencies([c]):
            d = state_ops.assign_add(x, 3)
          with ops.control_dependencies([d]):
            e = x.read_value()

      session.run(variables.initialize_all_variables())
      v1, v2, v3 = session.run([a, c, e])
      self.assertAllClose(2.0, v1)
      self.assertAllClose(47.0, v2)
      self.assertAllClose(50.0, v3)
Example #13
  def testDenseLayerJitScopeUndefinedShape(self):
    """Tests that the dense layer node is properly compiled in jit scope.

    Dense layer uses shape op to get shape of input tensor if its shape is not
    fully defined. XLA does not cluster shape op with other operators. But in
    experimental_jit_scope, XLA is forced to compile shape op into its own
    cluster, causing dense layer to be split into TWO XlaLaunch ops.
    """

    with self.test_session() as sess:
      x = array_ops.placeholder(shape=[None, None, 3], dtype=np.float32)
      with jit_scope():
        y = layers.dense(x, 3)

      sess.run(variables.initialize_all_variables())
      run_metadata = config_pb2.RunMetadata()
      sess.run(
          y, {x: np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])},
          run_metadata=run_metadata,
          options=config_pb2.RunOptions(
              trace_level=config_pb2.RunOptions.FULL_TRACE))

    labels = GetRunMetadataLabels(run_metadata)
    self.assertEqual(2, XlaLaunchOpCount(labels))
    self.assertFalse(InLabels(labels, "ListDiff"))
Example #14
  def testDenseLayerAutoJit(self):
    """Tests dense layer compilation in auto-jit mode.

    Dense layer should be compiled into a single XlaLaunch op in auto-jit mode.
    """

    os.environ["TF_XLA_FLAGS"] = ("--tf_xla_cpu_global_jit")
    config = config_pb2.ConfigProto()
    config.graph_options.optimizer_options.global_jit_level = (
        config_pb2.OptimizerOptions.ON_1)

    with self.test_session(config=config) as sess:
      x = array_ops.placeholder(shape=[None, None, 3], dtype=np.float32)
      y = layers.dense(x, 3)

      sess.run(variables.initialize_all_variables())
      run_metadata = config_pb2.RunMetadata()
      sess.run(
          y, {x: np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])},
          run_metadata=run_metadata,
          options=config_pb2.RunOptions(
              trace_level=config_pb2.RunOptions.FULL_TRACE))

    labels = GetRunMetadataLabels(run_metadata)
    self.assertEqual(1, XlaLaunchOpCount(labels))
    self.assertFalse(InLabels(labels, "ListDiff"))
Example #15
 def testAcceptsRefs(self):
   var = variables.Variable(10)
   result = math_ops.scalar_mul(3, var)
   init = variables.initialize_all_variables()
   with self.test_session() as sess:
     sess.run(init)
     self.assertEqual(30, result.eval())
Example #16
 def test_train_worker_monitor(self):
   # We need to set the device explicitly because of a check on non-chief
   # workers that requires all variables to have a device assigned.
   with tf.Graph().as_default() as g, g.device('/cpu:0'):
     global_step = tf.contrib.framework.create_global_step(g)
     train_op = tf.assign_add(global_step, 1)
     loss_op = tf.constant(2.0)
     tf.scalar_summary('loss', loss_op)
     # Add explicit "local" init op to initialize all variables
     # as there's no chief to init here.
     init_op = variables.initialize_all_variables()
     ops.add_to_collection(ops.GraphKeys.LOCAL_INIT_OP, init_op)
     # Create worker monitors where one should be active on the worker
     # and the other chief exclusive.
     chief_exclusive_monitor = _BaseMonitorWrapper(False)
     all_workers_monitor = _BaseMonitorWrapper(True)
     with self.test_session(g):
       loss = learn.graph_actions.train(
           g, output_dir=self._output_dir,
           global_step_tensor=global_step,
           train_op=train_op, loss_op=loss_op,
           supervisor_is_chief=False, steps=1,
           monitors=[chief_exclusive_monitor, all_workers_monitor])
     self.assertEqual(2.0, loss)
     self.assertTrue(not chief_exclusive_monitor.is_active and
                     all_workers_monitor.is_active,
                     'Only non-chief runnable monitor must have been active.')
     self.assertTrue(not chief_exclusive_monitor.has_step and
                     all_workers_monitor.has_step,
                     'Only non-chief runnable monitor must have a step.')
Example #17
  def testKernelStateTensor(self):
    """Test that transition kernel works with tensor input to `state`."""
    loc = variable_scope.get_variable("loc", initializer=0.)

    def target_log_prob_fn(loc):
      return normal_lib.Normal(loc=0.0, scale=0.1).log_prob(loc)

    new_state, _ = mh.kernel(
        target_log_prob_fn=target_log_prob_fn,
        proposal_fn=mh.proposal_normal(scale=0.05),
        current_state=loc,
        seed=231251)
    loc_update = loc.assign(new_state)

    init = variables.initialize_all_variables()
    with self.test_session() as sess:
      sess.run(init)
      loc_samples = []
      for _ in range(2500):
        loc_sample = sess.run(loc_update)
        loc_samples.append(loc_sample)
    loc_samples = loc_samples[500:]  # drop samples for burn-in

    self.assertAllClose(np.mean(loc_samples), 0.0, rtol=1e-5, atol=1e-1)
    self.assertAllClose(np.std(loc_samples), 0.1, rtol=1e-5, atol=1e-1)
Example #18
  def testTraining(self):
    """Tests a gradient descent step for a simple model."""
    with self.test_session() as session:
      with self.test_scope():
        with variable_scope.variable_scope("ascope", use_resource=True):
          w = variable_scope.get_variable(
              "w",
              shape=[4, 2],
              dtype=dtypes.float32,
              initializer=init_ops.constant_initializer(
                  np.array([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=np.float32)))
          b = variable_scope.get_variable(
              "b",
              shape=[2],
              dtype=dtypes.float32,
              initializer=init_ops.constant_initializer(
                  np.array([2, 3], dtype=np.float32)))

          x = array_ops.placeholder(dtypes.float32, shape=[1, 4])
          y = math_ops.matmul(x, w) + b
          loss = math_ops.reduce_sum(y)
          optimizer = GradientDescentOptimizer(0.1)
          train = optimizer.minimize(loss)

      session.run(variables.initialize_all_variables())
      session.run(train, {x: np.array([[7, 3, 5, 9]], dtype=np.float32)})
      vw, vb = session.run([w, b])
      self.assertAllClose(
          np.array(
              [[0.3, 1.3], [2.7, 3.7], [4.5, 5.5], [6.1, 7.1]],
              dtype=np.float32),
          vw,
          rtol=1e-4)
      self.assertAllClose(np.array([1.9, 2.9], dtype=np.float32), vb, rtol=1e-4)
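The expected values in the asserts above follow from a single gradient descent step on loss = sum(x * w + b), whose gradients are d(loss)/dw = x^T broadcast across the output columns and d(loss)/db = [1, 1]. A quick NumPy check, added here for clarity (it is not part of the original test):

import numpy as np
x = np.array([[7, 3, 5, 9]], dtype=np.float32)
w = np.array([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=np.float32)
b = np.array([2, 3], dtype=np.float32)
# One gradient descent step with learning rate 0.1.
print(w - 0.1 * np.dot(x.T, np.ones((1, 2), np.float32)))  # [[0.3 1.3] [2.7 3.7] [4.5 5.5] [6.1 7.1]]
print(b - 0.1 * np.ones(2, np.float32))                    # [1.9 2.9]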
Example #19
  def testDocstringExample(self):
    """Tests the simplified docstring example with multiple chains."""

    n = 2  # dimension of the problem

    # Generate 300 initial values randomly. Each of these would be an
    # independent starting point for a Markov chain.
    state = variable_scope.get_variable(
        "state", initializer=random_ops.random_normal(
            [300, n], mean=3.0, dtype=dtypes.float32, seed=42))

    # Computes the log(p(x)) for the unit normal density and ignores the
    # normalization constant.
    def log_density(x):
      return  - math_ops.reduce_sum(x * x, reduction_indices=-1) / 2.0

    # Initial log-density value
    state_log_density = variable_scope.get_variable(
        "state_log_density",
        initializer=log_density(state.initialized_value()))

    # A variable to store the log_acceptance_ratio:
    log_acceptance_ratio = variable_scope.get_variable(
        "log_acceptance_ratio",
        initializer=array_ops.zeros([300], dtype=dtypes.float32))

    # Generates random proposals by moving each coordinate uniformly and
    # independently in a box of size 2 centered around the current value.
    # Returns the new point and also the log of the Hastings ratio (the
    # ratio of the probability of going from the proposal to origin and the
    # probability of the reverse transition). When this ratio is 1, the value
    # may be omitted and replaced by None.
    def random_proposal(x):
      return (x + random_ops.random_uniform(
          array_ops.shape(x), minval=-1, maxval=1,
          dtype=x.dtype, seed=12)), None

    #  Create the op to propagate the chain for 100 steps.
    stepper = mh.evolve(
        state, state_log_density, log_acceptance_ratio,
        log_density, random_proposal, n_steps=100, seed=123)
    init = variables.initialize_all_variables()
    with self.test_session() as sess:
      sess.run(init)
      # Run the chains for a total of 1000 steps.
      for _ in range(10):
        sess.run(stepper)
      samples = sess.run(state)
      covariance = np.eye(n)
      # Verify that the estimated mean and covariance are close to the true
      # values.
      self.assertAlmostEqual(
          np.max(np.abs(np.mean(samples, 0)
                        - np.zeros(n))), 0,
          delta=0.1)
      self.assertAlmostEqual(
          np.max(np.abs(np.reshape(np.cov(samples, rowvar=False), [n**2])
                        - np.reshape(covariance, [n**2]))), 0,
          delta=0.2)
Example #20
 def testVariables(self):
     with self.test_session():
         step = variables.Variable(1)
         assign_1 = step.assign(1)
         assign_2 = step.assign(2)
         assign_100 = step.assign(100)
         decayed_lr = learning_rate_decay.exponential_decay(0.1, step, 3, 0.96, staircase=True)
         variables.initialize_all_variables().run()
         # No change to learning rate
         assign_1.op.run()
         self.assertAllClose(decayed_lr.eval(), 0.1, 1e-6)
         assign_2.op.run()
         self.assertAllClose(decayed_lr.eval(), 0.1, 1e-6)
         # Decayed learning rate
         assign_100.op.run()
         expected = 0.1 * 0.96 ** (100 // 3)
         self.assertAllClose(decayed_lr.eval(), expected, 1e-6)
Example #21
  def testWithExistingEnsemble(self):
    with self.test_session():
      # Create existing tree ensemble.
      tree_ensemble_handle = model_ops.tree_ensemble_variable(
          stamp_token=0,
          tree_ensemble_config=self._tree_ensemble.SerializeToString(),
          name="existing")
      # Create non-zero feature importance.
      feature_usage_counts = variables.Variable(
          initial_value=np.array([0, 4, 1], np.int64),
          name="feature_usage_counts",
          trainable=False)
      feature_gains = variables.Variable(
          initial_value=np.array([0.0, 0.3, 0.05], np.float32),
          name="feature_gains",
          trainable=False)

      resources.initialize_resources(resources.shared_resources()).run()
      variables.initialize_all_variables().run()
      output_ensemble = tree_config_pb2.DecisionTreeEnsembleConfig()
      with ops.control_dependencies([
          ensemble_optimizer_ops.add_trees_to_ensemble(
              tree_ensemble_handle,
              self._ensemble_to_add.SerializeToString(),
              feature_usage_counts, [1, 2, 0],
              feature_gains, [0.02, 0.1, 0.0], [[], []],
              learning_rate=1)
      ]):
        output_ensemble.ParseFromString(
            model_ops.tree_ensemble_serialize(tree_ensemble_handle)[1].eval())

      # Output.
      self.assertEqual(3, len(output_ensemble.trees))
      self.assertProtoEquals(self._tree_to_add, output_ensemble.trees[2])

      self.assertAllEqual([1.0, 1.0, 1.0], output_ensemble.tree_weights)

      self.assertEqual(2,
                       output_ensemble.tree_metadata[0].num_tree_weight_updates)
      self.assertEqual(3,
                       output_ensemble.tree_metadata[1].num_tree_weight_updates)
      self.assertEqual(1,
                       output_ensemble.tree_metadata[2].num_tree_weight_updates)
      self.assertAllEqual([1, 6, 1], feature_usage_counts.eval())
      self.assertArrayNear([0.02, 0.4, 0.05], feature_gains.eval(), 1e-6)
Example #22
  def _testTypesForSparseAdagrad(self, x, y, lr, grad, indices):
    self.setUp()
    with self.test_session(use_gpu=False):
      var = variables.Variable(x)
      accum = variables.Variable(y)
      variables.initialize_all_variables().run()

      self.assertAllEqual(x, var.eval())
      sparse_apply_adagrad = training_ops.sparse_apply_adagrad(
          var, accum, lr, grad,
          constant_op.constant(indices, self._toType(indices.dtype)))
      out = sparse_apply_adagrad.eval()
      self.assertShapeEqual(out, sparse_apply_adagrad)

      for (i, index) in enumerate(indices):
        self.assertAllClose(
            x[index] - lr * grad[i] * (y[index] + grad[i] * grad[i]) ** (-0.5),
            var.eval()[index])
        self.assertAllEqual(y[index] + grad[i] * grad[i], accum.eval()[index])
Example #23
  def testWithEmptyEnsembleAndShrinkage(self):
    with self.test_session():
      # Add shrinkage config.
      learning_rate = 0.0001
      tree_ensemble = tree_config_pb2.DecisionTreeEnsembleConfig()
      tree_ensemble_handle = model_ops.tree_ensemble_variable(
          stamp_token=0,
          tree_ensemble_config=tree_ensemble.SerializeToString(),
          name="existing")

      # Create zero feature importance.
      feature_usage_counts = variables.Variable(
          initial_value=np.array([0, 0], np.int64),
          name="feature_usage_counts",
          trainable=False)
      feature_gains = variables.Variable(
          initial_value=np.array([0.0, 0.0], np.float32),
          name="feature_gains",
          trainable=False)

      resources.initialize_resources(resources.shared_resources()).run()
      variables.initialize_all_variables().run()

      output_ensemble = tree_config_pb2.DecisionTreeEnsembleConfig()
      with ops.control_dependencies([
          ensemble_optimizer_ops.add_trees_to_ensemble(
              tree_ensemble_handle,
              self._ensemble_to_add.SerializeToString(),
              feature_usage_counts, [1, 2],
              feature_gains, [0.5, 0.3], [[], []],
              learning_rate=learning_rate)
      ]):
        output_ensemble.ParseFromString(
            model_ops.tree_ensemble_serialize(tree_ensemble_handle)[1].eval())

      # New tree is added with shrinkage weight.
      self.assertAllClose([learning_rate], output_ensemble.tree_weights)
      self.assertEqual(1,
                       output_ensemble.tree_metadata[0].num_tree_weight_updates)
      self.assertAllEqual([1, 2], feature_usage_counts.eval())
      self.assertArrayNear([0.5 * learning_rate, 0.3 * learning_rate],
                           feature_gains.eval(), 1e-6)
Example #24
  def testWithEmptyEnsemble(self):
    with self.test_session():
      # Create an empty ensemble.
      tree_ensemble_handle = model_ops.tree_ensemble_variable(
          stamp_token=0, tree_ensemble_config="", name="empty")

      # Create zero feature importance.
      feature_usage_counts = variables.Variable(
          initial_value=array_ops.zeros([1], dtypes.int64),
          name="feature_usage_counts",
          trainable=False)
      feature_gains = variables.Variable(
          initial_value=array_ops.zeros([1], dtypes.float32),
          name="feature_gains",
          trainable=False)

      resources.initialize_resources(resources.shared_resources()).run()
      variables.initialize_all_variables().run()

      with ops.control_dependencies([
          ensemble_optimizer_ops.add_trees_to_ensemble(
              tree_ensemble_handle,
              self._ensemble_to_add.SerializeToString(),
              feature_usage_counts, [2],
              feature_gains, [0.4], [[]],
              learning_rate=1.0)
      ]):
        result = model_ops.tree_ensemble_serialize(tree_ensemble_handle)[1]

      # Output.
      output_ensemble = tree_config_pb2.DecisionTreeEnsembleConfig()
      output_ensemble.ParseFromString(result.eval())
      self.assertProtoEquals(self._tree_to_add, output_ensemble.trees[0])
      self.assertEqual(1, len(output_ensemble.trees))

      self.assertAllEqual([1.0], output_ensemble.tree_weights)

      self.assertEqual(1,
                       output_ensemble.tree_metadata[0].num_tree_weight_updates)

      self.assertAllEqual([2], feature_usage_counts.eval())
      self.assertArrayNear([0.4], feature_gains.eval(), 1e-6)
Example #25
  def test_one_update_on_constant_2d_input(self):
    # Bins will be:
    #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
    nbins = [5]
    value_range = [0.0, 5.0]
    new_values = [[-1.0, 0.0, 1.5], [2.0, 5.0, 15]]
    expected_bin_counts = [2, 1, 1, 0, 2]
    with self.test_session() as sess:
      hist = variables.Variable(array_ops.zeros(nbins, dtype=dtypes.int32))
      hist_update = histogram_ops.histogram_fixed_width(hist, new_values,
                                                        value_range)
      variables.initialize_all_variables().run()
      self.assertTrue(hist.dtype.is_compatible_with(hist_update.dtype))
      updated_hist_array = sess.run(hist_update)

      # The new updated_hist_array is returned by the updating op.
      self.assertAllClose(expected_bin_counts, updated_hist_array)

      # hist should contain updated values, but eval() should not change it.
      self.assertAllClose(expected_bin_counts, hist.eval())
      self.assertAllClose(expected_bin_counts, hist.eval())
Example #26
  def run(self,
          num_batches=None,
          graph=None,
          session=None,
          start_queues=True,
          initialize_variables=True,
          **kwargs):
    """Builds and runs the columns of the `DataFrame` and yields batches.

    This is a generator that yields a dictionary mapping column names to
    evaluated columns.

    Args:
      num_batches: the maximum number of batches to produce. If not specified,
        batches are yielded indefinitely.
      graph: the `Graph` in which the `DataFrame` should be built.
      session: the `Session` in which to run the columns of the `DataFrame`.
      start_queues: if true, queues will be started before running and halted
        after producing `num_batches` batches.
      initialize_variables: if true, variables will be initialized.
      **kwargs: Additional keyword arguments e.g. `num_epochs`.

    Yields:
      A dictionary, mapping column names to the values resulting from running
      each column for a single batch.
    """
    if graph is None:
      graph = ops.get_default_graph()
    with graph.as_default():
      if session is None:
        session = sess.Session()
      self_built = self.build(**kwargs)
      keys = list(self_built.keys())
      cols = list(self_built.values())
      if initialize_variables:
        if variables.local_variables():
          session.run(variables.initialize_local_variables())
        if variables.all_variables():
          session.run(variables.initialize_all_variables())
      if start_queues:
        coord = coordinator.Coordinator()
        threads = qr.start_queue_runners(sess=session, coord=coord)
      i = 0
      while num_batches is None or i < num_batches:
        i += 1
        try:
          values = session.run(cols)
          yield collections.OrderedDict(zip(keys, values))
        except errors.OutOfRangeError:
          break
      if start_queues:
        coord.request_stop()
        coord.join(threads)
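A hypothetical usage sketch for this generator (`df` stands for an already-built `DataFrame` instance; the name is illustrative only, not from the original code):

# `df.run(...)` yields one dict per batch, mapping column names to NumPy values.
for batch in df.run(num_batches=10):
  print(sorted(batch.keys()))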
Example #27
 def testFloorDivGrad(self):
     with self.test_session():
         a = variables.Variable(2.0)
         b = variables.Variable(4.0)
         with self.test_session() as sess:
             sess.run(variables.initialize_all_variables())
             c_grad = gradients.gradients(math_ops.div_deprecated(a, b), [a, b])
             self.assertAllEqual([x.eval() for x in c_grad], [0.25, -0.125])
             c_grad = gradients.gradients(math_ops.div(a, b), [a, b])
             self.assertAllEqual([x.eval() for x in c_grad], [0.25, -0.125])
             c_grad = gradients.gradients(math_ops.floordiv(a, b), [a, b])
             self.assertAllEqual([None if x is None else x.eval() for x in c_grad], [None, None])
Example #28
def tfadd_with_ckpt(out_dir):
  x = array_ops.placeholder(dtypes.int32, name='x_hold')
  y = variables.Variable(constant_op.constant([0]), name='y_saved')
  math_ops.add(x, y, name='x_y_sum')

  init_op = variables.initialize_all_variables()
  saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V1)
  with session.Session() as sess:
    sess.run(init_op)
    sess.run(y.assign(y + 42))
    # Without the checkpoint, the variable won't be set to 42.
    ckpt = '%s/test_graph_tfadd_with_ckpt.ckpt' % out_dir
    saver.save(sess, ckpt)
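A minimal restore sketch to verify the checkpoint round-trip (illustrative only; it assumes the same graph, `saver`, and `ckpt` path as built in `tfadd_with_ckpt` above):

  # Restoring loads y == 42, so feeding x = [1] yields x_y_sum == [43].
  with session.Session() as sess:
    saver.restore(sess, ckpt)
    print(sess.run('x_y_sum:0', feed_dict={'x_hold:0': [1]}))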
Example #29
  def _testTypesForSparseFtrl(self, x, y, z, lr, grad, indices, l1=0.0, l2=0.0,
                              lr_power=-0.5):
    self.setUp()
    with self.test_session(use_gpu=False):
      var = variables.Variable(x)
      accum = variables.Variable(y)
      linear = variables.Variable(z)
      variables.initialize_all_variables().run()

      self.assertAllEqual(x, var.eval())
      sparse_apply_ftrl = training_ops.sparse_apply_ftrl(
          var, accum, linear, grad,
          constant_op.constant(indices, self._toType(indices.dtype)),
          lr, l1, l2, lr_power=lr_power)
      out = sparse_apply_ftrl.eval()
      self.assertShapeEqual(out, sparse_apply_ftrl)

      for (i, index) in enumerate(indices):
        self.assertAllClose(
            x[index] - lr * grad[i] * (y[index] + grad[i] * grad[i]) ** (
                lr_power),
            var.eval()[index])
        self.assertAllEqual(y[index] + grad[i] * grad[i], accum.eval()[index])
Example #30
  def testPiecewiseConstant(self):
    with self.test_session():
      x = variables.Variable(-999)
      assign_100 = x.assign(100)
      assign_105 = x.assign(105)
      assign_110 = x.assign(110)
      assign_120 = x.assign(120)
      assign_999 = x.assign(999)
      pc = learning_rate_decay.piecewise_constant(x, [100, 110, 120],
                                                  [1.0, 0.1, 0.01, 0.001])

      variables.initialize_all_variables().run()
      self.assertAllClose(pc.eval(), 1.0, 1e-6)
      assign_100.op.run()
      self.assertAllClose(pc.eval(), 1.0, 1e-6)
      assign_105.op.run()
      self.assertAllClose(pc.eval(), 0.1, 1e-6)
      assign_110.op.run()
      self.assertAllClose(pc.eval(), 0.1, 1e-6)
      assign_120.op.run()
      self.assertAllClose(pc.eval(), 0.01, 1e-6)
      assign_999.op.run()
      self.assertAllClose(pc.eval(), 0.001, 1e-6)
Example #31
def tfadd_with_ckpt_saver(out_dir):
    x = array_ops.placeholder(dtypes.int32, name='x_hold')
    y = variables.Variable(constant_op.constant([0]), name='y_saved')
    math_ops.add(x, y, name='x_y_sum')

    init_op = variables.initialize_all_variables()
    saver = saver_lib.Saver(name='abcprefix',
                            write_version=saver_pb2.SaverDef.V1)
    with session.Session() as sess:
        sess.run(init_op)
        sess.run(y.assign(y + 42))
        # Without the checkpoint, the variable won't be set to 42.
        ckpt_file = os.path.join(out_dir,
                                 'test_graph_tfadd_with_ckpt_saver.ckpt')
        saver.save(sess, ckpt_file)
        # Without the SaverDef, the restore op won't be named correctly.
        saver_file = os.path.join(out_dir,
                                  'test_graph_tfadd_with_ckpt_saver.saver')
        with open(saver_file, 'wb') as f:
            f.write(saver.as_saver_def().SerializeToString())
Example #32
  def testDenseLayerJitScopeDefinedShape(self):
    """Tests that the dense layer node is properly compiled in jit scope.

    Dense layer with static shape input tensor should be compiled into a single
    XlaCompile/XlaRun op pair by XLA.
    """

    with self.cached_session() as sess:
      x = array_ops.placeholder(shape=[2, 2, 3], dtype=np.float32)
      with jit_scope():
        y = layers.dense(x, 3)

      sess.run(variables.initialize_all_variables())
      run_metadata = config_pb2.RunMetadata()
      sess.run(
          y, {x: np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])},
          run_metadata=run_metadata,
          options=config_pb2.RunOptions(
              trace_level=config_pb2.RunOptions.FULL_TRACE))

    labels = GetRunMetadataLabels(run_metadata)
    self.assertEqual(1, self.countXlaOps(labels))
Example #33
    def testTraining(self):
        """Tests a gradient descent step for a simple model."""
        with self.test_session() as session:
            with self.test_scope():
                with variable_scope.variable_scope("ascope",
                                                   use_resource=True):
                    w = variable_scope.get_variable(
                        "w",
                        shape=[4, 2],
                        dtype=dtypes.float32,
                        initializer=init_ops.constant_initializer(
                            np.array([[1, 2], [3, 4], [5, 6], [7, 8]],
                                     dtype=np.float32)))
                    b = variable_scope.get_variable(
                        "b",
                        shape=[2],
                        dtype=dtypes.float32,
                        initializer=init_ops.constant_initializer(
                            np.array([2, 3], dtype=np.float32)))

                    x = array_ops.placeholder(dtypes.float32, shape=[1, 4])
                    y = math_ops.matmul(x, w) + b
                    loss = math_ops.reduce_sum(y)
                    optimizer = GradientDescentOptimizer(0.1)
                    train = optimizer.minimize(loss)

            session.run(variables.initialize_all_variables())
            session.run(train, {x: np.array([[7, 3, 5, 9]], dtype=np.float32)})
            vw, vb = session.run([w, b])
            self.assertAllClose(np.array(
                [[0.3, 1.3], [2.7, 3.7], [4.5, 5.5], [6.1, 7.1]],
                dtype=np.float32),
                                vw,
                                rtol=1e-4)
            self.assertAllClose(np.array([1.9, 2.9], dtype=np.float32),
                                vb,
                                rtol=1e-4)
Example #34
def main(args):
    if not os.path.exists(FLAGS.checkpoint):
        tf.logging.fatal(
            'Checkpoint %s does not exist. Have you downloaded it? See tools/download_data.sh',
            FLAGS.checkpoint)
    g = tf.Graph()
    with g.as_default():
        input_image = PreprocessImage(FLAGS.image_path[0])

        with slim.arg_scope(inception.inception_v3_arg_scope()):
            logits, end_points = inception.inception_v3(
                input_image, num_classes=FLAGS.num_classes, is_training=False)

        predictions = end_points['multi_predictions'] = tf.nn.sigmoid(
            logits, name='multi_predictions')
        init_op = control_flow_ops.group(
            variables.initialize_all_variables(),
            variables.initialize_local_variables(),
            data_flow_ops.initialize_all_tables())
        saver = tf_saver.Saver()
        sess = tf.Session()
        saver.restore(sess, FLAGS.checkpoint)

        # Run the evaluation on the image
        predictions_eval = np.squeeze(sess.run(predictions))

    # Print top(n) results
    labelmap, label_dict = LoadLabelMaps(FLAGS.num_classes, FLAGS.labelmap,
                                         FLAGS.dict)

    top_k = predictions_eval.argsort()[-FLAGS.n:][::-1]
    for idx in top_k:
        mid = labelmap[idx]
        display_name = label_dict.get(mid, 'unknown')
        score = predictions_eval[idx]
        print('{}: {} - {} (score = {:.2f})'.format(idx, mid, display_name,
                                                    score))
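`PreprocessImage` is not defined in this snippet. A hypothetical sketch of what such a helper might do for an Inception v3 input (standard TF 1.x image ops; the crop fraction and image size defaults are assumptions) is:

def PreprocessImage(image_path, central_fraction=0.875, image_size=299):
    # Decode, centrally crop, resize, rescale to [-1, 1], and add a batch dim.
    image_data = tf.gfile.FastGFile(image_path, 'rb').read()
    image = tf.image.decode_jpeg(image_data, channels=3)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    image = tf.image.central_crop(image, central_fraction=central_fraction)
    image = tf.image.resize_images(image, [image_size, image_size])
    image = tf.multiply(tf.subtract(image, 0.5), 2.0)
    return tf.expand_dims(image, 0)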
Example #35
    def testKernelStateTensor(self):
        """Test that transition kernel works with tensor input to `state`."""
        loc = variable_scope.get_variable("loc", initializer=0.)

        def target_log_prob_fn(loc):
            return normal_lib.Normal(loc=0.0, scale=0.1).log_prob(loc)

        new_state, _ = mh.kernel(target_log_prob_fn=target_log_prob_fn,
                                 proposal_fn=mh.proposal_normal(scale=0.05),
                                 current_state=loc,
                                 seed=231251)
        loc_update = loc.assign(new_state)

        init = variables.initialize_all_variables()
        with self.test_session() as sess:
            sess.run(init)
            loc_samples = []
            for _ in range(2500):
                loc_sample = sess.run(loc_update)
                loc_samples.append(loc_sample)
        loc_samples = loc_samples[500:]  # drop samples for burn-in

        self.assertAllClose(np.mean(loc_samples), 0.0, rtol=1e-5, atol=1e-1)
        self.assertAllClose(np.std(loc_samples), 0.1, rtol=1e-5, atol=1e-1)
Example #36
def evaluate(graph,
             output_dir,
             checkpoint_path,
             eval_dict,
             update_op=None,
             global_step_tensor=None,
             supervisor_master='',
             log_every_steps=10,
             feed_fn=None,
             max_steps=None):
  """Evaluate a model loaded from a checkpoint.

  Given `graph`, a directory to write summaries to (`output_dir`), a checkpoint
  to restore variables from, and a `dict` of `Tensor`s to evaluate, run an eval
  loop for `max_steps` steps, or until an exception (generally, an
  end-of-input signal from a reader operation) is raised from running
  `eval_dict`.

  In each step of evaluation, all tensors in the `eval_dict` are evaluated, and
  every `log_every_steps` steps, they are logged. At the very end of evaluation,
  a summary is evaluated (finding the summary ops using `Supervisor`'s logic)
  and written to `output_dir`.

  Args:
    graph: A `Graph` to train. It is expected that this graph is not in use
      elsewhere.
    output_dir: A string containing the directory to write a summary to.
    checkpoint_path: A string containing the path to a checkpoint to restore.
      Can be `None` if the graph doesn't require loading any variables.
    eval_dict: A `dict` mapping string names to tensors to evaluate. It is
      evaluated in every logging step. The result of the final evaluation is
      returned. If `update_op` is None, then it's evaluated in every step. If
      `max_steps` is `None`, this should depend on a reader that will raise an
      end-of-input exception when the inputs are exhausted.
    update_op: A `Tensor` which is run in every step.
    global_step_tensor: A `Variable` containing the global step. If `None`,
      one is extracted from the graph using the same logic as in `Supervisor`.
      Used to place eval summaries on training curves.
    supervisor_master: The master string to use when preparing the session.
    log_every_steps: Integer. Output logs every `log_every_steps` evaluation
      steps. The logs contain the `eval_dict` and timing information.
    feed_fn: A function that is called every iteration to produce a `feed_dict`
      passed to `session.run` calls. Optional.
    max_steps: Integer. Evaluate `eval_dict` this many times.

  Returns:
    A tuple `(eval_results, global_step)`:
    eval_results: A `dict` mapping `string` to numeric values (`int`, `float`)
      that are the result of running eval_dict in the last step. `None` if no
      eval steps were run.
    global_step: The global step this evaluation corresponds to.

  Raises:
    ValueError: if `output_dir` is empty.
  """
  if not output_dir:
    raise ValueError('Output directory should be non-empty %s.' % output_dir)
  with graph.as_default():
    global_step_tensor = contrib_variables.assert_or_get_global_step(
        graph, global_step_tensor)

    # Create or get summary op, global_step and saver.
    saver = _get_saver()
    local_init_op = _get_local_init_op()
    ready_op = _get_ready_op()

    session_manager = session_manager_lib.SessionManager(
        local_init_op=local_init_op,
        ready_op=ready_op)
    session, initialized = session_manager.recover_session(
        master=supervisor_master,
        saver=saver,
        checkpoint_dir=checkpoint_path)

    # Start queue runners.
    coord = coordinator.Coordinator()
    threads = queue_runner.start_queue_runners(session, coord)

  with session:
    if not initialized:
      logging.warning('Failed to initialize from %s.', checkpoint_path)
      # TODO(ipolosukhin): This should be failing, but old code relies on that.
      session.run(variables.initialize_all_variables())
      if checkpoint_path:
        _restore_from_checkpoint(session, graph, checkpoint_path, saver)

    current_global_step = session.run(global_step_tensor)
    eval_results = None
    # TODO(amodei): Fix this to run through the eval set exactly once.
    step = 0
    eval_step = None
    feed_dict = None
    logging.info('Eval steps [%d,%s) for training step %d.', step,
                 'inf' if max_steps is None
                 else str(max_steps), current_global_step)
    try:
      try:
        while (max_steps is None) or (step < max_steps):
          step += 1
          start_time = time.time()
          feed_dict = feed_fn() if feed_fn is not None else None
          if update_op is not None:
            session.run(update_op, feed_dict=feed_dict)
          else:
            eval_results = session.run(eval_dict, feed_dict=feed_dict)
            eval_step = step

          # TODO(wicke): We should assert that the global step hasn't changed.
          if step % log_every_steps == 0:
            if eval_step is None or step != eval_step:
              eval_results = session.run(eval_dict, feed_dict=feed_dict)
              eval_step = step
            duration = time.time() - start_time
            logging.info('Results after %d steps (%.3f sec/batch): %s.',
                         step, float(duration),
                         _eval_results_to_str(eval_results))
      finally:
        if eval_results is None or step != eval_step:
          eval_results = session.run(eval_dict, feed_dict=feed_dict)
          eval_step = step
        # Stop session first, before queue runners.
        session.close()

        # Stop queue runners.
        try:
          coord.request_stop()
          coord.join(threads, stop_grace_period_secs=120)
        except (RuntimeError, errors.CancelledError) as e:
          logging.warning('Coordinator didn\'t stop cleanly: %s', e)

    # catch OutOfRangeError, which is thrown when the queue is out of data (and
    # for other reasons as well).
    except errors.OutOfRangeError as e:
      if max_steps is None:
        logging.info('Input queue is exhausted.')
      else:
        logging.warn('Input queue is exhausted: %s.', e)
    # catch StopIteration, which is thrown when the DataReader is out of data.
    except StopIteration as e:
      if max_steps is None:
        logging.info('Input iterator is exhausted.')
      else:
        logging.warn('Input iterator is exhausted: %s.', e)

  # Save summaries for this evaluation.
  _write_summary_results(output_dir, eval_results, current_global_step)

  return eval_results, current_global_step
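A hypothetical call sketch for `evaluate` (the graph `g`, the `loss_op` tensor, and both paths are placeholders, not from the original code):

eval_results, global_step = evaluate(
    graph=g,
    output_dir='/tmp/eval_logs',
    checkpoint_path='/tmp/train_logs/model.ckpt-1000',
    eval_dict={'loss': loss_op},
    max_steps=100)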
Example #37
def evaluate_once(master,
                  checkpoint_path,
                  logdir,
                  num_evals=1,
                  eval_op=None,
                  eval_op_feed_dict=None,
                  final_op=None,
                  final_op_feed_dict=None,
                  summary_op=_USE_DEFAULT,
                  summary_op_feed_dict=None,
                  variables_to_restore=None,
                  session_config=None):
  """Evaluates the model at the given checkpoint path.

  Args:
    master: The BNS address of the TensorFlow master.
    checkpoint_path: The path to a checkpoint to use for evaluation.
    logdir: The directory where the TensorFlow summaries are written to.
    num_evals: The number of times to run `eval_op`.
    eval_op: An operation run `num_evals` times.
    eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
    final_op: An operation to execute after all of the `eval_op` executions. The
      value of `final_op` is returned.
    final_op_feed_dict: A feed dictionary to use when executing `final_op`.
    summary_op: The summary_op to evaluate after running TF-Slim's metric ops. By
      default the summary_op is set to tf.merge_all_summaries().
    summary_op_feed_dict: An optional feed dictionary to use when running the
      `summary_op`.
    variables_to_restore: A list of TensorFlow variables to restore during
      evaluation. If the argument is left as `None` then
      slim.variables.GetVariablesToRestore() is used.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.

  Returns:
    The value of `final_op` or `None` if `final_op` is `None`.
  """
  if summary_op == _USE_DEFAULT:
    summary_op = logging_ops.merge_all_summaries()

  global_step = variables.get_or_create_global_step()

  init_op = control_flow_ops.group(tf_variables.initialize_all_variables(),
                                   tf_variables.initialize_local_variables(),
                                   data_flow_ops.initialize_all_tables())

  saver = tf_saver.Saver(variables_to_restore or
                         variables.get_variables_to_restore())

  summary_writer = summary_io.SummaryWriter(logdir)

  sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                             logdir=logdir,
                             init_op=init_op,
                             summary_op=None,
                             summary_writer=None,
                             global_step=None,
                             saver=None)

  logging.info('Starting evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                         time.gmtime()))
  with sv.managed_session(
      master, start_standard_services=False, config=session_config) as sess:
    saver.restore(sess, checkpoint_path)
    sv.start_queue_runners(sess)
    final_op_value = evaluation(sess,
                                num_evals=num_evals,
                                eval_op=eval_op,
                                eval_op_feed_dict=eval_op_feed_dict,
                                final_op=final_op,
                                final_op_feed_dict=final_op_feed_dict,
                                summary_op=summary_op,
                                summary_op_feed_dict=summary_op_feed_dict,
                                summary_writer=summary_writer,
                                global_step=global_step)

  logging.info('Finished evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                         time.gmtime()))

  return final_op_value
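A hypothetical call sketch for `evaluate_once` (the metric update op and the paths are placeholders, not from the original code):

evaluate_once(master='',
              checkpoint_path='/tmp/train_logs/model.ckpt-1000',
              logdir='/tmp/eval_logs',
              num_evals=100,
              eval_op=accuracy_update_op)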
Example #38
    def testRestore(self):
        # Calling self.test_session() without a graph specified results in
        # TensorFlowTestCase caching the session and returning the same one
        # every time. In this test, we need to create two different sessions
        # which is why we also create a graph and pass it to self.test_session()
        # to ensure no caching occurs under the hood.
        save_path = os.path.join(self.get_temp_dir(), "restore-test")
        with ops.Graph().as_default() as graph:
            with self.test_session(graph) as sess:
                tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig(
                )

                tree = tree_ensemble_config.trees.add()
                tree_ensemble_config.tree_metadata.add().is_finalized = True
                tree_ensemble_config.tree_weights.append(1.0)
                _append_to_leaf(tree.nodes.add().leaf, 0, -0.1)

                tree_ensemble_config2 = tree_config_pb2.DecisionTreeEnsembleConfig(
                )
                tree2 = tree_ensemble_config2.trees.add()
                tree_ensemble_config.tree_weights.append(1.0)
                _append_to_leaf(tree2.nodes.add().leaf, 0, -1.0)

                tree_ensemble_config3 = tree_config_pb2.DecisionTreeEnsembleConfig(
                )
                tree3 = tree_ensemble_config3.trees.add()
                tree_ensemble_config.tree_weights.append(1.0)
                _append_to_leaf(tree3.nodes.add().leaf, 0, -10.0)

                # Prepare learner config.
                learner_config = learner_pb2.LearnerConfig()
                learner_config.num_classes = 2

                tree_ensemble_handle = model_ops.tree_ensemble_variable(
                    stamp_token=3,
                    tree_ensemble_config=tree_ensemble_config.
                    SerializeToString(),
                    name="restore_tree")
                feature_usage_counts = variables.Variable(
                    initial_value=array_ops.zeros([1], dtypes.int64),
                    name="feature_usage_counts",
                    trainable=False)
                feature_gains = variables.Variable(
                    initial_value=array_ops.zeros([1], dtypes.float32),
                    name="feature_gains",
                    trainable=False)

                resources.initialize_resources(
                    resources.shared_resources()).run()
                variables.initialize_all_variables().run()
                my_saver = saver.Saver()

                with ops.control_dependencies([
                        ensemble_optimizer_ops.add_trees_to_ensemble(
                            tree_ensemble_handle,
                            tree_ensemble_config2.SerializeToString(),
                            feature_usage_counts, [0],
                            feature_gains, [0], [[]],
                            learning_rate=1)
                ]):
                    result, _, _ = prediction_ops.gradient_trees_prediction(
                        tree_ensemble_handle,
                        self._seed, [self._dense_float_tensor], [
                            self._sparse_float_indices1,
                            self._sparse_float_indices2
                        ], [
                            self._sparse_float_values1,
                            self._sparse_float_values2
                        ],
                        [self._sparse_float_shape1, self._sparse_float_shape2],
                        [self._sparse_int_indices1],
                        [self._sparse_int_values1], [self._sparse_int_shape1],
                        learner_config=learner_config.SerializeToString(),
                        apply_dropout=False,
                        apply_averaging=False,
                        center_bias=False,
                        reduce_dim=True)
                self.assertAllClose([[-1.1], [-1.1]], result.eval())
                # Save before adding other trees.
                val = my_saver.save(sess, save_path)
                self.assertEqual(save_path, val)

                # Add more trees after saving.
                with ops.control_dependencies([
                        ensemble_optimizer_ops.add_trees_to_ensemble(
                            tree_ensemble_handle,
                            tree_ensemble_config3.SerializeToString(),
                            feature_usage_counts, [0],
                            feature_gains, [0], [[]],
                            learning_rate=1)
                ]):
                    result, _, _ = prediction_ops.gradient_trees_prediction(
                        tree_ensemble_handle,
                        self._seed, [self._dense_float_tensor], [
                            self._sparse_float_indices1,
                            self._sparse_float_indices2
                        ], [
                            self._sparse_float_values1,
                            self._sparse_float_values2
                        ],
                        [self._sparse_float_shape1, self._sparse_float_shape2],
                        [self._sparse_int_indices1],
                        [self._sparse_int_values1], [self._sparse_int_shape1],
                        learner_config=learner_config.SerializeToString(),
                        apply_dropout=False,
                        apply_averaging=False,
                        center_bias=False,
                        reduce_dim=True)
                self.assertAllClose(result.eval(), [[-11.1], [-11.1]])

        # Start a second session.  In that session the parameter nodes
        # have not been initialized either.
        with ops.Graph().as_default() as graph:
            with self.test_session(graph) as sess:
                tree_ensemble_handle = model_ops.tree_ensemble_variable(
                    stamp_token=0,
                    tree_ensemble_config="",
                    name="restore_tree")
                my_saver = saver.Saver()
                my_saver.restore(sess, save_path)
                result, _, _ = prediction_ops.gradient_trees_prediction(
                    tree_ensemble_handle,
                    self._seed, [self._dense_float_tensor],
                    [self._sparse_float_indices1, self._sparse_float_indices2],
                    [self._sparse_float_values1, self._sparse_float_values2],
                    [self._sparse_float_shape1, self._sparse_float_shape2],
                    [self._sparse_int_indices1], [self._sparse_int_values1],
                    [self._sparse_int_shape1],
                    learner_config=learner_config.SerializeToString(),
                    apply_dropout=False,
                    apply_averaging=False,
                    center_bias=False,
                    reduce_dim=True)
                # Make sure we only have the first and second tree.
                # The third tree was added after the save.
                self.assertAllClose(result.eval(), [[-1.1], [-1.1]])
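
testRestore saves from one graph/session and restores into a brand-new graph so that no cached session state leaks between the two. A stripped-down sketch of that two-graph save/restore flow, with hypothetical variable and path names:

import os
import tempfile
import tensorflow as tf

save_path = os.path.join(tempfile.mkdtemp(), 'restore-sketch')

# First graph/session: create, initialize, save.
with tf.Graph().as_default():
    v = tf.Variable(42.0, name='v')
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        saver.save(sess, save_path)
        sess.run(v.assign(-1.0))  # mutate after saving; the checkpoint keeps 42.0

# Second graph/session: restore without running any initializer.
with tf.Graph().as_default():
    v = tf.Variable(0.0, name='v')
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, save_path)
        print(sess.run(v))  # 42.0
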
예제 #39
0
    def testDocstringExample(self):
        """Tests the simplified docstring example with multiple chains."""

        n = 2  # dimension of the problem

        # Generate 300 initial values randomly. Each of these would be an
        # independent starting point for a Markov chain.
        state = variable_scope.get_variable(
            "state",
            initializer=random_ops.random_normal([300, n],
                                                 mean=3.0,
                                                 dtype=dtypes.float32,
                                                 seed=42))

        # Computes the log(p(x)) for the unit normal density and ignores the
        # normalization constant.
        def log_density(x):
            return -math_ops.reduce_sum(x * x, reduction_indices=-1) / 2.0

        # Initial log-density value
        state_log_density = variable_scope.get_variable(
            "state_log_density",
            initializer=log_density(state.initialized_value()))

        # A variable to store the log_acceptance_ratio:
        log_acceptance_ratio = variable_scope.get_variable(
            "log_acceptance_ratio",
            initializer=array_ops.zeros([300], dtype=dtypes.float32))

        # Generates random proposals by moving each coordinate uniformly and
        # independently in a box of size 2 centered around the current value.
        # Returns the new point and also the log of the Hastings ratio (the
        # ratio of the probability of going from the proposal to origin and the
        # probability of the reverse transition). When this ratio is 1, the value
        # may be omitted and replaced by None.
        def random_proposal(x):
            return (x + random_ops.random_uniform(array_ops.shape(x),
                                                  minval=-1,
                                                  maxval=1,
                                                  dtype=x.dtype,
                                                  seed=12)), None

        #  Create the op to propagate the chain for 100 steps.
        stepper = mh.evolve(state,
                            state_log_density,
                            log_acceptance_ratio,
                            log_density,
                            random_proposal,
                            n_steps=100,
                            seed=123)
        init = variables.initialize_all_variables()
        with self.test_session() as sess:
            sess.run(init)
            # Run the chains for a total of 1000 steps.
            for _ in range(10):
                sess.run(stepper)
            samples = sess.run(state)
            covariance = np.eye(n)
            # Verify that the estimated mean and covariance are close to the true
            # values.
            self.assertAlmostEqual(np.max(
                np.abs(np.mean(samples, 0) - np.zeros(n))),
                                   0,
                                   delta=0.1)
            self.assertAlmostEqual(np.max(
                np.abs(
                    np.reshape(np.cov(samples, rowvar=False), [n**2]) -
                    np.reshape(covariance, [n**2]))),
                                   0,
                                   delta=0.2)
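
The comments above already describe the algorithm: propose a symmetric random move, compare log densities, then accept or reject. A NumPy-only sketch of that random-walk Metropolis scheme, shown purely to illustrate the idea (it does not use the contrib `mh` API):

import numpy as np

rng = np.random.RandomState(42)
n, n_chains, n_steps = 2, 300, 1000

def log_density(x):
    # Unnormalized unit-normal log density, summed over the last axis.
    return -0.5 * np.sum(x * x, axis=-1)

state = rng.normal(loc=3.0, size=(n_chains, n))
state_logp = log_density(state)
for _ in range(n_steps):
    # Symmetric proposal: move each coordinate uniformly in a box of size 2.
    proposal = state + rng.uniform(-1.0, 1.0, size=state.shape)
    proposal_logp = log_density(proposal)
    # Accept with probability min(1, exp(proposal_logp - state_logp)).
    accept = np.log(rng.rand(n_chains)) < proposal_logp - state_logp
    state[accept] = proposal[accept]
    state_logp[accept] = proposal_logp[accept]

print(state.mean(axis=0))  # drifts toward 0 as the chains mix
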
예제 #40
0
 def default_init_op():
     return control_flow_ops.group(
         variables.initialize_all_variables(),
         resources.initialize_resources(
             resources.shared_resources()))
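
A brief usage note for the helper above: the grouped op is intended to be run exactly once, before anything reads the variables or shared resources it covers. A hedged sketch, assuming `default_init_op` and its module-level imports are available in scope:

import tensorflow as tf

with tf.Session() as sess:
    # Run the grouped initializer once, before the first read of any variable
    # or shared resource.
    sess.run(default_init_op())
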
예제 #41
0
def evaluation_loop(master,
                    checkpoint_dir,
                    logdir,
                    num_evals=1,
                    eval_op=None,
                    eval_op_feed_dict=None,
                    final_op=None,
                    final_op_feed_dict=None,
                    summary_op=_USE_DEFAULT,
                    summary_op_feed_dict=None,
                    variables_to_restore=None,
                    eval_interval_secs=60,
                    max_number_of_evaluations=None):
  """Runs TF-Slim's Evaluation Loop.

  Args:
    master: The BNS address of the TensorFlow master.
    checkpoint_dir: The directory where checkpoints are stored.
    logdir: The directory where the TensorFlow summaries are written to.
    num_evals: The number of times to run `eval_op`.
    eval_op: An operation run `num_evals` times.
    eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
    final_op: An operation to execute after all of the `eval_op` executions. The
      value of `final_op` is returned.
    final_op_feed_dict: A feed dictionary to use when executing `final_op`.
    summary_op: The summary_op to evaluate after running TF-Slim's metric ops. By
      default the summary_op is set to tf.merge_all_summaries().
    summary_op_feed_dict: An optional feed dictionary to use when running the
      `summary_op`.
    variables_to_restore: A list of TensorFlow variables to restore during
      evaluation. If the argument is left as `None` then
      slim.variables.GetVariablesToRestore() is used.
    eval_interval_secs: The minimum number of seconds between evaluations.
    max_number_of_evaluations: The maximum number of evaluation iterations.
      If the value is left as `None`, the evaluation continues indefinitely.
  """
  if summary_op == _USE_DEFAULT:
    summary_op = logging_ops.merge_all_summaries()

  global_step = variables.get_or_create_global_step()

  init_op = control_flow_ops.group(tf_variables.initialize_all_variables(),
                                   tf_variables.initialize_local_variables(),
                                   data_flow_ops.initialize_all_tables())

  saver = tf_saver.Saver(variables_to_restore or
                         variables.get_variables_to_restore())

  summary_writer = summary_io.SummaryWriter(logdir)

  sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                             logdir=logdir,
                             init_op=init_op,
                             summary_op=None,
                             summary_writer=None,
                             global_step=None,
                             saver=saver)

  last_checkpoint = None
  number_of_evaluations = 0
  while True:
    last_checkpoint = wait_for_new_checkpoint(checkpoint_dir, last_checkpoint)
    start = time.time()
    logging.info('Starting evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                           time.gmtime()))

    with sv.managed_session(master, start_standard_services=False) as sess:
      sv.saver.restore(sess, last_checkpoint)
      sv.start_queue_runners(sess)
      evaluation(sess,
                 num_evals=num_evals,
                 eval_op=eval_op,
                 eval_op_feed_dict=eval_op_feed_dict,
                 final_op=final_op,
                 final_op_feed_dict=final_op_feed_dict,
                 summary_op=summary_op,
                 summary_op_feed_dict=summary_op_feed_dict,
                 summary_writer=summary_writer,
                 global_step=global_step)

    logging.info('Finished evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                           time.gmtime()))
    number_of_evaluations += 1
    if (max_number_of_evaluations and
        number_of_evaluations >= max_number_of_evaluations):
      logging.info('Reached max_number_of_evaluations=%s. Exit',
                   max_number_of_evaluations)
      break

    time_to_next_eval = start + eval_interval_secs - time.time()
    if time_to_next_eval > 0:
      time.sleep(time_to_next_eval)
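
A hedged usage sketch for the loop above, assuming an evaluation graph and streaming-metric update ops (`names_to_updates`) built elsewhere; every name other than `evaluation_loop`'s own parameters is hypothetical, and checkpoints are assumed to appear in the given directory from a separate training job:

import tensorflow as tf

# Hypothetical dict of streaming-metric update ops, e.g. from TF-Slim metrics.
eval_op = tf.group(*names_to_updates.values())

evaluation_loop(master='',
                checkpoint_dir='/tmp/model',       # hypothetical checkpoint dir
                logdir='/tmp/model/eval',          # hypothetical summary dir
                num_evals=100,                     # batches evaluated per checkpoint
                eval_op=eval_op,
                eval_interval_secs=60,
                max_number_of_evaluations=None)    # evaluate every new checkpoint
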
예제 #42
0
def train(train_op,
          logdir,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_steps=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=_USE_DEFAULT,
          init_fn=None,
          ready_op=_USE_DEFAULT,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          summary_writer=_USE_DEFAULT,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None,
          session_config=None,
          trace_every_n_steps=None):
    """Runs a training loop using a TensorFlow supervisor.

  When the sync_optimizer is supplied, gradient updates are applied
  synchronously. Otherwise, gradient updates are applied asynchronously.

  Args:
    train_op: A `Tensor` that, when executed, will apply the gradients and
      return the loss value.
    logdir: The directory where training logs are written to. If None, model
      checkpoints and summaries will not be written.
    train_step_fn: The function to call in order to execute a single gradient
      step. The function must take exactly four arguments: the current
      session, the `train_op` `Tensor`, a global step `Tensor` and a dictionary.
    train_step_kwargs: A dictionary which is passed to the `train_step_fn`. By
      default, two `Boolean`, scalar ops called "should_stop" and "should_log"
      are provided.
    log_every_n_steps: The frequency, in terms of global steps, that the loss
      and global step are logged.
    graph: The graph to pass to the supervisor. If no graph is supplied the
      default graph is used.
    master: The BNS name of the tensorflow master.
    is_chief: Specifies whether or not the training is being run by the primary
      replica during replica training.
    global_step: The `Tensor` representing the global step. If left as `None`,
      then slim.variables.get_or_create_global_step() is used.
    number_of_steps: The max number of gradient steps to take during training.
      If the value is left as None, training proceeds indefinitely.
    init_op: The initialization operation. If left to its default value, then
      the session is initialized by calling `tf.initialize_all_variables()`.
    init_feed_dict: A feed dictionary to use when executing the `init_op`.
    local_init_op: The local initialization operation. If left to its default
      value, then the session is initialized by calling
      `tf.initialize_local_variables()` and `tf.initialize_all_tables()`.
    init_fn: An optional callable to be executed after `init_op` is called. The
      callable must accept one argument, the session being initialized.
    ready_op: Operation to check if the model is ready to use. If left to its
      default value, then the session checks for readiness by calling
      `tf.report_uninitialized_variables()`.
    summary_op: The summary operation.
    save_summaries_secs: How often, in seconds, to save summaries.
    summary_writer: `SummaryWriter` to use.  Can be `None`
      to indicate that no summaries should be written. If unset, we
      create a SummaryWriter.
    startup_delay_steps: The number of steps to wait for before beginning. Note
      that this must be 0 if a sync_optimizer is supplied.
    saver: Saver to save checkpoints. If None, a default one will be created
      and used.
    save_interval_secs: How often, in seconds, to save the model to `logdir`.
    sync_optimizer: an instance of tf.train.SyncReplicasOptimizer. If the
      argument is supplied, gradient updates will be synchronous. If left as
      `None`, gradient updates will be asynchronous.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.
    trace_every_n_steps: produce and save a `Timeline` in Chrome trace format
      and add it to the summaries every `trace_every_n_steps`. If None, no trace
      information will be produced or saved.

  Returns:
    the value of the loss function after training.

  Raises:
    ValueError: if `train_op` is empty or if `startup_delay_steps` is
      non-zero when `sync_optimizer` is supplied, if `number_of_steps` is
      negative, or if `trace_every_n_steps` is not `None` and no `logdir` is
      provided.
  """
    if train_op is None:
        raise ValueError('train_op cannot be None.')

    if logdir is None:
        if summary_op != _USE_DEFAULT:
            raise ValueError('Cannot provide summary_op because logdir=None')
        if saver is not None:
            raise ValueError('Cannot provide saver because logdir=None')
        if trace_every_n_steps is not None:
            raise ValueError('Cannot provide trace_every_n_steps because '
                             'logdir=None')

    if sync_optimizer is not None and startup_delay_steps > 0:
        raise ValueError(
            'startup_delay_steps must be zero when sync_optimizer is supplied.'
        )

    if number_of_steps is not None and number_of_steps <= 0:
        raise ValueError(
            '`number_of_steps` must be either None or a positive number.')

    graph = graph or ops.get_default_graph()
    with graph.as_default():
        if global_step is None:
            global_step = variables.get_or_create_global_step()
        saver = saver or tf_saver.Saver()

        with ops.name_scope('init_ops'):
            if init_op == _USE_DEFAULT:
                init_op = tf_variables.initialize_all_variables()

            if ready_op == _USE_DEFAULT:
                ready_op = tf_variables.report_uninitialized_variables()

            if local_init_op == _USE_DEFAULT:
                local_init_op = control_flow_ops.group(
                    tf_variables.initialize_local_variables(),
                    data_flow_ops.initialize_all_tables())

        if summary_op == _USE_DEFAULT:
            summary_op = logging_ops.merge_all_summaries()

        if summary_writer == _USE_DEFAULT:
            summary_writer = supervisor.Supervisor.USE_DEFAULT

        cleanup_op = None

        if is_chief and sync_optimizer is not None:
            if not isinstance(
                    sync_optimizer,
                (sync_replicas_optimizer.SyncReplicasOptimizer,
                 sync_replicas_optimizer.SyncReplicasOptimizerV2)):
                raise ValueError(
                    '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer or '
                    'tf.train.SyncReplicasOptimizerV2.')

            # Need to create these BEFORE the supervisor finalizes the graph:
            with ops.control_dependencies([init_op]):
                init_tokens_op = sync_optimizer.get_init_tokens_op()
            init_op = init_tokens_op
            chief_queue_runner = sync_optimizer.get_chief_queue_runner()
            if isinstance(sync_optimizer,
                          sync_replicas_optimizer.SyncReplicasOptimizer):
                cleanup_op = sync_optimizer.get_clean_up_op()

        if train_step_kwargs == _USE_DEFAULT:
            with ops.name_scope('train_step'):
                train_step_kwargs = {}

                if number_of_steps:
                    should_stop_op = math_ops.greater_equal(
                        global_step, number_of_steps)
                else:
                    should_stop_op = constant_op.constant(False)
                train_step_kwargs['should_stop'] = should_stop_op
                train_step_kwargs['should_log'] = math_ops.equal(
                    math_ops.mod(global_step, log_every_n_steps), 0)
                if is_chief and trace_every_n_steps is not None:
                    train_step_kwargs['should_trace'] = math_ops.equal(
                        math_ops.mod(global_step, trace_every_n_steps), 0)
                    train_step_kwargs['logdir'] = logdir

    sv = supervisor.Supervisor(graph=graph,
                               is_chief=is_chief,
                               logdir=logdir,
                               init_op=init_op,
                               init_feed_dict=init_feed_dict,
                               local_init_op=local_init_op,
                               ready_op=ready_op,
                               summary_op=summary_op,
                               summary_writer=summary_writer,
                               global_step=global_step,
                               saver=saver,
                               save_summaries_secs=save_summaries_secs,
                               save_model_secs=save_interval_secs,
                               init_fn=init_fn)

    if summary_writer is not None:
        train_step_kwargs['summary_writer'] = sv.summary_writer

    should_retry = True
    while should_retry:
        try:
            should_retry = False
            with sv.managed_session(master,
                                    start_standard_services=False,
                                    config=session_config) as sess:
                logging.info('Starting Session.')
                if is_chief:
                    if logdir:
                        sv.start_standard_services(sess)
                elif startup_delay_steps > 0:
                    _wait_for_step(
                        sess, global_step,
                        min(startup_delay_steps, number_of_steps
                            or sys.maxint))
                sv.start_queue_runners(sess)
                logging.info('Starting Queues.')
                if is_chief and sync_optimizer is not None:
                    sv.start_queue_runners(sess, [chief_queue_runner])
                try:
                    while not sv.should_stop():
                        total_loss, should_stop = train_step_fn(
                            sess, train_op, global_step, train_step_kwargs)
                        if should_stop:
                            logging.info('Stopping Training.')
                            break
                    if logdir and sv.is_chief:
                        logging.info(
                            'Finished training! Saving model to disk.')
                        sv.saver.save(sess,
                                      sv.save_path,
                                      global_step=sv.global_step)
                except:
                    if sv.is_chief and cleanup_op is not None:
                        logging.info('About to execute sync_clean_up_op!')
                        sess.run(cleanup_op)
                    raise

        except errors.AbortedError:
            # Always re-run on AbortedError as it indicates a restart of one of the
            # distributed tensorflow servers.
            logging.info('Retrying training!')
            should_retry = True

    return total_loss
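
A hedged usage sketch for this training loop with a deliberately tiny graph: one variable fitted toward a constant. The log directory is hypothetical, and the `train_op` is wrapped so that it returns the loss value, as the docstring requires:

import tensorflow as tf

# Minimal illustrative graph: fit a single variable toward the constant 3.0.
x = tf.Variable(0.0, name='x')
loss = tf.square(x - 3.0)
global_step = tf.Variable(0, name='global_step', trainable=False)

minimize_op = tf.train.GradientDescentOptimizer(0.1).minimize(
    loss, global_step=global_step)
# The docstring asks for a train_op that returns the loss, so re-emit the loss
# after the gradient update has been applied.
with tf.control_dependencies([minimize_op]):
    train_op = tf.identity(loss, name='train_op')

final_loss = train(train_op,
                   logdir='/tmp/train_sketch',  # hypothetical log directory
                   global_step=global_step,
                   number_of_steps=100,         # stop after 100 gradient steps
                   log_every_n_steps=10,
                   save_summaries_secs=60,
                   save_interval_secs=60)
print('final loss:', final_loss)
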
예제 #43
0
    def testKernelStateList(self):
        """Test that transition kernel works with list input to `state`."""
        num_chains = 2
        loc_one = variable_scope.get_variable(
            "loc_one", [num_chains], initializer=init_ops.zeros_initializer())
        loc_two = variable_scope.get_variable(
            "loc_two", [num_chains], initializer=init_ops.zeros_initializer())

        def target_log_prob_fn(loc_one, loc_two):
            loc = array_ops.stack([loc_one, loc_two])
            log_prob = mvn_tril_lib.MultivariateNormalTriL(
                loc=constant_op.constant([0., 0.]),
                scale_tril=constant_op.constant([[0.1, 0.1],
                                                 [0.0, 0.1]])).log_prob(loc)
            return math_ops.reduce_sum(log_prob, 0)

        def proposal_fn(loc_one, loc_two):
            loc_one_proposal = mh.proposal_normal(scale=0.05)
            loc_two_proposal = mh.proposal_normal(scale=0.05)
            loc_one_sample, _ = loc_one_proposal(loc_one)
            loc_two_sample, _ = loc_two_proposal(loc_two)
            return [loc_one_sample, loc_two_sample], None

        new_state, _ = mh.kernel(target_log_prob_fn=target_log_prob_fn,
                                 proposal_fn=proposal_fn,
                                 current_state=[loc_one, loc_two],
                                 seed=12415)
        loc_one_update = loc_one.assign(new_state[0])
        loc_two_update = loc_two.assign(new_state[1])

        init = variables.initialize_all_variables()
        with self.test_session() as sess:
            sess.run(init)
            loc_one_samples = []
            loc_two_samples = []
            for _ in range(10000):
                loc_one_sample, loc_two_sample = sess.run(
                    [loc_one_update, loc_two_update])
                loc_one_samples.append(loc_one_sample)
                loc_two_samples.append(loc_two_sample)

        loc_one_samples = np.array(loc_one_samples)
        loc_two_samples = np.array(loc_two_samples)
        loc_one_samples = loc_one_samples[1000:]  # drop samples for burn-in
        loc_two_samples = loc_two_samples[1000:]  # drop samples for burn-in

        self.assertAllClose(np.mean(loc_one_samples, 0),
                            np.array([0.] * num_chains),
                            rtol=1e-5,
                            atol=1e-1)
        self.assertAllClose(np.mean(loc_two_samples, 0),
                            np.array([0.] * num_chains),
                            rtol=1e-5,
                            atol=1e-1)
        self.assertAllClose(np.std(loc_one_samples, 0),
                            np.array([0.1] * num_chains),
                            rtol=1e-5,
                            atol=1e-1)
        self.assertAllClose(np.std(loc_two_samples, 0),
                            np.array([0.1] * num_chains),
                            rtol=1e-5,
                            atol=1e-1)
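
The test above repeatedly runs assign ops that write the kernel's new state back into the chain variables, collects the returned values, and drops an initial burn-in before checking moments. A minimal sketch of that assign-and-collect loop with a plain Gaussian random walk (the Metropolis accept/reject step is deliberately omitted):

import numpy as np
import tensorflow as tf

# Two-dimensional state updated by a small Gaussian random walk.
state = tf.Variable(tf.zeros([2]), name='state')
update = state.assign(state + tf.random_normal([2], stddev=0.05, seed=7))

init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    samples = [sess.run(update) for _ in range(2000)]

samples = np.array(samples)[200:]  # drop samples for burn-in
print(np.mean(samples, 0), np.std(samples, 0))
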
예제 #44
0
def evaluate(graph,
             output_dir,
             checkpoint_path,
             eval_dict,
             update_op=None,
             global_step_tensor=None,
             supervisor_master='',
             log_every_steps=10,
             feed_fn=None,
             max_steps=None):
    """Evaluate a model loaded from a checkpoint.

  Given `graph`, a directory to write summaries to (`output_dir`), a checkpoint
  to restore variables from, and a `dict` of `Tensor`s to evaluate, run an eval
  loop for `max_steps` steps.

  In each step of evaluation, all tensors in the `eval_dict` are evaluated, and
  every `log_every_steps` steps, they are logged. At the very end of evaluation,
  a summary is evaluated (finding the summary ops using `Supervisor`'s logic)
  and written to `output_dir`.

  Args:
    graph: A `Graph` to evaluate. It is expected that this graph is not in use
      elsewhere.
    output_dir: A string containing the directory to write a summary to.
    checkpoint_path: A string containing the path to a checkpoint to restore.
      Can be `None` if the graph doesn't require loading any variables.
    eval_dict: A `dict` mapping string names to tensors to evaluate. It is
      evaluated in every logging step. The result of the final evaluation is
      returned. If update_op is None, then it's evaluated in every step.
    update_op: A `Tensor` which is run in every step.
    global_step_tensor: A `Variable` containing the global step. If `None`,
      one is extracted from the graph using the same logic as in `Supervisor`.
      Used to place eval summaries on training curves.
    supervisor_master: The master string to use when preparing the session.
    log_every_steps: Integer. Output logs every `log_every_steps` evaluation
      steps. The logs contain the `eval_dict` and timing information.
    feed_fn: A function that is called every iteration to produce a `feed_dict`
      passed to `session.run` calls. Optional.
    max_steps: Integer. Evaluate `eval_dict` this many times.

  Returns:
    A tuple `(eval_results, global_step)`:
    eval_results: A `dict` mapping `string` to numeric values (`int`, `float`)
      that are the result of running eval_dict in the last step. `None` if no
      eval steps were run.
    global_step: The global step this evaluation corresponds to.
  """
    global_step_tensor = contrib_variables.assert_or_get_global_step(
        graph, global_step_tensor)

    # Add a scalar summary for every tensor in the evaluation dict, unless a
    # summary with that tag already exists or the value is not a `Tensor`.
    existing_tags = [
        tensor_util.constant_value(summary.op.inputs[0])
        for summary in ops.get_collection(ops.GraphKeys.SUMMARIES)
    ]
    for key, value in eval_dict.items():
        if key in existing_tags:
            continue
        if isinstance(value, ops.Tensor):
            summaries.summarize_tensor(value, tag=key)

    # Create or get summary op, global_step and saver.
    summary_op = logging_ops.get_summary_op()
    saver = _get_saver()
    local_init_op = _get_local_init_op()
    ready_op = _get_ready_op()

    session_manager = session_manager_lib.SessionManager(
        local_init_op=local_init_op, ready_op=ready_op)
    session, initialized = session_manager.recover_session(
        master=supervisor_master, saver=saver, checkpoint_dir=checkpoint_path)

    # Start queue runners.
    coord = coordinator.Coordinator()
    threads = _start_queue_runners(session, coord)

    with session:
        if not initialized:
            logging.warning('Failed to initialize from %s.', checkpoint_path)
            # TODO(ipolosukhin): This should be failing, but old code relies on that.
            session.run(variables.initialize_all_variables())
            if checkpoint_path:
                _restore_from_checkpoint(session, graph, checkpoint_path,
                                         saver)

        current_global_step = session.run(global_step_tensor)
        eval_results = None
        # TODO(amodei): Fix this to run through the eval set exactly once.
        step = 0
        logging.info('Eval steps [%d,%s) for training step %d.', step,
                     'inf' if max_steps is None else str(max_steps),
                     current_global_step)
        try:
            try:
                while (max_steps is None) or (step < max_steps):
                    start_time = time.time()
                    feed_dict = feed_fn() if feed_fn is not None else None
                    eval_results = None
                    if update_op is not None:
                        session.run(update_op, feed_dict=feed_dict)
                    else:
                        eval_results = _run_dict(session,
                                                 eval_dict,
                                                 feed_dict=feed_dict)

                    # TODO(wicke): We should assert that the global step hasn't changed.
                    step += 1
                    if step % log_every_steps == 0:
                        if eval_results is None:
                            eval_results = _run_dict(session,
                                                     eval_dict,
                                                     feed_dict=feed_dict)
                        duration = time.time() - start_time
                        logging.info(
                            'Results after %d steps (%.3f sec/batch): %s.',
                            step, float(duration),
                            ', '.join('%s = %s' % (k, v)
                                      for k, v in eval_results.items()))
            finally:
                if eval_results is None:
                    eval_results = _run_dict(session,
                                             eval_dict,
                                             feed_dict=feed_dict)
                # Stop queue runners.
                coord.request_stop()
                coord.join(threads, stop_grace_period_secs=120)

                # Make our own summary writer and write a summary to the eval dir.
                # Only if feed_fn is not provided.
                # TODO(ipolosukhin): Convert evaluation to use streaming_metrics,
                # then we can save for non feed_fn as well.
                if summary_op is not None and feed_fn is None:
                    summary_writer = None
                    try:
                        summary_writer = get_summary_writer(output_dir)
                        summary_str = session.run(summary_op)
                        if summary_str:
                            summary_writer.add_summary(summary_str,
                                                       current_global_step)
                    finally:
                        if summary_writer:
                            summary_writer.close()
        # Catch OutOfRangeError, which is thrown when the queue is out of data
        # (and for other reasons as well).
        except errors.OutOfRangeError as e:
            if max_steps is None:
                logging.info('Input queue is exhausted.')
            else:
                logging.warn('Input queue is exhausted: %s.', e)
        # Catch StopIteration, which is thrown if the DataReader is out of data.
        except StopIteration as e:
            if max_steps is None:
                logging.info('Input iterator is exhausted.')
            else:
                logging.warn('Input iterator is exhausted: %s.', e)

    return eval_results, current_global_step
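
A hedged usage sketch for `evaluate`: `eval_graph`, `accuracy`, `avg_loss` and `update_op` are hypothetical objects built by the caller, and the paths are illustrative; the call itself follows the signature documented above:

# Hypothetical evaluation graph and tensors built elsewhere by the caller.
eval_results, global_step = evaluate(
    graph=eval_graph,
    output_dir='/tmp/eval_summaries',         # hypothetical summary directory
    checkpoint_path='/tmp/model/model.ckpt',  # hypothetical checkpoint to restore
    eval_dict={'accuracy': accuracy, 'loss': avg_loss},
    update_op=update_op,                      # e.g. grouped streaming-metric updates
    log_every_steps=10,
    max_steps=100)
print(eval_results, global_step)
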
예제 #45
0
    def testWithExistingEnsembleAndDropout(self):
        with self.test_session():
            tree_ensemble = tree_config_pb2.DecisionTreeEnsembleConfig()
            # Add 10 trees with some weights.
            for i in range(0, 10):
                tree = tree_ensemble.trees.add()
                _append_to_leaf(tree.nodes.add().leaf, 0, -0.4)
                tree_ensemble.tree_weights.append(i + 1)
                meta = tree_ensemble.tree_metadata.add()
                meta.num_tree_weight_updates = 1
            tree_ensemble_handle = model_ops.tree_ensemble_variable(
                stamp_token=0,
                tree_ensemble_config=tree_ensemble.SerializeToString(),
                name="existing")
            # Create non-zero feature importance.
            feature_usage_counts = variables.Variable(
                initial_value=np.array([2, 3], np.int64),
                name="feature_usage_counts",
                trainable=False)
            feature_gains = variables.Variable(initial_value=np.array(
                [0.0, 0.3], np.float32),
                                               name="feature_gains",
                                               trainable=False)

            resources.initialize_resources(resources.shared_resources()).run()
            variables.initialize_all_variables().run()

            dropped = [1, 6, 8]
            dropped_original_weights = [2.0, 7.0, 9.0]

            output_ensemble = tree_config_pb2.DecisionTreeEnsembleConfig()
            with ops.control_dependencies([
                    ensemble_optimizer_ops.add_trees_to_ensemble(
                        tree_ensemble_handle,
                        self._ensemble_to_add.SerializeToString(),
                        feature_usage_counts, [1, 2],
                        feature_gains, [0.5, 0.3],
                        [dropped, dropped_original_weights],
                        learning_rate=0.1)
            ]):
                output_ensemble.ParseFromString(
                    model_ops.tree_ensemble_serialize(tree_ensemble_handle)
                    [1].eval())

            # Output.
            self.assertEqual(11, len(output_ensemble.trees))
            self.assertProtoEquals(self._tree_to_add,
                                   output_ensemble.trees[10])
            self.assertAllClose(4.5, output_ensemble.tree_weights[10])

            self.assertAllClose(
                [1., 1.5, 3., 4., 5., 6., 5.25, 8., 6.75, 10., 4.5],
                output_ensemble.tree_weights)

            self.assertEqual(
                1, output_ensemble.tree_metadata[0].num_tree_weight_updates)
            self.assertEqual(
                2, output_ensemble.tree_metadata[1].num_tree_weight_updates)
            self.assertEqual(
                1, output_ensemble.tree_metadata[2].num_tree_weight_updates)

            self.assertEqual(
                1, output_ensemble.tree_metadata[3].num_tree_weight_updates)
            self.assertEqual(
                1, output_ensemble.tree_metadata[4].num_tree_weight_updates)
            self.assertEqual(
                1, output_ensemble.tree_metadata[5].num_tree_weight_updates)
            self.assertEqual(
                2, output_ensemble.tree_metadata[6].num_tree_weight_updates)
            self.assertEqual(
                1, output_ensemble.tree_metadata[7].num_tree_weight_updates)
            self.assertEqual(
                2, output_ensemble.tree_metadata[8].num_tree_weight_updates)
            self.assertEqual(
                1, output_ensemble.tree_metadata[9].num_tree_weight_updates)
            self.assertEqual(
                1, output_ensemble.tree_metadata[10].num_tree_weight_updates)
            self.assertAllEqual([3, 5], feature_usage_counts.eval())
            self.assertArrayNear([0.05, 0.33], feature_gains.eval(), 1e-6)
예제 #46
0
    def _setup_training(self):
        """Sets up graph, model and trainer."""
        # Create config if not given.
        if self._config is None:
            self._config = RunConfig(verbose=self.verbose)
        # Create new graph.
        self._graph = ops.Graph()
        self._graph.add_to_collection("IS_TRAINING", True)
        with self._graph.as_default():
            random_seed.set_random_seed(self._config.tf_random_seed)
            self._global_step = variables.Variable(0,
                                                   name="global_step",
                                                   trainable=False)

            # Setting up inputs and outputs.
            self._inp, self._out = self._data_feeder.input_builder()

            # If class weights are provided, add them to the graph.
            # Different loss functions can use this tensor by name.
            if self.class_weight:
                self._class_weight_node = constant_op.constant(
                    self.class_weight, name='class_weight')

            # Add histograms for X and y if they are floats.
            if self._data_feeder.input_dtype in (np.float32, np.float64):
                logging_ops.histogram_summary("X", self._inp)
            if self._data_feeder.output_dtype in (np.float32, np.float64):
                logging_ops.histogram_summary("y", self._out)

            # Create model's graph.
            self._model_predictions, self._model_loss = self.model_fn(
                self._inp, self._out)

            # Set up a single operator to merge all the summaries
            self._summaries = logging_ops.merge_all_summaries()

            # Create trainer and augment graph with gradients and optimizer.
            # Additionally creates initialization ops.
            learning_rate = self.learning_rate
            optimizer = self.optimizer
            if callable(learning_rate):
                learning_rate = learning_rate(self._global_step)
            if callable(optimizer):
                optimizer = optimizer(learning_rate)
            self._train = optimizers.optimize_loss(
                self._model_loss,
                self._global_step,
                learning_rate=learning_rate,
                optimizer=optimizer,
                clip_gradients=self.clip_gradients)

            # Update ops during training, e.g. batch_norm_ops
            self._train = control_flow_ops.group(
                self._train, *ops.get_collection('update_ops'))

            # Create the op that initializes all variables.
            self._initializers = variables.initialize_all_variables()

            # Create model's saver capturing all the nodes created up until now.
            self._saver = train.Saver(
                max_to_keep=self._config.keep_checkpoint_max,
                keep_checkpoint_every_n_hours=self._config.
                keep_checkpoint_every_n_hours)

            # Enable monitor to create validation data dict with appropriate tf placeholders
            self._monitor.create_val_feed_dict(self._inp, self._out)

            # Create session to run model with.
            self._session = session.Session(self._config.tf_master,
                                            config=self._config.tf_config)

            # Run parameter initializers.
            self._session.run(self._initializers)
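
The method above follows a fixed recipe: build the graph, merge summaries, create one initializer op for all variables, open a session, and run the initializers before any training step. A compact sketch of the same recipe on a toy model, using the pre-1.0 summary names this codebase relies on (all concrete names are illustrative):

import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    global_step = tf.Variable(0, name='global_step', trainable=False)
    weight = tf.Variable(tf.zeros([1]), name='weight')
    loss = tf.reduce_sum(tf.square(weight - 1.0))
    tf.scalar_summary('loss', loss)

    train_op = tf.train.GradientDescentOptimizer(0.1).minimize(
        loss, global_step=global_step)

    # Merge all summaries and build the initializer, as _setup_training does.
    summaries = tf.merge_all_summaries()
    initializers = tf.initialize_all_variables()
    saver = tf.train.Saver()

session = tf.Session(graph=graph)
session.run(initializers)                    # run parameter initializers first
_, summary_str = session.run([train_op, summaries])
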
예제 #47
0
def train(train_op,
          logdir,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_steps=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=None,
          init_fn=None,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None):
    """Runs a training loop using a TensorFlow supervisor.

  When the sync_optimizer is supplied, gradient updates are applied
  synchronously. Otherwise, gradient updates are applied asynchronously.

  Args:
    train_op: A `Tensor` that, when executed, will apply the gradients and
      return the loss value.
    logdir: The directory where training logs are written to.
    train_step_fn: The function to call in order to execute a single gradient
      step. The function must take exactly four arguments: the current
      session, the `train_op` `Tensor`, a global step `Tensor` and a dictionary.
    train_step_kwargs: A dictionary which is passed to the `train_step_fn`. By
      default, two `Boolean`, scalar ops called "should_stop" and "should_log"
      are provided.
    log_every_n_steps: The frequency, in terms of global steps, that the loss
      and global step are logged.
    graph: The graph to pass to the supervisor. If no graph is supplied the
      default graph is used.
    master: The BNS name of the tensorflow master.
    is_chief: Specifies whether or not the training is being run by the primary
      replica during replica training.
    global_step: The `Tensor` representing the global step. If left as `None`,
      then slim.variables.get_or_create_global_step() is used.
    number_of_steps: The max number of gradient steps to take during training.
      If the value is left as None, training proceeds indefinitely.
    init_op: The initialization operation. If left to its default value, then
      the session is initialized by calling `tf.initialize_all_variables()`.
    init_feed_dict: A feed dictionary to use when executing the `init_op`.
    local_init_op: The local initialization operation. If None,
      then the session is initialized by calling
      `tf.initialize_local_variables()` and `tf.initialize_all_tables()`.
    init_fn: An optional callable to be executed after `init_op` is called. The
      callable must accept one argument, the session being initialized.
    summary_op: The summary operation.
    save_summaries_secs: How often, in seconds, to save summaries.
    startup_delay_steps: The number of steps to wait for before beginning. Note
      that this must be 0 if a sync_optimizer is supplied.
    saver: Saver to save checkpoints. If None, a default one will be created
      and used.
    save_interval_secs: How often, in seconds, to save the model to `logdir`.
    sync_optimizer: an instance of tf.train.SyncReplicasOptimizer. If the
      argument is supplied, gradient updates will be synchronous. If left as
      `None`, gradient updates will be asynchronous.

  Returns:
    the value of the loss function after training.

  Raises:
    ValueError: if `train_op` is empty or if `startup_delay_steps` is
      non-zero when `sync_optimizer` is supplied, or if `number_of_steps` is
      negative.
  """
    if train_op is None:
        raise ValueError('train_op cannot be None.')

    if sync_optimizer and startup_delay_steps > 0:
        raise ValueError(
            'startup_delay_steps must be zero when sync_optimizer is supplied.'
        )

    if number_of_steps is not None and number_of_steps <= 0:
        raise ValueError(
            '`number_of_steps` must be either None or a positive number.')

    graph = graph or ops.get_default_graph()
    with graph.as_default():
        if global_step is None:
            global_step = variables.get_or_create_global_step()
        saver = saver or tf_saver.Saver()

    if init_op == _USE_DEFAULT:
        init_op = tf_variables.initialize_all_variables()

    if summary_op == _USE_DEFAULT:
        summary_op = logging_ops.merge_all_summaries()

    cleanup_op = None

    if is_chief and sync_optimizer:
        if not isinstance(sync_optimizer,
                          sync_replicas_optimizer.SyncReplicasOptimizer):
            raise ValueError(
                '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer')

        # Need to create these BEFORE the supervisor finalizes the graph:
        with ops.control_dependencies([init_op]):
            init_tokens_op = sync_optimizer.get_init_tokens_op()
        init_op = init_tokens_op
        chief_queue_runner = sync_optimizer.get_chief_queue_runner()
        cleanup_op = sync_optimizer.get_clean_up_op()

    if train_step_kwargs == _USE_DEFAULT:
        train_step_kwargs = {}

        if number_of_steps:
            should_stop_op = math_ops.greater_equal(global_step,
                                                    number_of_steps)
        else:
            should_stop_op = constant_op.constant(False)
        train_step_kwargs['should_stop'] = should_stop_op
        train_step_kwargs['should_log'] = math_ops.equal(
            math_ops.mod(global_step, log_every_n_steps), 0)

    sv = supervisor.Supervisor(graph=graph,
                               is_chief=is_chief,
                               logdir=logdir,
                               init_op=init_op,
                               init_feed_dict=init_feed_dict,
                               local_init_op=local_init_op,
                               summary_op=summary_op,
                               global_step=global_step,
                               saver=saver,
                               save_summaries_secs=save_summaries_secs,
                               save_model_secs=save_interval_secs,
                               init_fn=init_fn)

    with sv.managed_session(master, start_standard_services=False) as sess:
        if is_chief:
            sv.start_standard_services(sess)
        elif not is_chief and startup_delay_steps > 0:
            _wait_for_step(
                sess, global_step,
                min(startup_delay_steps, number_of_steps or sys.maxint))
        sv.start_queue_runners(sess)
        if is_chief and sync_optimizer:
            sv.start_queue_runners(sess, [chief_queue_runner])

        try:
            while not sv.should_stop():
                total_loss, should_stop = train_step_fn(
                    sess, train_op, global_step, train_step_kwargs)
                if should_stop:
                    break
        finally:
            if sv.is_chief and cleanup_op is not None:
                sess.run(cleanup_op)

        # This waits for service threads to finish.
        sv.Stop()

        if sv.is_chief:
            logging.info('Finished training! Saving model to disk.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)

        return total_loss
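
This older variant builds its `should_stop` and `should_log` control tensors the same way as the newer `train` above. A small sketch isolating just that construction, since the pattern is reusable outside the supervisor loop (the step budget and logging period are illustrative):

import tensorflow as tf

global_step = tf.Variable(0, name='global_step', trainable=False)
number_of_steps = 1000
log_every_n_steps = 10

# Stop once the global step reaches the budget; log on every N-th step.
should_stop = tf.greater_equal(global_step, number_of_steps)
should_log = tf.equal(tf.mod(global_step, log_every_n_steps), 0)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    print(sess.run([should_stop, should_log]))  # [False, True] at step 0
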