def test_normal_integral_mean_and_var_correctly_estimated(self):
    n = int(1000)
    # This test is almost identical to the similarly named test in
    # monte_carlo_test.py. The only difference is that we use the Halton
    # samples instead of the random samples to evaluate the expectations.
    # MC with pseudo-random numbers converges at a rate of 1/sqrt(N)
    # (N = number of samples). For QMC in low dimensions, the expected
    # convergence rate is ~1/N. Hence we should only need 1e3 samples, compared
    # to the 1e6 samples used in the pseudo-random Monte Carlo test.
    with self.test_session():
      mu_p = array_ops.constant([-1.0, 1.0], dtype=dtypes.float64)
      mu_q = array_ops.constant([0.0, 0.0], dtype=dtypes.float64)
      sigma_p = array_ops.constant([0.5, 0.5], dtype=dtypes.float64)
      sigma_q = array_ops.constant([1.0, 1.0], dtype=dtypes.float64)
      p = normal_lib.Normal(loc=mu_p, scale=sigma_p)
      q = normal_lib.Normal(loc=mu_q, scale=sigma_q)

      cdf_sample = halton.sample(2, num_samples=n, dtype=dtypes.float64)
      q_sample = q.quantile(cdf_sample)

      # Compute E_p[X].
      e_x = mc.expectation_importance_sampler(
          f=lambda x: x, log_p=p.log_prob, sampling_dist_q=q, z=q_sample,
          seed=42)

      # Compute E_p[X^2].
      e_x2 = mc.expectation_importance_sampler(
          f=math_ops.square, log_p=p.log_prob, sampling_dist_q=q, z=q_sample,
          seed=42)

      stddev = math_ops.sqrt(e_x2 - math_ops.square(e_x))
      # Keep the tolerance levels the same as in monte_carlo_test.py.
      self.assertEqual(p.batch_shape, e_x.get_shape())
      self.assertAllClose(p.mean().eval(), e_x.eval(), rtol=0.01)
      self.assertAllClose(p.stddev().eval(), stddev.eval(), rtol=0.02)
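
# A small, self-contained NumPy sketch (illustrative, not part of the test
# above) of the convergence-rate argument in the comment: plain Monte Carlo
# error shrinks like 1/sqrt(N), while a low-discrepancy (van der Corput /
# 1-D Halton) sequence shrinks roughly like 1/N. Here we estimate
# E[U] = 0.5 for U ~ Uniform(0, 1); the helper name is ours.
import numpy as np

def _van_der_corput(n, base=2):
  """First n points of the base-`base` van der Corput sequence (1-D Halton)."""
  points = np.empty(n)
  for i in range(n):
    k, denom, x = i + 1, 1.0, 0.0
    while k > 0:
      denom /= base
      x += denom * (k % base)
      k //= base
    points[i] = x
  return points

_n = 1000
_mc_err = abs(np.random.RandomState(0).rand(_n).mean() - 0.5)  # typically ~1/sqrt(n)
_qmc_err = abs(_van_der_corput(_n).mean() - 0.5)               # typically ~1/n
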
# Example #2
 def testConsistent(self):
   nums, divs = self.intTestData()
   with self.test_session():
     tf_result = (
         math_ops.floor_div(nums, divs) * divs + math_ops.floormod(nums, divs)
     ).eval()
     tf_nums = array_ops.constant(nums)
     tf_divs = array_ops.constant(divs)
     tf2_result = (tf_nums // tf_divs * tf_divs + tf_nums % tf_divs).eval()
     np_result = (nums // divs) * divs + (nums % divs)
     # Consistency with NumPy.
     self.assertAllEqual(tf_result, np_result)
     # Consistency between the two forms of divide.
     self.assertAllEqual(tf_result, tf2_result)
     # consistency for truncation form
     tf3_result = (
         math_ops.truncatediv(nums, divs) * divs
         + math_ops.truncatemod(nums, divs)
     ).eval()
     expanded_nums = np.reshape(np.tile(nums, divs.shape[1]),
                                (nums.shape[0], divs.shape[1]))
     # Consistent with desire to get numerator
     self.assertAllEqual(tf3_result, expanded_nums)
     # Consistent with desire to get numerator
     self.assertAllEqual(tf_result, expanded_nums)
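
# A quick NumPy illustration (not part of the test) of the identity being
# checked above: floor division and floor mod recompose the numerator,
# including for negative operands.
import numpy as np
_nums = np.array([7, -7, 7, -7])
_divs = np.array([2, 2, -2, -2])
assert np.array_equal(_nums // _divs * _divs + _nums % _divs, _nums)
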
# Example #3
  def testScaleAndBiasAndIdentity(self):
    """This tests a scaled add which has 3 inputs and 2 outputs."""
    a = array_ops.constant(1.)
    x = array_ops.constant([2., 3.])
    b = array_ops.constant([4., 5.])

    def _scaled_and_bias_and_identity(a, x, b):
      custom = op_hint.OpHint("scale_and_bias_and_identity")
      a, x, b = custom.add_inputs(a, x, b)
      return custom.add_outputs(a * x + b, x)
    output = array_ops.identity(_scaled_and_bias_and_identity(a, x, b),
                                name="ModelOutput")

    with self.cached_session() as sess:
      # Make sure there is one identity for each input (3) and output (2),
      # plus one for the final output: 3 + 2 + 1 = 6.
      self.assertEqual(self._countIdentities(sess.graph_def.node), 6)

      stubbed_graphdef = op_hint.convert_op_hints_to_stubs(
          graph_def=sess.graph_def)

      self.assertEqual(
          self._getGraphOpTypes(
              stubbed_graphdef,
              output_nodes=[op_hint._tensor_name_base(output.name)]),
          set(["scale_and_bias_and_identity", "Const", "Identity", "Pack"]))
# Example #4
def report_uninitialized_variables(var_list=None,
                                   name="report_uninitialized_variables"):
  """Adds ops to list the names of uninitialized variables.

  When run, it returns a 1-D tensor containing the names of uninitialized
  variables if there are any, or an empty array if there are none.

  Args:
    var_list: List of `Variable` objects to check. Defaults to the value of
      `all_variables() + local_variables()`.
    name: Optional name of the `Operation`.

  Returns:
    A 1-D tensor containing the names of the uninitialized variables, or an
    empty 1-D tensor if there are no variables or no uninitialized variables.
  """
  if var_list is None:
    var_list = all_variables() + local_variables()
  # Backwards compatibility for old-style variables. TODO(touts): remove.
  if not var_list:
    var_list = []
    for op in ops.get_default_graph().get_operations():
      if op.type in ["Variable", "AutoReloadVariable"]:
        var_list.append(op.outputs[0])
  if not var_list:
    # Return an empty tensor so we only need to check for returned tensor
    # size being 0 as an indication of model ready.
    return array_ops.constant([], dtype=dtypes.string, name=name)
  else:
    # Get a 1-D boolean tensor listing whether each variable is initialized.
    variables_mask = math_ops.logical_not(
        array_ops.pack(
            [state_ops.is_variable_initialized(v) for v in var_list]))
    # Get a 1-D string tensor containing all the variable names.
    variable_names_tensor = array_ops.constant([s.op.name for s in var_list])
    # Return a 1-D tensor containing the names of all uninitialized variables.
    return array_ops.boolean_mask(
        variable_names_tensor, variables_mask, name=name)
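
# A minimal usage sketch of the public TF1 counterpart of the helper above,
# tf.compat.v1.report_uninitialized_variables (assumes TensorFlow is installed
# and graph mode is in effect); the variable name "v" is illustrative.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

v = tf.Variable([1.0], name="v")
uninitialized = tf.report_uninitialized_variables()
with tf.Session() as sess:
  print(sess.run(uninitialized))                  # [b'v']: not yet initialized
  sess.run(tf.global_variables_initializer())
  print(sess.run(uninitialized))                  # []: nothing left to report
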
# Example #5
  def testMultiplyInverseAgainstExplicit(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)
      params = (array_ops.constant([1., 2.]), array_ops.constant(3.))
      block = fb.FullFB(lc.LayerCollection(), params)
      block.register_additional_minibatch(32)
      grads = (array_ops.constant([2., 3.]), array_ops.constant(4.))
      damping = 0.5
      block.instantiate_factors((grads,), damping)
      block._factor.instantiate_cov_variables()
      block.register_inverse()
      block._factor.instantiate_inv_variables()

      # Make sure our inverse is something other than the identity.
      sess.run(state_ops.assign(block._factor._cov, _make_psd(3)))
      sess.run(block._factor.make_inverse_update_ops())

      v_flat = np.array([4., 5., 6.], dtype=np.float32)
      vector = utils.column_to_tensors(params, array_ops.constant(v_flat))
      output = block.multiply_inverse(vector)
      output_flat = sess.run(utils.tensors_to_column(output)).ravel()

      full = sess.run(block.full_fisher_block())
      explicit = np.dot(np.linalg.inv(full + damping * np.eye(3)), v_flat)

      self.assertAllClose(output_flat, explicit)
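
# A plain NumPy restatement (illustrative only) of the reference computation in
# the test above: multiply the damped inverse of a Fisher block,
# inv(F + damping * I), with a flattened vector. The matrix below is just a
# hand-picked PSD stand-in, not the value produced by _make_psd(3).
import numpy as np
_F = np.array([[2.0, 0.5, 0.1],
               [0.5, 2.0, 0.5],
               [0.1, 0.5, 2.0]])
_damping = 0.5
_v = np.array([4.0, 5.0, 6.0])
_explicit = np.linalg.inv(_F + _damping * np.eye(3)).dot(_v)
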
# Example #6
  def testFullFBInitTensorTuple(self):
    with ops.Graph().as_default():
      random_seed.set_random_seed(200)
      params = (array_ops.constant([1., 2.]), array_ops.constant(3.))
      block = fb.FullFB(lc.LayerCollection(), params, 32)

      self.assertAllEqual(params, block.tensors_to_compute_grads())
# Example #7
 def testScopeStringFromParamsMultipleTypes(self):
   with tf_ops.Graph().as_default():
     x = array_ops.constant(1,)
     y = array_ops.constant(2,)
     scope_string = ff.scope_string_from_params([[1, 2, 3], 'foo', True, 4,
                                                 (x, y)])
     self.assertEqual('1-2-3_foo_True_4_Const__Const_1', scope_string)
# Example #8
  def testOptimizerInit(self):
    with ops.Graph().as_default():
      layer_collection = lc.LayerCollection()

      inputs = array_ops.ones((2, 1)) * 2
      weights_val = np.ones((1, 1), dtype=np.float32) * 3.
      weights = variable_scope.get_variable(
          'w', initializer=array_ops.constant(weights_val))
      bias = variable_scope.get_variable(
          'b', initializer=init_ops.zeros_initializer(), shape=(1, 1))
      output = math_ops.matmul(inputs, weights) + bias

      layer_collection.register_fully_connected((weights, bias), inputs, output)

      logits = math_ops.tanh(output)
      targets = array_ops.constant([[0.], [1.]])
      output = math_ops.reduce_mean(
          nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets))

      layer_collection.register_categorical_predictive_distribution(logits)

      optimizer.KfacOptimizer(
          0.1,
          0.2,
          0.3,
          layer_collection,
          momentum=0.5,
          momentum_type='regular')
  def test_mixture_dev(self):
    mixture_weights = np.array([
        [1.0/3, 1.0/3, 1.0/3],
        [0.750, 0.250, 0.000]
    ])
    component_means = np.array([
        [1.0, 1.0, 1.0],
        [-5, 0, 1.25]
    ])
    component_devs = np.array([
        [1.0, 1.0, 1.0],
        [0.01, 2.0, 0.1]
    ])

    # The first case should trivially have a standard deviation of 1.0 because
    # all components are identical and have that standard deviation.
    # The second case was computed by hand.
    expected_devs = np.array([
        1.0,
        2.3848637277
    ])

    weights_tf = array_ops.constant(mixture_weights)
    means_tf = array_ops.constant(component_means)
    sigmas_tf = array_ops.constant(component_devs)
    mix_dev = distribution_util.mixture_stddev(weights_tf,
                                               means_tf,
                                               sigmas_tf)

    with self.test_session() as sess:
      actual_devs = sess.run(mix_dev)

    self.assertAllClose(actual_devs, expected_devs)
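
# A plain NumPy sanity check (illustrative) of the hand-computed value above:
# for a mixture, Var[X] = sum_i w_i * (sigma_i**2 + mu_i**2) - (sum_i w_i * mu_i)**2.
import numpy as np
_w = np.array([0.75, 0.25, 0.0])
_mu = np.array([-5.0, 0.0, 1.25])
_sigma = np.array([0.01, 2.0, 0.1])
_mean = np.sum(_w * _mu)
_var = np.sum(_w * (_sigma ** 2 + _mu ** 2)) - _mean ** 2
print(np.sqrt(_var))  # ~2.3848637277
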
 def testMakeSparseSplitAllEmptyDimensions(self):
   """Tests split handler op when all dimensions have only bias bucket id."""
   with self.test_session() as sess:
     # The data looks like the following after dividing by number of steps (2).
     # Gradients    | Partition | Dimension | bucket ID       |
     # (0.9, 0.39)  | 0         |    0      |  -1             |
     # (4.0, 0.13)  | 1         |    0      |  -1             |
     partition_ids = array_ops.constant([0, 1], dtype=dtypes.int32)
     # We have only 1 dimension in our sparse feature column.
     bucket_ids = array_ops.constant([[-1, 0], [-1, 0]], dtype=dtypes.int64)
     gradients = array_ops.constant([1.8, 8.0])
     hessians = array_ops.constant([0.78, 0.26])
     bucket_boundaries = array_ops.constant([0.3, 0.52])
     partitions, gains, splits = (
         split_handler_ops.build_sparse_inequality_splits(
             num_minibatches=2,
             partition_ids=partition_ids,
             bucket_ids=bucket_ids,
             gradients=gradients,
             hessians=hessians,
             bucket_boundaries=bucket_boundaries,
             l1_regularization=0,
             l2_regularization=2,
             tree_complexity_regularization=0,
             min_node_weight=0,
             feature_column_group_id=0,
             bias_feature_id=-1,
             class_id=-1,
             multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS))
     partitions, gains, splits = (sess.run([partitions, gains, splits]))
   self.assertEqual(0, len(partitions))
   self.assertEqual(0, len(splits))
# Example #11
  def testSwishLiteHint(self):
    """Makes a custom op swish and makes sure it gets converted as a unit."""
    image = array_ops.constant([1., 2., 3., 4.])
    swish_scale = array_ops.constant(1.0)

    def _swish(input_tensor, scale):
      custom = op_hint.OpHint("cool_activation")
      input_tensor, scale = custom.add_inputs(input_tensor, scale)
      output = math_ops.sigmoid(input_tensor) * input_tensor * scale
      output, = custom.add_outputs(output)
      return output
    output = array_ops.identity(_swish(image, swish_scale), name="ModelOutput")

    with self.cached_session() as sess:
      # check if identities have been put into the graph (2 input, 1 output,
      # and 1 final output).
      self.assertEqual(self._countIdentities(sess.graph_def.node), 4)

      stubbed_graphdef = op_hint.convert_op_hints_to_stubs(
          graph_def=sess.graph_def)

      self.assertEqual(
          self._getGraphOpTypes(
              stubbed_graphdef,
              output_nodes=[op_hint._tensor_name_base(output.name)]),
          set(["cool_activation", "Const", "Identity"]))
# Example #12
  def testTrackPersistentBytes(self):
    ops.reset_default_graph()
    a = array_ops.constant(np.ones((100, 100)))
    b = array_ops.constant(np.ones((100, 100)))
    c = a * b

    with session.Session() as sess:
      run_options = config_pb2.RunOptions(
          trace_level=config_pb2.RunOptions.FULL_TRACE)
      run_metadata = config_pb2.RunMetadata()
      sess.run(c, options=run_options, run_metadata=run_metadata)

      options = option_builder.ProfileOptionBuilder.time_and_memory()
      options['min_bytes'] = 0
      options['select'] = ('bytes', 'peak_bytes', 'output_bytes',
                           'residual_bytes')
      ret = model_analyzer.profile(
          sess.graph, run_meta=run_metadata, cmd='scope', options=options)

      run_metadata = config_pb2.RunMetadata()
      sess.run(c, options=run_options, run_metadata=run_metadata)
      ret2 = model_analyzer.profile(
          sess.graph, run_meta=run_metadata, cmd='scope', options=options)

      n = lib.SearchTFProfNode(ret, 'mul')
      n2 = lib.SearchTFProfNode(ret2, 'mul')
      self.assertGreater(n.peak_bytes, 0)
      self.assertGreater(n.output_bytes, 0)
      self.assertGreater(n.residual_bytes, 0)
      self.assertEqual(n.peak_bytes, n2.peak_bytes)
      self.assertEqual(n.output_bytes, n2.output_bytes)
      self.assertEqual(n.residual_bytes, n2.residual_bytes)
# Example #13
def report_uninitialized_resources(resource_list=None,
                                   name="report_uninitialized_resources"):
  """Returns the names of all uninitialized resources in resource_list.

  If the returned tensor is empty then all resources have been initialized.

  Args:
   resource_list: resources to check. If None, will use shared_resources() +
    local_resources().
   name: name for the resource-checking op.

  Returns:
   Tensor containing names of the handles of all resources which have not
   yet been initialized.

  """
  if resource_list is None:
    resource_list = shared_resources() + local_resources()
  with ops.name_scope(name):
    # Run all operations on CPU
    with ops.device("/cpu:0"):
      if not resource_list:
        # Return an empty tensor so we only need to check for returned tensor
        # size being 0 as an indication of model ready.
        return array_ops.constant([], dtype=dtypes.string)
      # Get a 1-D boolean tensor listing whether each resource is initialized.
      variables_mask = math_ops.logical_not(
          array_ops.stack([r.is_initialized for r in resource_list]))
      # Get a 1-D string tensor containing all the resource names.
      variable_names_tensor = array_ops.constant(
          [s.handle.name for s in resource_list])
      # Return a 1-D tensor containing all the names of uninitialized resources.
      return array_ops.boolean_mask(variable_names_tensor, variables_mask)
# Example #14
 def testRegisterSingleParamRegisteredInTuple(self):
   x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
   y = variable_scope.get_variable('y', initializer=array_ops.constant(1,))
   lc = layer_collection.LayerCollection()
   lc.fisher_blocks = {(x, y): '1'}
   lc.register_block(x, 'foo')
   self.assertEqual(set(['1']), set(lc.get_blocks()))
  def test_kernel_classifier_distance_block_sizes(self):
    """Test that `kernel_classifier_distance` works with unusual max_block_size

    values..
    """
    np.random.seed(0)

    test_pool_real_a = np.float32(np.random.randn(512, 256))
    test_pool_gen_a = np.float32(np.random.randn(768, 256) * 1.1 + .05)

    max_block_size = array_ops.placeholder(dtypes.int32, shape=())
    kid_op = _run_with_mock(
        classifier_metrics.kernel_classifier_distance_and_std_from_activations,
        array_ops.constant(test_pool_real_a),
        array_ops.constant(test_pool_gen_a),
        max_block_size=max_block_size)

    for block_size in [50, 512, 1000]:
      with self.cached_session() as sess:
        actual_kid, actual_std = sess.run(kid_op, {max_block_size: block_size})

      expected_kid, expected_std = _expected_kid_and_std(
          test_pool_real_a, test_pool_gen_a, max_block_size=block_size)

      self.assertAllClose(expected_kid, actual_kid, 0.001)
      self.assertAllClose(expected_std, actual_std, 0.001)
# Example #16
  def testAggregateGradients(self):

    def fn(x):
      ind1 = constant_op.constant(np.array([0, 1]))
      ind2 = constant_op.constant(np.array([2, 3]))
      ind3 = constant_op.constant(np.array([1, 3]))
      # A mixture of IndexedSlices and dense tensor to aggregate.
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = embedding_ops.embedding_lookup(x, ind2)
      g3 = embedding_ops.embedding_lookup(x, ind3)
      g4 = math_ops.reduce_sum(x * constant_op.constant(2.0))
      return g1 * g2 * g3 * g4

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = constant_op.constant(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]
    grad = self.evaluate(ops.convert_to_tensor(grad))

    if not context.executing_eagerly():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_ind2 = array_ops.constant([2, 3])
      tf_ind3 = array_ops.constant([1, 3])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
      tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
      tf_g4 = math_ops.reduce_sum(tf_var * 2.0, axis=(0, 1))
      tf_y = tf_g1 * tf_g2 * tf_g3 * tf_g4
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      tf_dense_grad = math_ops.unsorted_segment_sum(
          tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])

      self.assertAllClose(grad, self.evaluate(tf_dense_grad))
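
# A small NumPy illustration (not part of the test) of what the
# unsorted_segment_sum call above does: an IndexedSlices-style gradient is a
# (values, indices) pair, and densifying it scatter-adds each values row into
# the row of a zero matrix selected by the matching index.
import numpy as np
_values = np.array([[1., 1.], [2., 2.], [3., 3.]])
_indices = np.array([0, 2, 0])
_dense = np.zeros((4, 2))
np.add.at(_dense, _indices, _values)  # row 0 <- 1 + 3, row 2 <- 2
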
# Example #17
  def testMultiplyInverseTuple(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)
      inputs = array_ops.constant([[1., 2., 3.], [3., 4., 5.], [5., 6., 7.]])
      outputs = array_ops.constant([[3., 4.], [5., 6.]])
      block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False)
      block.register_additional_minibatch(inputs, outputs)
      grads = outputs**2
      block.instantiate_factors(([grads],), 0.5)

      # Make sure our inverse is something other than the identity.
      sess.run(tf_variables.global_variables_initializer())
      sess.run(block._input_factor.make_inverse_update_ops())
      sess.run(block._output_factor.make_inverse_update_ops())

      vector = (
          np.arange(2, 6).reshape(2, 2).astype(np.float32),  #
          np.arange(1, 3).reshape(2, 1).astype(np.float32))
      output = block.multiply_inverse((array_ops.constant(vector[0]),
                                       array_ops.constant(vector[1])))

      output = sess.run(output)
      self.assertAllClose([[0.686291, 1.029437], [1.372583, 1.715729]],
                          output[0])
      self.assertAllClose([0.343146, 0.686291], output[1])
# Example #18
  def testMultiplyInverseTuple(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)
      params = random_ops.random_normal((2, 2, 2, 2))
      inputs = random_ops.random_normal((2, 2, 2, 2))
      outputs = random_ops.random_normal((2, 2, 2, 2))
      block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, (1, 1, 1, 1),
                                'SAME')
      block.register_additional_minibatch(inputs, outputs)
      grads = outputs**2
      block.instantiate_factors(([grads],), 0.5)

      # Make sure our inverse is something other than the identity.
      sess.run(tf_variables.global_variables_initializer())
      sess.run(block._input_factor.make_inverse_update_ops())
      sess.run(block._output_factor.make_inverse_update_ops())

      vector = (np.arange(1, 15).reshape(7, 2).astype(np.float32),
                np.arange(2, 4).reshape(2, 1).astype(np.float32))
      output = block.multiply_inverse((array_ops.constant(vector[0]),
                                       array_ops.constant(vector[1])))

      output = sess.run(output)
      self.assertAllClose([0.136455, 0.27291], output[0][0])
      self.assertAllClose([0.27291, 0.409365], output[1])
# Example #19
  def testAggregateGradients(self):

    def fn(x):
      ind1 = tensor.Tensor(np.array([0, 1]))
      ind2 = tensor.Tensor(np.array([2, 3]))
      ind3 = tensor.Tensor(np.array([1, 3]))
      # A mixture of IndexedSlices and dense tensor to aggregate.
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = embedding_ops.embedding_lookup(x, ind2)
      g3 = embedding_ops.embedding_lookup(x, ind3)
      g4 = math_ops.reduce_sum(x * tensor.Tensor(2.0))
      return g1 * g2 * g3 * g4

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = tensor.Tensor(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]

    with context.graph_mode(), self.test_session():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_ind2 = array_ops.constant([2, 3])
      tf_ind3 = array_ops.constant([1, 3])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
      tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
      tf_g4 = math_ops.reduce_sum(tf_var * 2.0, reduction_indices=(0, 1))
      tf_y = tf_g1 * tf_g2 * tf_g3 * tf_g4
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      tf_dense_grad = math_ops.unsorted_segment_sum(
          tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])

      self.assertAllClose(grad.numpy(), tf_dense_grad.eval())
# Example #20
  def testColumnToTensors(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)

      vector_template = array_ops.constant(np.array([[0., 1.], [2., 3.]]))
      colvec = array_ops.constant(np.arange(4.)[:, None])
      output = sess.run(utils.column_to_tensors(vector_template, colvec))
      self.assertAllClose(output, np.array([[0., 1.], [2., 3.]]))

      vector_template = self._fully_connected_layer_params()
      colvec = array_ops.constant(np.arange(6.)[:, None])
      output = sess.run(utils.column_to_tensors(vector_template, colvec))

      self.assertIsInstance(output, tuple)
      self.assertEqual(len(output), 2)
      a, b = output
      self.assertAllClose(a, np.array([[0., 1.], [2., 3.]]))
      self.assertAllClose(b, np.array([4., 5.]))

      vector_template = list(vector_template)
      vector_template.append(array_ops.constant([[6.], [7.], [8.], [9.]]))
      colvec = array_ops.constant(np.arange(10.)[:, None])
      output = sess.run(utils.column_to_tensors(vector_template, colvec))
      self.assertIsInstance(output, tuple)
      self.assertEqual(len(output), 3)
      a, b, c = output
      self.assertAllClose(a, np.array([[0., 1.], [2., 3.]]))
      self.assertAllClose(b, np.array([4., 5.]))
      self.assertAllClose(c, np.array([[6.], [7.], [8.], [9.]]))
# Example #21
 def test_parameter_switching(self):
   parameter = array_ops.constant(5)
   overridden_parameter = array_ops.constant(3)
   with self.cached_session():
     getter = model_utils.parameter_switch({overridden_parameter: 4})
     self.assertEqual(5, getter(parameter))
     self.assertEqual(4, getter(overridden_parameter))
# Example #22
  def testUpdateClipCoeff(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      grads_and_vars = [(array_ops.constant([[1., 2.], [3., 4.]]), None),
                        (array_ops.constant([[2., 3.], [4., 5.]]), None)]
      pgrads_and_vars = [(array_ops.constant([[3., 4.], [5., 6.]]), None),
                         (array_ops.constant([[7., 8.], [9., 10.]]), None)]
      lrate = 0.1

      # Note: without rescaling, the squared Fisher norm of the update
      # is 1.74

      # If the update already satisfies the norm constraint, there should
      # be no rescaling.
      opt = optimizer.KfacOptimizer(
          lrate, 0.2, 0.3, dummy_layer_collection(), norm_constraint=10.)
      coeff = opt._update_clip_coeff(grads_and_vars, pgrads_and_vars)
      self.assertAlmostEqual(1., sess.run(coeff), places=5)

      # If the update violates the constraint, it should be rescaled to
      # be on the constraint boundary.
      opt = optimizer.KfacOptimizer(
          lrate, 0.2, 0.3, dummy_layer_collection(), norm_constraint=0.5)
      coeff = opt._update_clip_coeff(grads_and_vars, pgrads_and_vars)
      sq_norm_pgrad = opt._squared_fisher_norm(grads_and_vars, pgrads_and_vars)
      sq_norm_update = lrate**2 * coeff**2 * sq_norm_pgrad
      self.assertAlmostEqual(0.5, sess.run(sq_norm_update), places=5)
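
# A quick NumPy check (illustrative) of the "squared Fisher norm ... is 1.74"
# comment above, assuming the squared Fisher norm of the raw update is
# lrate**2 * sum_i <grad_i, precon_grad_i>; that bookkeeping reproduces 1.74.
import numpy as np
_grads = [np.array([[1., 2.], [3., 4.]]), np.array([[2., 3.], [4., 5.]])]
_pgrads = [np.array([[3., 4.], [5., 6.]]), np.array([[7., 8.], [9., 10.]])]
_sq_fisher_norm = sum(np.sum(g * pg) for g, pg in zip(_grads, _pgrads))  # 174.0
print(0.1 ** 2 * _sq_fisher_norm)  # 1.74
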
 def testMakeDenseSplitEmptyInputs(self):
   """Tests empty inputs op."""
   with self.test_session() as sess:
     partition_ids = array_ops.constant([], dtype=dtypes.int32)
     bucket_ids = array_ops.constant([[]], dtype=dtypes.int64)
     gradients = array_ops.constant([])
     hessians = array_ops.constant([])
     bucket_boundaries = [0.3, 0.52]
     partitions, gains, splits = (
         split_handler_ops.build_dense_inequality_splits(
             num_minibatches=0,
             partition_ids=partition_ids,
             bucket_ids=bucket_ids,
             gradients=gradients,
             hessians=hessians,
             bucket_boundaries=bucket_boundaries,
             l1_regularization=0.1,
             l2_regularization=1,
             tree_complexity_regularization=0,
             min_node_weight=0,
             class_id=-1,
             feature_column_group_id=0,
             multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS))
     partitions, gains, splits = sess.run([partitions, gains, splits])
   # .assertEmpty doesn't exist on ubuntu-contrib
   self.assertEqual(0, len(partitions))
   self.assertEqual(0, len(gains))
   self.assertEqual(0, len(splits))
# Example #24
  def testAggregate(self):
    a = array_ops.constant([3., 4.])
    b = array_ops.constant([5., 6.])
    hint = op_hint.OpHint("agg")
    a0, a1 = array_ops.unstack(a)
    b0, b1 = array_ops.unstack(b)

    a0 = hint.add_input(a0, tag="c", aggregate=op_hint.OpHint.AGGREGATE_STACK)
    b0 = hint.add_input(b0, tag="n", aggregate=op_hint.OpHint.AGGREGATE_STACK)
    a1 = hint.add_input(a1, tag="c", aggregate=op_hint.OpHint.AGGREGATE_STACK)
    b1 = hint.add_input(b1, tag="n", aggregate=op_hint.OpHint.AGGREGATE_STACK)

    c0 = math_ops.add(a0, b0, name="addleft")
    c1 = math_ops.add(a1, b1, name="addright")
    c0 = hint.add_output(
        c0, tag="out", aggregate=op_hint.OpHint.AGGREGATE_STACK)
    c1 = hint.add_output(
        c1, tag="out", aggregate=op_hint.OpHint.AGGREGATE_STACK)

    curr = array_ops.stack([c0, c1])
    output = array_ops.identity(curr, name="FINAL_OUTPUT")
    with self.cached_session() as sess:
      stubbed_graphdef = op_hint.convert_op_hints_to_stubs(
          graph_def=sess.graph_def)
      self.assertEqual(
          self._getGraphOpTypes(
              stubbed_graphdef,
              output_nodes=[op_hint._tensor_name_base(output.name)]),
          set(["agg", "Const", "Identity"]))
# Example #25
 def testRegisterSingleParamRegisteredInTuple(self):
   x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
   y = variable_scope.get_variable('y', initializer=array_ops.constant(1,))
   lc = layer_collection.LayerCollection()
   lc.fisher_blocks = {(x, y): '1'}
   with self.assertRaises(ValueError) as cm:
     lc.register_block(x, 'foo')
   self.assertIn('was already registered', str(cm.exception))
# Example #26
  def testFullFBInitSingleTensor(self):
    with ops.Graph().as_default():
      random_seed.set_random_seed(200)
      params = (array_ops.constant([1., 2.]), array_ops.constant(3.))
      block = fb.FullFB(lc.LayerCollection(), params)
      block.register_additional_minibatch(32)

      self.assertAllEqual(params, block.tensors_to_compute_grads())
# Example #27
 def testFullyConnectedSeriesFBInit(self):
   with ops.Graph().as_default():
     random_seed.set_random_seed(200)
     inputs = array_ops.constant([1., 2.])
     outputs = array_ops.constant([3., 4.])
     block = fb.FullyConnectedSeriesFB(
         lc.LayerCollection(), inputs=[inputs], outputs=[outputs])
     self.assertAllEqual([outputs], block.tensors_to_compute_grads())
# Example #28
  def testRegisterTupleParamRegistered(self):
    x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
    y = variable_scope.get_variable('y', initializer=array_ops.constant(1,))
    lc = layer_collection.LayerCollection()
    lc.fisher_blocks = {(x, y): '1'}

    with self.assertRaises(ValueError):
      lc.register_block((x, y), 'foo')
# Example #29
 def testRegisterSingleParamNotRegistered(self):
   x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
   lc = layer_collection.LayerCollection()
   lc.fisher_blocks = {
       variable_scope.get_variable('y', initializer=array_ops.constant(1,)):
           '1'
   }
   lc.register_block(x, 'foo')
# Example #30
 def testRepeatedAdds(self):
   a = array_ops.constant([[1., 2.], [3., 4.]])
   b = array_ops.constant([[5., 6.], [7., 8.]])
   c = a + b + a  # note that a appears twice in this graph
   sub_graph = utils.SubGraph((c,))
   self.assertTrue(sub_graph.is_member(a))
   self.assertTrue(sub_graph.is_member(b))
   self.assertTrue(sub_graph.is_member(c))
# Example #31
 def run(self):
   return {"my_output": array_ops.constant(1.0)}
# Example #32
  def build(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    if not input_shape.ndims:
      raise ValueError('Input has undefined rank:', input_shape)
    ndims = len(input_shape)

    # Convert axis to list and resolve negatives
    if isinstance(self.axis, int):
      self.axis = [self.axis]

    for idx, x in enumerate(self.axis):
      if x < 0:
        self.axis[idx] = ndims + x

    # Validate axes
    for x in self.axis:
      if x < 0 or x >= ndims:
        raise ValueError('Invalid axis: %d' % x)
    if len(self.axis) != len(set(self.axis)):
      raise ValueError('Duplicate axis: %s' % self.axis)

    if self.virtual_batch_size is not None:
      if self.virtual_batch_size <= 0:
        raise ValueError('virtual_batch_size must be a positive integer that '
                         'divides the true batch size of the input Tensor')
      # If using virtual batches, the first dimension must be the batch
      # dimension and cannot be the batch norm axis
      if 0 in self.axis:
        raise ValueError('When using virtual_batch_size, the batch dimension '
                         'must be 0 and thus axis cannot include 0')
      if self.adjustment is not None:
        raise ValueError('When using virtual_batch_size, adjustment cannot '
                         'be specified')

    if self.fused in (None, True):
      # TODO(yaozhang): if input is not 4D, reshape it to 4D and reshape the
      # output back to its original shape accordingly.
      if self._USE_V2_BEHAVIOR:
        if self.fused is None:
          self.fused = (ndims == 4)
        elif self.fused and ndims != 4:
          raise ValueError('Batch normalization layers with fused=True only '
                           'support 4D input tensors.')
      else:
        assert self.fused is not None
        self.fused = (ndims == 4 and self._fused_can_be_used())
      # TODO(chrisying): fused batch norm is currently not supported for
      # multi-axis batch norm and by extension virtual batches. In some cases,
      # it might be possible to use fused batch norm but would require reshaping
      # the Tensor to 4D with the axis in 1 or 3 (preferred 1) which is
      # particularly tricky. A compromise might be to just support the most
      # common use case (turning 5D w/ virtual batch to NCHW)

    if self.fused:
      if self.axis == [1]:
        self._data_format = 'NCHW'
      elif self.axis == [3]:
        self._data_format = 'NHWC'
      else:
        raise ValueError('Unsupported axis, fused batch norm only supports '
                         'axis == [1] or axis == [3]')

    # Raise parameters of fp16 batch norm to fp32
    if self.dtype == dtypes.float16 or self.dtype == dtypes.bfloat16:
      param_dtype = dtypes.float32
    else:
      param_dtype = self.dtype or dtypes.float32

    axis_to_dim = {x: input_shape.dims[x].value for x in self.axis}
    for x in axis_to_dim:
      if axis_to_dim[x] is None:
        raise ValueError('Input has undefined `axis` dimension. Input shape: ',
                         input_shape)
    self.input_spec = InputSpec(ndim=ndims, axes=axis_to_dim)

    if len(axis_to_dim) == 1 and self.virtual_batch_size is None:
      # Single axis batch norm (most common/default use-case)
      param_shape = (list(axis_to_dim.values())[0],)
    else:
      # Parameter shape is the original shape but with 1 in all non-axis dims
      param_shape = [axis_to_dim[i] if i in axis_to_dim
                     else 1 for i in range(ndims)]
      if self.virtual_batch_size is not None:
        # When using virtual batches, add an extra dim at index 1
        param_shape.insert(1, 1)
        for idx, x in enumerate(self.axis):
          self.axis[idx] = x + 1      # Account for added dimension

    if self.scale:
      self.gamma = self.add_weight(
          name='gamma',
          shape=param_shape,
          dtype=param_dtype,
          initializer=self.gamma_initializer,
          regularizer=self.gamma_regularizer,
          constraint=self.gamma_constraint,
          trainable=True)
    else:
      self.gamma = None
      if self.fused:
        self._gamma_const = array_ops.constant(
            1.0, dtype=param_dtype, shape=param_shape)

    if self.center:
      self.beta = self.add_weight(
          name='beta',
          shape=param_shape,
          dtype=param_dtype,
          initializer=self.beta_initializer,
          regularizer=self.beta_regularizer,
          constraint=self.beta_constraint,
          trainable=True)
    else:
      self.beta = None
      if self.fused:
        self._beta_const = array_ops.constant(
            0.0, dtype=param_dtype, shape=param_shape)

    try:
      # Disable variable partitioning when creating the moving mean and variance
      if hasattr(self, '_scope') and self._scope:
        partitioner = self._scope.partitioner
        self._scope.set_partitioner(None)
      else:
        partitioner = None
      self.moving_mean = self.add_weight(
          name='moving_mean',
          shape=param_shape,
          dtype=param_dtype,
          initializer=self.moving_mean_initializer,
          synchronization=tf_variables.VariableSynchronization.ON_READ,
          trainable=False,
          aggregation=tf_variables.VariableAggregation.MEAN)

      self.moving_variance = self.add_weight(
          name='moving_variance',
          shape=param_shape,
          dtype=param_dtype,
          initializer=self.moving_variance_initializer,
          synchronization=tf_variables.VariableSynchronization.ON_READ,
          trainable=False,
          aggregation=tf_variables.VariableAggregation.MEAN)

      if self.renorm:
        # Create variables to maintain the moving mean and standard deviation.
        # These are used in training and thus are different from the moving
        # averages above. The renorm variables are colocated with moving_mean
        # and moving_variance.
        # NOTE: below, the outer `with device` block causes the current device
        # stack to be cleared. The nested ones use a `lambda` to set the desired
        # device and ignore any devices that may be set by the custom getter.
        def _renorm_variable(name, shape):
          var = self.add_weight(
              name=name,
              shape=shape,
              dtype=param_dtype,
              initializer=init_ops.zeros_initializer(),
              synchronization=tf_variables.VariableSynchronization.ON_READ,
              trainable=False,
              aggregation=tf_variables.VariableAggregation.MEAN)
          return var

        with distribution_strategy_context.get_distribution_strategy(
        ).colocate_vars_with(self.moving_mean):
          self.renorm_mean = _renorm_variable('renorm_mean', param_shape)
          self.renorm_mean_weight = _renorm_variable('renorm_mean_weight', ())
        # We initialize renorm_stddev to 0, and maintain the (0-initialized)
        # renorm_stddev_weight. This allows us to (1) mix the average
        # stddev with the minibatch stddev early in training, and (2) compute
        # the unbiased average stddev by dividing renorm_stddev by the weight.
        with distribution_strategy_context.get_distribution_strategy(
        ).colocate_vars_with(self.moving_variance):
          self.renorm_stddev = _renorm_variable('renorm_stddev', param_shape)
          self.renorm_stddev_weight = _renorm_variable('renorm_stddev_weight',
                                                       ())
    finally:
      if partitioner:
        self._scope.set_partitioner(partitioner)
    self.built = True
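
# A tiny plain-Python sketch (illustrative only) of the parameter-shape logic
# in build() above: with a single normalization axis (and no virtual batching)
# gamma/beta/moving stats are 1-D over that axis; otherwise they keep the input
# rank with 1s in every non-axis dimension. The shapes below are made-up.
def _param_shape_for(input_shape, axis):
  axis_to_dim = {a: input_shape[a] for a in axis}
  if len(axis_to_dim) == 1:
    return (list(axis_to_dim.values())[0],)
  return [axis_to_dim.get(i, 1) for i in range(len(input_shape))]

print(_param_shape_for([8, 32, 32, 16], [3]))    # (16,)  e.g. NHWC
print(_param_shape_for([8, 4, 10, 10], [1, 2]))  # [1, 4, 10, 1]
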
# Example #33
 def run(self, inp):
   # Here the keys are not ordered lexicographically on purpose.
   return {
       "output_b": array_ops.constant(1.0),
       "output_a": inp + inp * inp
   }
    def testGenerateFeatureSplitCandidatesMulticlass(self):
        with self.cached_session() as sess:
            # Batch size is 4, 2 gradients per each instance.
            gradients = array_ops.constant(
                [[0.2, 0.1], [-0.5, 0.2], [1.2, 3.4], [4.0, -3.5]],
                shape=[4, 2])
            # 2x2 matrix for each instance
            hessian_0 = [[0.12, 0.02], [0.3, 0.11]]
            hessian_1 = [[0.07, -0.2], [-0.5, 0.2]]
            hessian_2 = [[0.2, -0.23], [-0.8, 0.9]]
            hessian_3 = [[0.13, -0.3], [-1.5, 2.2]]
            hessians = array_ops.constant(
                [hessian_0, hessian_1, hessian_2, hessian_3])

            partition_ids = [0, 0, 0, 1]
            indices = [[0, 0], [0, 1], [2, 0], [3, 0]]
            values = array_ops.constant([1, 2, 2, 1], dtype=dtypes.int64)

            partition_ids = array_ops.constant([0, 0, 0, 1],
                                               dtype=dtypes.int32)

            gradient_shape = tensor_shape.TensorShape([2])
            hessian_shape = tensor_shape.TensorShape([2, 2])
            class_id = -1

            split_handler = categorical_split_handler.EqualitySplitHandler(
                l1_regularization=0.1,
                l2_regularization=1,
                tree_complexity_regularization=0,
                min_node_weight=0,
                sparse_int_column=sparse_tensor.SparseTensor(
                    indices, values, [4, 1]),
                feature_column_group_id=0,
                gradient_shape=gradient_shape,
                hessian_shape=hessian_shape,
                multiclass_strategy=learner_pb2.LearnerConfig.FULL_HESSIAN,
                init_stamp_token=0)
            resources.initialize_resources(resources.shared_resources()).run()

            empty_gradients, empty_hessians = get_empty_tensors(
                gradient_shape, hessian_shape)
            example_weights = array_ops.ones([4, 1], dtypes.float32)

            update_1 = split_handler.update_stats_sync(
                0,
                partition_ids,
                gradients,
                hessians,
                empty_gradients,
                empty_hessians,
                example_weights,
                is_active=array_ops.constant([True, True]))
            with ops.control_dependencies([update_1]):
                are_splits_ready, partitions, gains, splits = (
                    split_handler.make_splits(0, 1, class_id))
                are_splits_ready, partitions, gains, splits = (sess.run(
                    [are_splits_ready, partitions, gains, splits]))
        self.assertTrue(are_splits_ready)
        self.assertAllEqual([0, 1], partitions)

        split_info = split_info_pb2.SplitInfo()
        split_info.ParseFromString(splits[0])

        left_child = split_info.left_child.vector
        right_child = split_info.right_child.vector
        split_node = split_info.split_node.categorical_id_binary_split
        # Each leaf has 2 element vector.
        self.assertEqual(2, len(left_child.value))
        self.assertEqual(2, len(right_child.value))
        self.assertEqual(1, split_node.feature_id)

        split_info.ParseFromString(splits[1])
        left_child = split_info.left_child.vector
        right_child = split_info.right_child.vector
        split_node = split_info.split_node.categorical_id_binary_split
        self.assertEqual(2, len(left_child.value))
        self.assertEqual(0, len(right_child.value))
        self.assertEqual(1, split_node.feature_id)
# Example #35
def kernel_classifier_distance_and_std_from_activations(
        real_activations,
        generated_activations,
        max_block_size=1024,
        dtype=None):
    """Kernel "classifier" distance for evaluating a generative model.

    This methods computes the kernel classifier distance from activations of
    real images and generated images. This can be used independently of the
    kernel_classifier_distance() method, especially in the case of using large
    batches during evaluation where we would like to precompute all of the
    activations before computing the classifier distance, or if we want to
    compute multiple metrics based on the same images. It also returns a rough
    estimate of the standard error of the estimator.

    This technique is described in detail in https://arxiv.org/abs/1801.01401.
    Given two distributions P and Q of activations, this function calculates

        E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
          - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]

    where k is the polynomial kernel

        k(x, y) = ( x^T y / dimension + 1 )^3.

    This captures how different the distributions of real and generated images'
    visual features are. Like the Frechet distance (and unlike the Inception
    score), this is a true distance and incorporates information about the
    target images. Unlike the Frechet score, this function computes an
    *unbiased* and asymptotically normal estimator, which makes comparing
    estimates across models much more intuitive.

    The estimator used takes time quadratic in max_block_size. Larger values of
    max_block_size will decrease the variance of the estimator but increase the
    computational cost. This differs slightly from the estimator used by the
    original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
    The estimate of the standard error will also be more reliable when there are
    more blocks, i.e. when max_block_size is smaller.

    NOTE: the blocking code assumes that real_activations and
    generated_activations are both in random order. If either is sorted in a
    meaningful order, the estimator will behave poorly.

    Args:
      real_activations: 2D Tensor containing activations of real data. Shape is
        [batch_size, activation_size].
      generated_activations: 2D Tensor containing activations of generated data.
        Shape is [batch_size, activation_size].
      max_block_size: integer, default 1024. The distance estimator splits samples
        into blocks for computational efficiency. Larger values are more
        computationally expensive but decrease the variance of the distance
        estimate. Having a smaller block size also gives a better estimate of the
        standard error.
      dtype: if not None, coerce activations to this dtype before computations.

    Returns:
     The Kernel Inception Distance. A floating-point scalar of the same type
       as the output of the activations.
     An estimate of the standard error of the distance estimator (a scalar of
       the same type).
    """

    real_activations.shape.assert_has_rank(2)
    generated_activations.shape.assert_has_rank(2)
    real_activations.shape[1].assert_is_compatible_with(
        generated_activations.shape[1])

    if dtype is None:
        dtype = real_activations.dtype
        assert generated_activations.dtype == dtype
    else:
        real_activations = math_ops.cast(real_activations, dtype)
        generated_activations = math_ops.cast(generated_activations, dtype)

    # Figure out how to split the activations into blocks of approximately
    # equal size, with none larger than max_block_size.
    n_r = array_ops.shape(real_activations)[0]
    n_g = array_ops.shape(generated_activations)[0]

    n_bigger = math_ops.maximum(n_r, n_g)
    n_blocks = math_ops.to_int32(math_ops.ceil(n_bigger / max_block_size))

    v_r = n_r // n_blocks
    v_g = n_g // n_blocks

    n_plusone_r = n_r - v_r * n_blocks
    n_plusone_g = n_g - v_g * n_blocks

    sizes_r = array_ops.concat([
        array_ops.fill([n_blocks - n_plusone_r], v_r),
        array_ops.fill([n_plusone_r], v_r + 1),
    ], 0)
    sizes_g = array_ops.concat([
        array_ops.fill([n_blocks - n_plusone_g], v_g),
        array_ops.fill([n_plusone_g], v_g + 1),
    ], 0)

    zero = array_ops.zeros([1], dtype=dtypes.int32)
    inds_r = array_ops.concat([zero, math_ops.cumsum(sizes_r)], 0)
    inds_g = array_ops.concat([zero, math_ops.cumsum(sizes_g)], 0)

    dim = math_ops.cast(array_ops.shape(real_activations)[1], dtype)

    def compute_kid_block(i):
        'Compute the ith block of the KID estimate.'
        r_s = inds_r[i]
        r_e = inds_r[i + 1]
        r = real_activations[r_s:r_e]
        m = math_ops.cast(r_e - r_s, dtype)

        g_s = inds_g[i]
        g_e = inds_g[i + 1]
        g = generated_activations[g_s:g_e]
        n = math_ops.cast(g_e - g_s, dtype)

        k_rr = (math_ops.matmul(r, r, transpose_b=True) / dim + 1)**3
        k_rg = (math_ops.matmul(r, g, transpose_b=True) / dim + 1)**3
        k_gg = (math_ops.matmul(g, g, transpose_b=True) / dim + 1)**3
        return (-2 * math_ops.reduce_mean(k_rg) +
                (math_ops.reduce_sum(k_rr) - math_ops.trace(k_rr)) /
                (m * (m - 1)) +
                (math_ops.reduce_sum(k_gg) - math_ops.trace(k_gg)) / (n *
                                                                      (n - 1)))

    ests = functional_ops.map_fn(compute_kid_block,
                                 math_ops.range(n_blocks),
                                 dtype=dtype,
                                 back_prop=False)

    mn = math_ops.reduce_mean(ests)

    # nn_impl.moments doesn't use the Bessel correction, which we want here
    n_blocks_ = math_ops.cast(n_blocks, dtype)
    var = control_flow_ops.cond(
        math_ops.less_equal(n_blocks, 1),
        lambda: array_ops.constant(float('nan'), dtype=dtype),
        lambda: math_ops.reduce_sum(math_ops.square(ests - mn)) /
        (n_blocks_ - 1))

    return mn, math_ops.sqrt(var / n_blocks_)
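
# A minimal NumPy sketch (an assumption for illustration, not the library code)
# of a single block of the estimator described in the docstring: the polynomial
# kernel k(x, y) = (x^T y / dim + 1)^3 plugged into the unbiased MMD^2 estimate.
import numpy as np

def _kid_single_block(real, gen):
  dim = real.shape[1]
  poly = lambda a, b: (a.dot(b.T) / dim + 1.0) ** 3
  m, n = real.shape[0], gen.shape[0]
  k_rr, k_gg, k_rg = poly(real, real), poly(gen, gen), poly(real, gen)
  return ((k_rr.sum() - np.trace(k_rr)) / (m * (m - 1))
          + (k_gg.sum() - np.trace(k_gg)) / (n * (n - 1))
          - 2.0 * k_rg.mean())

_rng = np.random.RandomState(0)
print(_kid_single_block(_rng.randn(64, 16), _rng.randn(64, 16) + 0.5))
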
# Example #36
 def _event_shape_tensor(self):
     return array_ops.constant([], dtype=dtypes.int32)
# Example #37
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None):
  """Returns op to update the given confusion matrix variables.

  For every pair of values in y_true and y_pred:

  true_positives: y_true == True and y_pred > thresholds
  false_negatives: y_true == True and y_pred <= thresholds
  true_negatives: y_true == False and y_pred <= thresholds
  false_positives: y_true == False and y_pred > thresholds

  The results will be weighted and added together. When multiple thresholds are
  provided, we will repeat the same for every threshold.

  For estimation of these metrics over a stream of data, the function creates an
  `update_op` operation that updates the given variables.

  If `sample_weight` is `None`, weights default to 1.
  Use weights of 0 to mask values.

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A float value or a python list or tuple of float thresholds in
      `[0, 1]`, or NEG_INF (used when top_k is set).
    top_k: Optional int, indicates that the positive labels should be limited to
      the top k predictions.
    class_id: Optional int, limits the prediction and labels to the class
      specified by this argument.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
      `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `y_true` dimension).

  Returns:
    Update op.

  Raises:
    ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
      `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if
      `variables_to_update` contains invalid keys.
  """
  if variables_to_update is None:
    return
  y_true = math_ops.cast(y_true, dtype=dtypes.float32)
  y_pred = math_ops.cast(y_pred, dtype=dtypes.float32)
  [y_pred,
   y_true], _ = ragged_assert_compatible_and_get_flat_values([y_pred, y_true],
                                                             sample_weight)
  y_pred.shape.assert_is_compatible_with(y_true.shape)

  if not any(
      key for key in variables_to_update if key in list(ConfusionMatrix)):
    raise ValueError(
        'Please provide at least one valid confusion matrix '
        'variable to update. Valid variable key options are: "{}". '
        'Received: "{}"'.format(
            list(ConfusionMatrix), variables_to_update.keys()))

  invalid_keys = [
      key for key in variables_to_update if key not in list(ConfusionMatrix)
  ]
  if invalid_keys:
    raise ValueError(
        'Invalid keys: {}. Valid variable key options are: "{}"'.format(
            invalid_keys, list(ConfusionMatrix)))

  with ops.control_dependencies([
      check_ops.assert_greater_equal(
          y_pred,
          math_ops.cast(0.0, dtype=y_pred.dtype),
          message='predictions must be >= 0'),
      check_ops.assert_less_equal(
          y_pred,
          math_ops.cast(1.0, dtype=y_pred.dtype),
          message='predictions must be <= 1')
  ]):
    if sample_weight is None:
      y_pred, y_true = tf_losses_utils.squeeze_or_expand_dimensions(
          y_pred, y_true)
    else:
      y_pred, y_true, sample_weight = (
          tf_losses_utils.squeeze_or_expand_dimensions(
              y_pred, y_true, sample_weight=sample_weight))

  if top_k is not None:
    y_pred = _filter_top_k(y_pred, top_k)
  if class_id is not None:
    y_true = y_true[..., class_id]
    y_pred = y_pred[..., class_id]

  thresholds = to_list(thresholds)
  num_thresholds = len(thresholds)
  num_predictions = array_ops.size(y_pred)

  # Reshape predictions and labels.
  predictions_2d = array_ops.reshape(y_pred, [1, -1])
  labels_2d = array_ops.reshape(
      math_ops.cast(y_true, dtype=dtypes.bool), [1, -1])

  # Tile the thresholds for every prediction.
  thresh_tiled = array_ops.tile(
      array_ops.expand_dims(array_ops.constant(thresholds), 1),
      array_ops.stack([1, num_predictions]))

  # Tile the predictions for every threshold.
  preds_tiled = array_ops.tile(predictions_2d, [num_thresholds, 1])

  # Compare predictions and threshold.
  pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)

  # Tile labels by number of thresholds
  label_is_pos = array_ops.tile(labels_2d, [num_thresholds, 1])

  if sample_weight is not None:
    weights = weights_broadcast_ops.broadcast_weights(
        math_ops.cast(sample_weight, dtype=dtypes.float32), y_pred)
    weights_tiled = array_ops.tile(
        array_ops.reshape(weights, [1, -1]), [num_thresholds, 1])
  else:
    weights_tiled = None

  update_ops = []

  def weighted_assign_add(label, pred, weights, var):
    label_and_pred = math_ops.cast(
        math_ops.logical_and(label, pred), dtype=dtypes.float32)
    if weights is not None:
      label_and_pred *= weights
    return var.assign_add(math_ops.reduce_sum(label_and_pred, 1))

  loop_vars = {
      ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
  }
  update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
  update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
  update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

  if update_fn or update_tn:
    pred_is_neg = math_ops.logical_not(pred_is_pos)
    loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos, pred_is_neg)

  if update_fp or update_tn:
    label_is_neg = math_ops.logical_not(label_is_pos)
    loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg, pred_is_pos)
    if update_tn:
      loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg, pred_is_neg)

  for matrix_cond, (label, pred) in loop_vars.items():
    if matrix_cond in variables_to_update:
      update_ops.append(
          weighted_assign_add(label, pred, weights_tiled,
                              variables_to_update[matrix_cond]))
  return control_flow_ops.group(update_ops)
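
# A tiny NumPy sketch (illustrative only) of the counting rule in the docstring
# above, for a single threshold of 0.5 and no sample weights.
import numpy as np
_y_true = np.array([1, 1, 0, 0], dtype=bool)
_y_pred = np.array([0.9, 0.3, 0.8, 0.1])
_pred_is_pos = _y_pred > 0.5
_tp = np.sum(_y_true & _pred_is_pos)    # 1  (true label, predicted positive)
_fn = np.sum(_y_true & ~_pred_is_pos)   # 1  (true label, predicted negative)
_fp = np.sum(~_y_true & _pred_is_pos)   # 1  (false label, predicted positive)
_tn = np.sum(~_y_true & ~_pred_is_pos)  # 1  (false label, predicted negative)
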
# Example #38
 def _conv_layer_params(self):
     weights_shape = 2, 2, 3, 4
     biases_shape = weights_shape[-1:]
     weights = array_ops.constant(npr.RandomState(0).randn(*weights_shape))
     biases = array_ops.constant(npr.RandomState(1).randn(*biases_shape))
     return (weights, biases)
# Example #39
 def _fully_connected_layer_params(self):
     weights_part = array_ops.constant([[1., 2.], [4., 3.]])
     bias_part = array_ops.constant([1., 2.])
     return (weights_part, bias_part)
    def testObliviousFeatureSplitGeneration(self):
        with self.test_session() as sess:
            # The data looks like the following:
            # Example |  Gradients    | Partition | Feature ID     |
            # i0      |  (0.2, 0.12)  | 1         | 1              |
            # i1      |  (-0.5, 0.07) | 1         | 2              |
            # i2      |  (1.2, 0.2)   | 1         | 1              |
            # i3      |  (4.0, 0.13)  | 2         | 2              |
            gradients = array_ops.constant([0.2, -0.5, 1.2, 4.0])
            hessians = array_ops.constant([0.12, 0.07, 0.2, 0.13])
            partition_ids = [1, 1, 1, 2]
            indices = [[0, 0], [1, 0], [2, 0], [3, 0]]
            values = array_ops.constant([1, 2, 1, 2], dtype=dtypes.int64)

            gradient_shape = tensor_shape.scalar()
            hessian_shape = tensor_shape.scalar()
            class_id = -1

            split_handler = categorical_split_handler.EqualitySplitHandler(
                l1_regularization=0.1,
                l2_regularization=1,
                tree_complexity_regularization=0,
                min_node_weight=0,
                sparse_int_column=sparse_tensor.SparseTensor(
                    indices, values, [4, 1]),
                feature_column_group_id=0,
                gradient_shape=gradient_shape,
                hessian_shape=hessian_shape,
                multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS,
                init_stamp_token=0,
                weak_learner_type=learner_pb2.LearnerConfig.
                OBLIVIOUS_DECISION_TREE)
            resources.initialize_resources(resources.shared_resources()).run()

            empty_gradients, empty_hessians = get_empty_tensors(
                gradient_shape, hessian_shape)
            example_weights = array_ops.ones([4, 1], dtypes.float32)

            update_1 = split_handler.update_stats_sync(
                0,
                partition_ids,
                gradients,
                hessians,
                empty_gradients,
                empty_hessians,
                example_weights,
                is_active=array_ops.constant([True, True]))
            update_2 = split_handler.update_stats_sync(
                0,
                partition_ids,
                gradients,
                hessians,
                empty_gradients,
                empty_hessians,
                example_weights,
                is_active=array_ops.constant([True, True]))

            with ops.control_dependencies([update_1, update_2]):
                are_splits_ready, partitions, gains, splits = (
                    split_handler.make_splits(0, 1, class_id))
                are_splits_ready, partitions, gains, splits = (sess.run(
                    [are_splits_ready, partitions, gains, splits]))
        self.assertTrue(are_splits_ready)
        self.assertAllEqual([1, 2], partitions)

        # For partition 1.
        # -(0.2 + 1.2 - 0.1) / (0.12 + 0.2 + 1)
        expected_left_weight1 = -0.9848484848484846
        # (0.2 + 1.2 - 0.1) ** 2 / (0.12 + 0.2 + 1)
        expected_left_gain1 = 1.2803030303030298

        # -(-0.5 + 0.1) / (0.07 + 1)
        expected_right_weight1 = 0.37383177570093457

        # (-0.5 + 0.1) ** 2 / (0.07 + 1)
        expected_right_gain1 = 0.14953271028037385

        # (0.2 + -0.5 + 1.2 - 0.1) ** 2 / (0.12 + 0.07 + 0.2 + 1)
        expected_bias_gain1 = 0.46043165467625885

        split_info = split_info_pb2.ObliviousSplitInfo()
        split_info.ParseFromString(splits[0])
        # Children of partition 1.
        left_child = split_info.children[0].vector
        right_child = split_info.children[1].vector
        split_node = split_info.split_node.oblivious_categorical_id_binary_split

        self.assertEqual(0, split_node.feature_column)
        self.assertEqual(1, split_node.feature_id)
        self.assertAllClose([expected_left_weight1], left_child.value, 0.00001)
        self.assertAllClose([expected_right_weight1], right_child.value,
                            0.00001)

        # For partition 2.
        expected_left_weight2 = 0
        expected_left_gain2 = 0
        # -(4 - 0.1) / (0.13 + 1)
        expected_right_weight2 = -3.4513274336283186
        # (4 - 0.1) ** 2 / (0.13 + 1)
        expected_right_gain2 = 13.460176991150442
        # (4 - 0.1) ** 2 / (0.13 + 1)
        expected_bias_gain2 = 13.460176991150442

        # Children of partition 2.
        left_child = split_info.children[2].vector
        right_child = split_info.children[3].vector
        self.assertAllClose([expected_left_weight2], left_child.value, 0.00001)
        self.assertAllClose([expected_right_weight2], right_child.value,
                            0.00001)

        self.assertAllClose(
            expected_left_gain1 + expected_right_gain1 - expected_bias_gain1 +
            expected_left_gain2 + expected_right_gain2 - expected_bias_gain2,
            gains[0], 0.00001)
    def testGenerateFeatureSplitCandidatesSumReduction(self):
        with self.cached_session() as sess:
            # The data looks like the following:
            # Example |  Gradients    | Partition | Feature ID     |
            # i0      |  (0.2, 0.12)  | 0         | 1,2            |
            # i1      |  (-0.5, 0.07) | 0         |                |
            # i2      |  (1.2, 0.2)   | 0         | 2              |
            # i3      |  (4.0, 0.13)  | 1         | 1              |
            gradients = array_ops.constant([0.2, -0.5, 1.2, 4.0])
            hessians = array_ops.constant([0.12, 0.07, 0.2, 0.13])
            partition_ids = [0, 0, 0, 1]
            indices = [[0, 0], [0, 1], [2, 0], [3, 0]]
            values = array_ops.constant([1, 2, 2, 1], dtype=dtypes.int64)

            gradient_shape = tensor_shape.scalar()
            hessian_shape = tensor_shape.scalar()
            class_id = -1

            split_handler = categorical_split_handler.EqualitySplitHandler(
                l1_regularization=0.1,
                l2_regularization=1,
                tree_complexity_regularization=0,
                min_node_weight=0,
                sparse_int_column=sparse_tensor.SparseTensor(
                    indices, values, [4, 1]),
                feature_column_group_id=0,
                gradient_shape=gradient_shape,
                hessian_shape=hessian_shape,
                multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS,
                init_stamp_token=0,
                loss_uses_sum_reduction=True)
            resources.initialize_resources(resources.shared_resources()).run()

            empty_gradients, empty_hessians = get_empty_tensors(
                gradient_shape, hessian_shape)
            example_weights = array_ops.ones([4, 1], dtypes.float32)

            update_1 = split_handler.update_stats_sync(
                0,
                partition_ids,
                gradients,
                hessians,
                empty_gradients,
                empty_hessians,
                example_weights,
                is_active=array_ops.constant([True, True]))
            update_2 = split_handler.update_stats_sync(
                0,
                partition_ids,
                gradients,
                hessians,
                empty_gradients,
                empty_hessians,
                example_weights,
                is_active=array_ops.constant([True, True]))
            with ops.control_dependencies([update_1, update_2]):
                are_splits_ready, partitions, gains, splits = (
                    split_handler.make_splits(0, 1, class_id))
                are_splits_ready, partitions, gains, splits = (sess.run(
                    [are_splits_ready, partitions, gains, splits]))
        self.assertTrue(are_splits_ready)
        self.assertAllEqual([0, 1], partitions)

        # Check the split on partition 0.
        # -(0.4 + 2.4 - 0.1) / (0.24 + 0.4 + 1)
        expected_left_weight = -1.6463414634146338

        # (0.4 + 2.4 - 0.1) ** 2 / (0.24 + 0.4 + 1)
        expected_left_gain = 4.445121951219511

        # -(-1 + 0.1) / (0.14 + 1)
        expected_right_weight = 0.789473684211

        # (-1 + 0.1) ** 2 / (0.14 + 1)
        expected_right_gain = 0.710526315789

        # (0.4 + -1 + 2.4 - 0.1) ** 2 / (0.24 + 0.14 + 0.4 + 1)
        expected_bias_gain = 1.6235955056179772

        split_info = split_info_pb2.SplitInfo()
        split_info.ParseFromString(splits[0])
        left_child = split_info.left_child.vector
        right_child = split_info.right_child.vector
        split_node = split_info.split_node.categorical_id_binary_split

        self.assertEqual(0, split_node.feature_column)

        self.assertEqual(2, split_node.feature_id)

        self.assertAllClose(
            expected_left_gain + expected_right_gain - expected_bias_gain,
            gains[0], 0.00001)

        self.assertAllClose([expected_left_weight], left_child.value, 0.00001)

        self.assertAllClose([expected_right_weight], right_child.value,
                            0.00001)

        # Check the split on partition 1.
        # (-8 + 0.1) / (0.26 + 1)
        expected_left_weight = -6.26984126984
        # (-8 + 0.1) ** 2 / (0.26 + 1)
        expected_left_gain = 49.5317460317
        expected_right_weight = 0
        expected_right_gain = 0
        # (-8 + 0.1) ** 2 / (0.26 + 1)
        expected_bias_gain = 49.5317460317

        # Verify the candidate for partition 1; there's only one active feature
        # here, so zero gain is expected.
        split_info = split_info_pb2.SplitInfo()
        split_info.ParseFromString(splits[1])
        left_child = split_info.left_child.vector
        right_child = split_info.right_child.vector
        split_node = split_info.split_node.categorical_id_binary_split
        self.assertAllClose(0.0, gains[1], 0.00001)

        self.assertAllClose([expected_left_weight], left_child.value, 0.00001)

        self.assertAllClose([expected_right_weight], right_child.value,
                            0.00001)

        self.assertEqual(0, split_node.feature_column)

        self.assertEqual(1, split_node.feature_id)
예제 #42
0
 def make_sparse(_):
     return sparse_tensor.SparseTensor(
         indices=array_ops.constant([[0, 0], [1, 0], [1, 1]],
                                    dtype=dtypes.int64),
         values=array_ops.constant([0, 0, 1], dtype=dtypes.int32),
         dense_shape=array_ops.constant([2, 2], dtype=dtypes.int64))
예제 #43
0
 def run(self):
   return array_ops.constant(1.0)
예제 #44
0
def load_keypoints_from_quantiles(feature_names,
                                  save_dir,
                                  num_keypoints,
                                  output_min,
                                  output_max,
                                  reversed_dict=None,
                                  dtype=dtypes.float32):
    """Retrieves keypoints initialization values for selected features.

  It expects that the quantiles have already been calculated and saved in the
  save_dir by the save_quantiles_for_keypoints function. It will raise
  an I/O error if not.

  Args:
    feature_names: List of features names for which to get keypoints
      initialization values.
    save_dir: Directory where the quantiles have been saved to. Same value used
      when save_quantiles_for_keypoints was called.
    num_keypoints: Desired number of keypoints to use for calibration. This
      can either be a scalar to be used for all features, or a dict mapping
      feature name to num_keypoints. Fewer keypoints than requested may end up
      being used when the given feature does not have enough distinct values.
      If num_keypoints for a feature is missing, None or 0, no initialization
      is generated.
    output_min: Initial calibrated value associated with the first calibration
      keypoint. The keypoint outputs in between will be linearly interpolated.
      It can be given as a scalar, in which case the value is used for all
      features, or a dict mapping feature name to output_min.
    output_max: Like output_min, but the calibrated value associated with the
      last keypoint. Scalar or dict.
    reversed_dict: An optional dict. If reversed_dict[feature_name] is True,
      then the initial output keypoints will be in reversed order for that
      feature, i.e., input_min will be mapped to output_max, and input_max will
      be mapped to output_min. Reversing output keypoints is useful for
      decreasing monotonic calibrators.
    dtype: Type to be used for calibration.

  Returns:
    Dict of feature name to pair of constant tensors that can be used to
    initialize calibrators keypoints inputs and outputs.

  Raises:
    tf.errors.NotFoundError: if quantiles file not found.
  """
    subdir = os.path.join(save_dir, _QUANTILES_SUBDIRECTORY)
    num_keypoints = tools.cast_to_dict(num_keypoints, feature_names,
                                       num_keypoints)
    output_min = tools.cast_to_dict_of_tensor_scalars(output_min,
                                                      feature_names, dtype,
                                                      "output_min")
    output_max = tools.cast_to_dict_of_tensor_scalars(output_max,
                                                      feature_names, dtype,
                                                      "output_max")

    keypoints = {}
    for feature_name in feature_names:
        if feature_name not in num_keypoints or not num_keypoints[feature_name]:
            continue
        all_quantiles = _load_quantiles(subdir, feature_name)
        percentiles = np.linspace(0., 100., num_keypoints[feature_name])
        quantiles = np.percentile(all_quantiles,
                                  percentiles,
                                  interpolation="nearest")
        quantiles = sorted(set(quantiles))  # Remove repeated quantiles.
        input_kpts = array_ops.constant(quantiles,
                                        shape=[len(quantiles)],
                                        dtype=dtype)
        output_kpts = math_ops.linspace(output_min[feature_name],
                                        output_max[feature_name],
                                        len(quantiles))
        if reversed_dict is not None and reversed_dict[feature_name]:
            output_kpts = array_ops.reverse(output_kpts, axis=[0])
        keypoints[feature_name] = (input_kpts, output_kpts)
    return keypoints
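
# A minimal NumPy sketch of the keypoint construction performed above: take
# evenly spaced percentiles of the saved quantiles, drop duplicates, and pair
# them with linearly spaced outputs. The synthetic data below stands in for
# the values that _load_quantiles would read from disk.
import numpy as np

_all_quantiles = np.sort(np.random.RandomState(0).gamma(2.0, size=1000))
_num_kpts, _output_min, _output_max = 10, 0.0, 1.0
_percentiles = np.linspace(0.0, 100.0, _num_kpts)
_quantiles = sorted(set(np.percentile(_all_quantiles, _percentiles,
                                      interpolation="nearest")))
_input_kpts = np.asarray(_quantiles)                                   # calibrator inputs
_output_kpts = np.linspace(_output_min, _output_max, len(_quantiles))  # calibrator outputs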
예제 #45
0
    def patches_to_images(self, grad, batch_size, rows_in, cols_in, channels,
                          rows_out, cols_out, ksize_r, ksize_c, stride_h,
                          stride_r):
        rate_r = 1
        rate_c = 1
        padding = self.pad

        ksize_r_eff = ksize_r + (ksize_r - 1) * (rate_r - 1)
        ksize_c_eff = ksize_c + (ksize_c - 1) * (rate_c - 1)

        if padding == 'SAME':
            rows_out = int(ceil(rows_in / stride_r))
            cols_out = int(ceil(cols_in / stride_h))
            pad_rows = ((rows_out - 1) * stride_r + ksize_r_eff - rows_in) // 2
            pad_cols = ((cols_out - 1) * stride_h + ksize_c_eff - cols_in) // 2

        elif padding == 'VALID':
            rows_out = int(ceil((rows_in - ksize_r_eff + 1) / stride_r))
            cols_out = int(ceil((cols_in - ksize_c_eff + 1) / stride_h))
            pad_rows = (rows_out - 1) * stride_r + ksize_r_eff - rows_in
            pad_cols = (cols_out - 1) * stride_h + ksize_c_eff - cols_in

        pad_rows, pad_cols = max(0, pad_rows), max(0, pad_cols)

        grad_expanded = array_ops.transpose(
            array_ops.reshape(
                grad,
                (batch_size, rows_out, cols_out, ksize_r, ksize_c, channels)),
            (1, 2, 3, 4, 0, 5))
        grad_flat = array_ops.reshape(grad_expanded,
                                      (-1, batch_size * channels))

        row_steps = range(0, rows_out * stride_r, stride_r)
        col_steps = range(0, cols_out * stride_h, stride_h)

        idx = []
        for i in range(rows_out):
            for j in range(cols_out):
                r_low, c_low = row_steps[i] - pad_rows, col_steps[j] - pad_cols
                r_high, c_high = r_low + ksize_r_eff, c_low + ksize_c_eff

                idx.extend([
                    (r * (cols_in) + c, i * (cols_out * ksize_r * ksize_c) +
                     j * (ksize_r * ksize_c) + ri * (ksize_c) + ci)
                    for (ri, r) in enumerate(range(r_low, r_high, rate_r))
                    for (ci, c) in enumerate(range(c_low, c_high, rate_c))
                    if 0 <= r and r < rows_in and 0 <= c and c < cols_in
                ])

        sp_shape = (rows_in * cols_in, rows_out * cols_out * ksize_r * ksize_c)

        sp_mat = sparse_tensor.SparseTensor(
            array_ops.constant(idx, dtype=ops.dtypes.int64),
            array_ops.ones((len(idx), ), dtype=ops.dtypes.float32), sp_shape)

        jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat)

        grad_out = array_ops.reshape(jac,
                                     (rows_in, cols_in, batch_size, channels))
        grad_out = array_ops.transpose(grad_out, (2, 0, 1, 3))

        return grad_out
예제 #46
0
def _make_psd(dim):
    """Constructs a PSD matrix of the given dimension."""
    mat = np.ones((dim, dim), dtype=np.float32)
    mat[np.arange(dim), np.arange(dim)] = 2. + np.arange(dim)
    return array_ops.constant(mat)
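
# Quick numeric sanity check of the construction above (illustrative only):
# the all-ones matrix is PSD (rank one, eigenvalues {dim, 0, ..., 0}) and
# replacing its diagonal with 2 + arange(dim) adds a strictly positive
# diagonal term, so the result is in fact positive definite.
import numpy as np

_dim = 4
_mat = np.ones((_dim, _dim), dtype=np.float32)
_mat[np.arange(_dim), np.arange(_dim)] = 2. + np.arange(_dim)
assert np.all(np.linalg.eigvalsh(_mat) > 0)    # all eigenvalues positive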
 def testComplexDiv(self):
     foo = array_ops.constant([1. + 3.j])
     with self.test_session():
         _ = math_ops.divide(foo, 1.).eval()
         _ = math_ops.div(foo, 2.).eval()
def _ExtractImagePatchesGrad(op, grad):

    batch_size, rows_in, cols_in, channels = [
        dim.value for dim in op.inputs[0].get_shape()
    ]
    input_bhwc = array_ops.shape(op.inputs[0])
    batch_size = input_bhwc[0]
    channels = input_bhwc[3]

    _, rows_out, cols_out, _ = [dim.value for dim in op.outputs[0].get_shape()]
    _, ksize_r, ksize_c, _ = op.get_attr('ksizes')
    _, stride_r, stride_h, _ = op.get_attr('strides')
    _, rate_r, rate_c, _ = op.get_attr('rates')
    padding = op.get_attr('padding')

    ksize_r_eff = ksize_r + (ksize_r - 1) * (rate_r - 1)
    ksize_c_eff = ksize_c + (ksize_c - 1) * (rate_c - 1)

    if padding == b'SAME':
        rows_out = int(ceil(rows_in / stride_r))
        cols_out = int(ceil(cols_in / stride_h))
        pad_rows = ((rows_out - 1) * stride_r + ksize_r_eff - rows_in) // 2
        pad_cols = ((cols_out - 1) * stride_h + ksize_c_eff - cols_in) // 2

    elif padding == b'VALID':
        rows_out = int(ceil((rows_in - ksize_r_eff + 1) / stride_r))
        cols_out = int(ceil((cols_in - ksize_c_eff + 1) / stride_h))
        pad_rows = (rows_out - 1) * stride_r + ksize_r_eff - rows_in
        pad_cols = (cols_out - 1) * stride_h + ksize_c_eff - cols_in

    pad_rows, pad_cols = max(0, pad_rows), max(0, pad_cols)

    grad_expanded = array_ops.transpose(
        array_ops.reshape(
            grad,
            (batch_size, rows_out, cols_out, ksize_r, ksize_c, channels)),
        (1, 2, 3, 4, 0, 5))
    grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels))

    row_steps = range(0, rows_out * stride_r, stride_r)
    col_steps = range(0, cols_out * stride_h, stride_h)

    idx = []
    for i in range(rows_out):
        for j in range(cols_out):
            r_low, c_low = row_steps[i] - pad_rows, col_steps[j] - pad_cols
            r_high, c_high = r_low + ksize_r_eff, c_low + ksize_c_eff

            idx.extend([
                (r * (cols_in) + c, i * (cols_out * ksize_r * ksize_c) + j *
                 (ksize_r * ksize_c) + ri * (ksize_c) + ci)
                for (ri, r) in enumerate(range(r_low, r_high, rate_r))
                for (ci, c) in enumerate(range(c_low, c_high, rate_c))
                if 0 <= r and r < rows_in and 0 <= c and c < cols_in
            ])

    sp_shape = (rows_in * cols_in, rows_out * cols_out * ksize_r * ksize_c)

    sp_mat = sparse_tensor.SparseTensor(
        array_ops.constant(idx, dtype=ops.dtypes.int64),
        array_ops.ones((len(idx), ), dtype=ops.dtypes.float32), sp_shape)

    jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat)

    grad_out = array_ops.reshape(jac, (rows_in, cols_in, batch_size, channels))
    grad_out = array_ops.transpose(grad_out, (2, 0, 1, 3))

    return [grad_out]
예제 #49
0
 def event_shape(self, name="event_shape"):
     with ops.name_scope(self.name):
         with ops.op_scope([self._batch_shape], name):
             return array_ops.constant([], dtype=self._batch_shape.dtype)
예제 #50
0
 def testDenseToSparseBatchDatasetWithInvalidShape(self):
     input_tensor = array_ops.constant([[1]])
     with self.assertRaisesRegexp(ValueError, "Dimension -2 must be >= 0"):
         dataset_ops.Dataset.from_tensors(input_tensor).apply(
             batching.dense_to_sparse_batch(
                 4, [-2])).make_initializable_iterator()
 def testDivideName(self):
     with self.test_session():
         op = math_ops.divide(array_ops.constant(3),
                              array_ops.constant(4),
                              name="my_cool_divide")
         self.assertEqual(op.name, "my_cool_divide:0")
예제 #52
0
 def _finalize_func(unused_string_handle):
     return array_ops.constant(0, dtypes.int64)
예제 #53
0
 def _serving_input_receiver_fn():
   return array_ops.constant([1]), None
예제 #54
0
def _dynamic_rnn_loop( cell, inputs, initial_state, ff_keep_prob, recur_keep_prob, parallel_iterations, swap_memory, sequence_length=None):
  """Internal implementation of Dynamic RNN.

  Args:
    cell: An instance of RNNCell.
    inputs: A `Tensor` of shape [time, batch_size, depth].
    initial_state: A `Tensor` of shape [batch_size, depth].
    ff_keep_prob: Keep probability for the dropout applied to `inputs`
      (the feed-forward connections).
    recur_keep_prob: Keep probability for the dropout mask applied to the
      recurrent state at every time step.
    parallel_iterations: Positive Python int.
    swap_memory: A Python boolean.
    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].

  Returns:
    Tuple (final_outputs, final_state).
    final_outputs:
      A `Tensor` of shape [time, batch_size, depth]`.
    final_state:
      A `Tensor` of shape [batch_size, depth].

  Raises:
    ValueError: If the input depth cannot be inferred via shape inference
      from the inputs.
  """
  state = initial_state
  assert isinstance(parallel_iterations, int), "parallel_iterations must be int"

  # Construct an initial output
  input_shape = array_ops.shape(inputs)
  (time_steps, batch_size, _) = array_ops.unpack(input_shape, 3)

  inputs_got_shape = inputs.get_shape().with_rank(3)
  (const_time_steps, const_batch_size, const_depth) = inputs_got_shape.as_list()

  if const_depth is None:
    raise ValueError(
        "Input size (depth of inputs) must be accessible via shape inference, "
        "but saw value None.")

  # Prepare dynamic conditional copying of state & output
  zero_output = array_ops.zeros(
      array_ops.pack([batch_size, cell.output_size]), inputs.dtype)
  if sequence_length is not None:
    min_sequence_length = math_ops.reduce_min(sequence_length)
    max_sequence_length = math_ops.reduce_max(sequence_length)

  time = array_ops.constant(0, dtype=dtypes.int32, name="time")

  with ops.op_scope([], "dynamic_rnn") as scope:
    base_name = scope

  output_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "output")

  input_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "input")

  if isinstance(ff_keep_prob, ops.Tensor) or ff_keep_prob < 1:
    inputs = nn_ops.dropout(inputs, ff_keep_prob, noise_shape=array_ops.pack([1, batch_size, const_depth]))
  input_ta = input_ta.unpack(inputs)
  
  if isinstance(recur_keep_prob, ops.Tensor) or recur_keep_prob < 1:
    ones = array_ops.ones(array_ops.pack([batch_size, cell.output_size]), inputs.dtype)
    state_dropout = nn_ops.dropout(ones, recur_keep_prob)
    state_dropout = array_ops.concat(1, [ones] * (cell.state_size // cell.output_size - 1) + [state_dropout])
  else:
    state_dropout = 1.
    
  def _time_step(time, state, output_ta_t):
    """Take a time step of the dynamic RNN.

    Args:
      time: int32 scalar Tensor.
      state: Vector.
      output_ta_t: `TensorArray`, the output with existing flow.

    Returns:
      The tuple (time + 1, new_state, output_ta_t with updated flow).
    """

    input_t = input_ta.read(time)
    # Restore some shape information
    input_t.set_shape([const_batch_size, const_depth])

    call_cell = lambda: cell(input_t, state*state_dropout)

    if sequence_length is not None:
      (output, new_state) = _rnn_step(
          time=time,
          sequence_length=sequence_length,
          min_sequence_length=min_sequence_length,
          max_sequence_length=max_sequence_length,
          zero_output=zero_output,
          state=state,
          call_cell=call_cell,
          skip_conditionals=True)
    else:
      (output, new_state) = call_cell()

    output_ta_t = output_ta_t.write(time, output)

    return (time + 1, new_state, output_ta_t)

  (_, final_state, output_final_ta) = control_flow_ops.while_loop(
      cond=lambda time, _1, _2: time < time_steps,
      body=_time_step,
      loop_vars=(time, state, output_ta),
      parallel_iterations=parallel_iterations,
      swap_memory=swap_memory)

  final_outputs = output_final_ta.pack()
  # Restore some shape information
  final_outputs.set_shape([
      const_time_steps, const_batch_size, cell.output_size])

  return (final_outputs, final_state)
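
# The recurrent dropout above samples a single mask (state_dropout) outside the
# while_loop and reuses it at every time step, so the same units are dropped for
# the whole sequence; only the output-sized slice of the state is masked. A
# NumPy-only sketch of that idea (shapes and values are illustrative):
import numpy as np

_rng = np.random.RandomState(0)
_batch, _units, _time, _keep = 2, 5, 3, 0.8
# One mask, sampled once and scaled by 1/keep_prob, as dropout does.
_mask = (_rng.uniform(size=(_batch, _units)) < _keep).astype(np.float32) / _keep
_state = np.ones((_batch, _units), dtype=np.float32)
for _ in range(_time):
    _state = np.tanh(_state * _mask)    # the same mask is applied at every step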
예제 #55
0
def _confusion_matrix_at_thresholds(labels,
                                    predictions,
                                    thresholds,
                                    weights=None):
    with ops.control_dependencies([
            check_ops.assert_greater_equal(
                predictions,
                math_ops.cast(0.0, dtype=predictions.dtype),
                message='predictions must be in [0, 1]'),
            check_ops.assert_less_equal(
                predictions,
                math_ops.cast(1.0, dtype=predictions.dtype),
                message='predictions must be in [0, 1]')
    ]):
        predictions, labels, weights = _remove_squeezable_dimensions(
            predictions=math_ops.to_float(predictions),
            labels=math_ops.cast(labels, dtype=dtypes.bool),
            weights=weights)

    num_thresholds = len(thresholds)

    # Reshape predictions and labels.
    predictions_2d = array_ops.reshape(predictions, [-1, 1])
    labels_2d = array_ops.reshape(math_ops.cast(labels, dtype=dtypes.bool),
                                  [1, -1])

    # Use static shape if known.
    num_predictions = predictions_2d.get_shape().as_list()[0]

    # Otherwise use dynamic shape.
    if num_predictions is None:
        num_predictions = array_ops.shape(predictions_2d)[0]
    thresh_tiled = array_ops.tile(
        array_ops.expand_dims(array_ops.constant(thresholds), [1]),
        array_ops.stack([1, num_predictions]))

    # Tile the predictions after thresholding them across different thresholds.
    pred_is_pos = math_ops.greater(
        array_ops.tile(array_ops.transpose(predictions_2d),
                       [num_thresholds, 1]), thresh_tiled)
    pred_is_neg = math_ops.logical_not(pred_is_pos)
    label_is_pos = array_ops.tile(labels_2d, [num_thresholds, 1])
    label_is_neg = math_ops.logical_not(label_is_pos)

    if weights is not None:
        weights = weights_broadcast_ops.broadcast_weights(
            math_ops.to_float(weights), predictions)
        weights_tiled = array_ops.tile(array_ops.reshape(weights, [1, -1]),
                                       [num_thresholds, 1])
        thresh_tiled.get_shape().assert_is_compatible_with(
            weights_tiled.get_shape())
    else:
        weights_tiled = None

    values = {}

    # tp
    is_true_positive = math_ops.to_float(
        math_ops.logical_and(label_is_pos, pred_is_pos))
    if weights_tiled is not None:
        is_true_positive *= weights_tiled
    values['tp'] = math_ops.reduce_sum(is_true_positive, 1)

    # fn
    is_false_negative = math_ops.to_float(
        math_ops.logical_and(label_is_pos, pred_is_neg))
    if weights_tiled is not None:
        is_false_negative *= weights_tiled
    values['fn'] = math_ops.reduce_sum(is_false_negative, 1)

    # tn
    is_true_negative = math_ops.to_float(
        math_ops.logical_and(label_is_neg, pred_is_neg))
    if weights_tiled is not None:
        is_true_negative *= weights_tiled
    values['tn'] = math_ops.reduce_sum(is_true_negative, 1)

    # fp
    is_false_positive = math_ops.to_float(
        math_ops.logical_and(label_is_neg, pred_is_pos))
    if weights_tiled is not None:
        is_false_positive *= weights_tiled
    values['fp'] = math_ops.reduce_sum(is_false_positive, 1)

    return values
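
# The core of the function above is a broadcasted comparison: predictions are
# tiled against every threshold, giving a [num_thresholds, num_predictions]
# boolean grid from which tp/fp/tn/fn are reduced per row. A NumPy sketch with
# made-up data (values are illustrative only):
import numpy as np

_preds = np.array([0.1, 0.4, 0.8, 0.6])
_labels = np.array([False, False, True, True])
_thresholds = np.array([0.25, 0.5, 0.75])
_pred_is_pos = _preds[None, :] > _thresholds[:, None]     # shape [3, 4]
_label_is_pos = np.tile(_labels[None, :], (len(_thresholds), 1))
_tp = np.sum(_pred_is_pos & _label_is_pos, axis=1)        # -> [2, 2, 1]
_fp = np.sum(_pred_is_pos & ~_label_is_pos, axis=1)       # -> [1, 0, 0]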
    def testLastOneEmpty(self):
        with self.cached_session() as sess:
            # The data looks like the following:
            # Example |  Gradients    | Partition | Feature ID     |
            # i0      |  (0.2, 0.12)  | 0         | 1,2            |
            # i1      |  (-0.5, 0.07) | 0         |                |
            # i2      |  (1.2, 0.2)   | 0         | 2              |
            # i3      |  (4.0, 0.13)  | 1         |                |
            gradients = array_ops.constant([0.2, -0.5, 1.2, 4.0])
            hessians = array_ops.constant([0.12, 0.07, 0.2, 0.13])
            partition_ids = [0, 0, 0, 1]
            indices = [[0, 0], [0, 1], [2, 0]]
            values = array_ops.constant([1, 2, 2], dtype=dtypes.int64)

            gradient_shape = tensor_shape.scalar()
            hessian_shape = tensor_shape.scalar()
            class_id = -1

            split_handler = categorical_split_handler.EqualitySplitHandler(
                l1_regularization=0.1,
                l2_regularization=1,
                tree_complexity_regularization=0,
                min_node_weight=0,
                sparse_int_column=sparse_tensor.SparseTensor(
                    indices, values, [4, 1]),
                feature_column_group_id=0,
                gradient_shape=gradient_shape,
                hessian_shape=hessian_shape,
                multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS,
                init_stamp_token=0)
            resources.initialize_resources(resources.shared_resources()).run()

            empty_gradients, empty_hessians = get_empty_tensors(
                gradient_shape, hessian_shape)
            example_weights = array_ops.ones([4, 1], dtypes.float32)

            update_1 = split_handler.update_stats_sync(
                0,
                partition_ids,
                gradients,
                hessians,
                empty_gradients,
                empty_hessians,
                example_weights,
                is_active=array_ops.constant([True, True]))
            with ops.control_dependencies([update_1]):
                are_splits_ready, partitions, gains, splits = (
                    split_handler.make_splits(0, 1, class_id))
                are_splits_ready, partitions, gains, splits = (sess.run(
                    [are_splits_ready, partitions, gains, splits]))
        self.assertTrue(are_splits_ready)
        self.assertAllEqual([0], partitions)

        # Check the split on partition 0.
        # -(0.2 + 1.2 - 0.1) / (0.12 + 0.2 + 1)
        expected_left_weight = -0.9848484848484846

        # (0.2 + 1.2 - 0.1) ** 2 / (0.12 + 0.2 + 1)
        expected_left_gain = 1.2803030303030298

        # -(-0.5 + 0.1) / (0.07 + 1)
        expected_right_weight = 0.37383177570093457

        # (-0.5 + 0.1) ** 2 / (0.07 + 1)
        expected_right_gain = 0.14953271028037385

        # (0.2 + -0.5 + 1.2 - 0.1) ** 2 / (0.12 + 0.07 + 0.2 + 1)
        expected_bias_gain = 0.46043165467625885

        split_info = split_info_pb2.SplitInfo()
        split_info.ParseFromString(splits[0])
        left_child = split_info.left_child.vector
        right_child = split_info.right_child.vector
        split_node = split_info.split_node.categorical_id_binary_split

        self.assertEqual(0, split_node.feature_column)

        self.assertEqual(2, split_node.feature_id)

        self.assertAllClose(
            expected_left_gain + expected_right_gain - expected_bias_gain,
            gains[0], 0.00001)

        self.assertAllClose([expected_left_weight], left_child.value, 0.00001)

        self.assertAllClose([expected_right_weight], right_child.value,
                            0.00001)
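
# The hand-computed constants in the comments above all follow the same
# regularized leaf formulas: with gradient sum g and hessian sum h, the l1 term
# soft-thresholds g toward zero, then weight = -g_shrunk / (h + l2) and
# gain = g_shrunk**2 / (h + l2). A quick standalone check of the partition-0
# numbers quoted above (l1=0.1, l2=1.0); the helper below is illustrative only.
def _shrink(g, l1=0.1):
    # Soft-threshold the gradient sum by the l1 regularization.
    if g > l1:
        return g - l1
    if g < -l1:
        return g + l1
    return 0.0

_g_left, _h_left = 0.2 + 1.2, 0.12 + 0.2     # examples with feature id 2
_g_right, _h_right = -0.5, 0.07              # the remaining example in partition 0
assert abs(-_shrink(_g_left) / (_h_left + 1.0) - (-0.9848484848484846)) < 1e-9
assert abs(_shrink(_g_left) ** 2 / (_h_left + 1.0) - 1.2803030303030298) < 1e-9
assert abs(-_shrink(_g_right) / (_h_right + 1.0) - 0.37383177570093457) < 1e-9
assert abs(_shrink(_g_right) ** 2 / (_h_right + 1.0) - 0.14953271028037385) < 1e-9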
예제 #57
0
def _dynamic_rnn_loop(
    cell, inputs, initial_state, parallel_iterations, swap_memory,
    sequence_length=None):
  """Internal implementation of Dynamic RNN.

  Args:
    cell: An instance of RNNCell.
    inputs: A `Tensor` of shape [time, batch_size, input_size].
    initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
      `cell.state_size` is a tuple, then this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell.state_size`.
    parallel_iterations: Positive Python int.
    swap_memory: A Python boolean
    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].

  Returns:
    Tuple `(final_outputs, final_state)`.
    final_outputs:
      A `Tensor` of shape `[time, batch_size, cell.output_size]`.
    final_state:
      A `Tensor` matrix, or tuple of such matrices, matching in length
      and shapes to `initial_state`.

  Raises:
    ValueError: If the input depth cannot be inferred via shape inference
      from the inputs.
  """
  state = initial_state
  assert isinstance(parallel_iterations, int), "parallel_iterations must be int"

  # Construct an initial output
  input_shape = array_ops.shape(inputs)
  time_steps = input_shape[0]
  batch_size = input_shape[1]

  inputs_got_shape = inputs.get_shape().with_rank_at_least(3).as_list()
  const_time_steps = inputs_got_shape[0]
  const_batch_size = inputs_got_shape[1]
  const_depth = inputs_got_shape[2:]

  if any(d is None for d in const_depth):
    raise ValueError(
        "Input size (depth of inputs) must be accessible via shape inference, "
        "but saw value None.")

  # Prepare dynamic conditional copying of state & output
  zeros_size = _state_size_with_prefix(cell.output_size, prefix=[batch_size])
  zero_output = array_ops.zeros(array_ops.pack(zeros_size), inputs.dtype)

  if sequence_length is not None:
    min_sequence_length = math_ops.reduce_min(sequence_length)
    max_sequence_length = math_ops.reduce_max(sequence_length)

  time = array_ops.constant(0, dtype=dtypes.int32, name="time")

  state_size = cell.state_size
  state_is_tuple = nest.is_sequence(state_size)

  state = nest.flatten(state) if state_is_tuple else (state,)

  with ops.op_scope([], "dynamic_rnn") as scope:
    base_name = scope

  output_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "output")

  input_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "input")

  input_ta = input_ta.unpack(inputs)

  def _time_step(time, output_ta_t, *state):
    """Take a time step of the dynamic RNN.

    Args:
      time: int32 scalar Tensor.
      output_ta_t: `TensorArray`, the output with existing flow.
      *state: List of vector tensors.

    Returns:
      The tuple (time + 1, output_ta_t with updated flow) + new_state.
    """

    input_t = input_ta.read(time)
    # Restore some shape information
    input_t.set_shape([const_batch_size] + const_depth)

    # Pack state back up for use by cell
    state = (nest.pack_sequence_as(structure=state_size, flat_sequence=state)
             if state_is_tuple else state[0])

    call_cell = lambda: cell(input_t, state)

    if sequence_length is not None:
      (output, new_state) = _rnn_step(
          time=time,
          sequence_length=sequence_length,
          min_sequence_length=min_sequence_length,
          max_sequence_length=max_sequence_length,
          zero_output=zero_output,
          state=state,
          call_cell=call_cell,
          state_size=state_size,
          skip_conditionals=True)
    else:
      (output, new_state) = call_cell()

    # Pack state if using state tuples
    new_state = (
        tuple(nest.flatten(new_state)) if state_is_tuple else (new_state,))

    output_ta_t = output_ta_t.write(time, output)

    return (time + 1, output_ta_t) + new_state

  final_loop_vars = control_flow_ops.while_loop(
      cond=lambda time, *_: time < time_steps,
      body=_time_step,
      loop_vars=(time, output_ta) + tuple(state),
      parallel_iterations=parallel_iterations,
      swap_memory=swap_memory)

  (output_final_ta, final_state) = (final_loop_vars[1], final_loop_vars[2:])

  final_outputs = output_final_ta.pack()
  # Restore some shape information
  final_outputs_size = _state_size_with_prefix(
      cell.output_size, prefix=[const_time_steps, const_batch_size])
  final_outputs.set_shape(final_outputs_size)

  # Unpack final state if not using state tuples.
  final_state = (
      nest.pack_sequence_as(
          structure=cell.state_size, flat_sequence=final_state)
      if state_is_tuple else final_state[0])
  return (final_outputs, final_state)
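
# The version above supports tuple states (e.g. LSTM or multi-layer states) by
# flattening them into individual tensors for while_loop's loop_vars and
# re-packing them inside the body via nest.flatten / nest.pack_sequence_as.
# A tiny pure-Python sketch of that flatten/pack round trip; the helpers are
# illustrative stand-ins, not the TensorFlow implementation.
def _flatten(structure):
    if isinstance(structure, (tuple, list)):
        return [leaf for item in structure for leaf in _flatten(item)]
    return [structure]

def _pack_like(structure, flat):
    flat_iter = iter(flat)
    def _pack(s):
        if isinstance(s, (tuple, list)):
            return tuple(_pack(x) for x in s)
        return next(flat_iter)
    return _pack(structure)

_state = (("c0", "h0"), ("c1", "h1"))       # e.g. a two-layer LSTM state
_flat = _flatten(_state)                    # ["c0", "h0", "c1", "h1"]
assert _pack_like(_state, _flat) == _state  # round trip restores the structure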
예제 #58
0
def sample(dim,
           num_results=None,
           sequence_indices=None,
           dtype=None,
           randomized=True,
           seed=None,
           name=None):
    r"""Returns a sample from the `dim` dimensional Halton sequence.

  Warning: The sequence elements take values only between 0 and 1. Care must be
  taken to appropriately transform the domain of a function if it differs from
  the unit cube before evaluating integrals using Halton samples. It is also
  important to remember that quasi-random numbers without randomization are not
  a replacement for pseudo-random numbers in every context. Quasi random numbers
  are completely deterministic and typically have significant negative
  autocorrelation unless randomization is used.

  Computes the members of the low discrepancy Halton sequence in dimension
  `dim`. The `dim`-dimensional sequence takes values in the unit hypercube in
  `dim` dimensions. Currently, only dimensions up to 1000 are supported. The
  prime base for the k-th axis is the k-th prime starting from 2. For example,
  if `dim` = 3, then the bases will be [2, 3, 5] respectively and the first
  element of the non-randomized sequence will be: [0.5, 0.333, 0.2]. For a more
  complete description of the Halton sequences see:
  https://en.wikipedia.org/wiki/Halton_sequence. For low discrepancy sequences
  and their applications see:
  https://en.wikipedia.org/wiki/Low-discrepancy_sequence.

  If `randomized` is true, this function produces a scrambled version of the
  Halton sequence introduced by Owen in arXiv:1706.02808. For the advantages of
  randomization of low discrepancy sequences see:
  https://en.wikipedia.org/wiki/Quasi-Monte_Carlo_method#Randomization_of_quasi-Monte_Carlo

  The number of samples produced is controlled by the `num_results` and
  `sequence_indices` parameters. The user must supply either `num_results` or
  `sequence_indices` but not both.
  The former is the number of samples to produce starting from the first
  element. If `sequence_indices` is given instead, the specified elements of
  the sequence are generated. For example, sequence_indices=tf.range(10) is
  equivalent to specifying num_results=10.

  Example Use:

  ```python
  bf = tf.contrib.bayesflow

  # Produce the first 1000 members of the Halton sequence in 3 dimensions.
  num_results = 1000
  dim = 3
  sample = bf.halton_sequence.sample(dim, num_results=num_results, seed=127)

  # Evaluate the integral of x_1 * x_2^2 * x_3^3  over the three dimensional
  # hypercube.
  powers = tf.range(1.0, limit=dim + 1)
  integral = tf.reduce_mean(tf.reduce_prod(sample ** powers, axis=-1))
  true_value = 1.0 / tf.reduce_prod(powers + 1.0)
  with tf.Session() as session:
    values = session.run((integral, true_value))

  # Produces a relative absolute error of 1.7%.
  print ("Estimated: %f, True Value: %f" % values)

  # Now skip the first 1000 samples and recompute the integral with the next
  # thousand samples. The sequence_indices argument can be used to do this.


  sequence_indices = tf.range(start=1000, limit=1000 + num_results,
                              dtype=tf.int32)
  sample_leaped = halton.sample(dim, sequence_indices=sequence_indices,
                                seed=111217)

  integral_leaped = tf.reduce_mean(tf.reduce_prod(sample_leaped ** powers,
                                                  axis=-1))
  with tf.Session() as session:
    values = session.run((integral_leaped, true_value))
  # Now produces a relative absolute error of 0.05%.
  print ("Leaped Estimated: %f, True Value: %f" % values)
  ```

  Args:
    dim: Positive Python `int` representing each sample's `event_size`. Must
      not be greater than 1000.
    num_results: (Optional) positive Python `int`. The number of samples to
      generate. Either this parameter or sequence_indices must be specified but
      not both. If this parameter is None, then the behaviour is determined by
      the `sequence_indices`.
    sequence_indices: (Optional) `Tensor` of dtype int32 and rank 1. The
      elements of the sequence to compute specified by their position in the
      sequence. The entries index into the Halton sequence starting with 0 and
      hence, must be whole numbers. For example, sequence_indices=[0, 5, 6] will
      produce the first, sixth and seventh elements of the sequence. If this
      parameter is None, then the `num_results` parameter must be specified
      which gives the number of desired samples starting from the first sample.
    dtype: (Optional) The dtype of the sample. One of `float32` or `float64`.
      Default is `float32`.
    randomized: (Optional) bool indicating whether to produce a randomized
      Halton sequence. If True, applies the randomization described in
      Owen (2017) [arXiv:1706.02808].
    seed: (Optional) Python integer to seed the random number generator. Only
      used if `randomized` is True. If not supplied and `randomized` is True,
      no seed is set.
    name: (Optional) Python `str` describing ops managed by this function. If
      not supplied, the name of this function is used.

  Returns:
    halton_elements: Elements of the Halton sequence. `Tensor` of supplied dtype
    and `shape` `[num_results, dim]` if `num_results` was specified or shape
    `[s, dim]` where s is the size of `sequence_indices` if `sequence_indices`
    were specified.

  Raises:
    ValueError: if both `sequence_indices` and `num_results` were specified or
    if dimension `dim` is less than 1 or greater than 1000.
  """
    if dim < 1 or dim > _MAX_DIMENSION:
        raise ValueError(
            'Dimension must be between 1 and {}. Supplied {}'.format(
                _MAX_DIMENSION, dim))
    if (num_results is None) == (sequence_indices is None):
        raise ValueError('Either `num_results` or `sequence_indices` must be'
                         ' specified but not both.')

    dtype = dtype or dtypes.float32
    if not dtype.is_floating:
        raise ValueError('dtype must be of `float`-type')

    with ops.name_scope(name, 'sample', values=[sequence_indices]):
        # Here and in the following, the shape layout is as follows:
        # [sample dimension, event dimension, coefficient dimension].
        # The coefficient dimension is an intermediate axis which will hold the
        # weights of the starting integer when expressed in the (prime) base for
        # an event dimension.
        indices = _get_indices(num_results, sequence_indices, dtype)
        radixes = array_ops.constant(_PRIMES[0:dim],
                                     dtype=dtype,
                                     shape=[dim, 1])

        max_sizes_by_axes = _base_expansion_size(math_ops.reduce_max(indices),
                                                 radixes)

        max_size = math_ops.reduce_max(max_sizes_by_axes)

        # The powers of the radixes that we will need. Note that there is a bit
        # of an excess here. Suppose we need the place value coefficients of 7
        # in base 2 and 3. For 2, we will have 3 digits but we only need 2 digits
        # for base 3. However, we can only create rectangular tensors so we
        # store both expansions in a [2, 3] tensor. This leads to the problem that
        # we might end up attempting to raise large numbers to large powers. For
        # example, the base 2 expansion of 1024 has 11 digits. If we were in 10
        # dimensions, then for the 10th prime (29) we would end up computing 29^10
        # even though we don't need it. We avoid this by setting the exponents for
        # each axis to 0 beyond the maximum value needed for that dimension.
        exponents_by_axes = array_ops.tile([math_ops.range(max_size)],
                                           [dim, 1])

        # The mask is true for those coefficients that are irrelevant.
        weight_mask = exponents_by_axes >= max_sizes_by_axes
        capped_exponents = array_ops.where(
            weight_mask, array_ops.zeros_like(exponents_by_axes),
            exponents_by_axes)
        weights = radixes**capped_exponents
        # The following computes the base b expansion of the indices. Suppose,
        # x = a0 + a1*b + a2*b^2 + ... Then, performing a floor div of x with
        # the vector (1, b, b^2, b^3, ...) will produce
        # (a0 + s1 * b, a1 + s2 * b, ...) where s_i are coefficients we don't care
        # about. Noting that all a_i < b by definition of place value expansion,
        # we see that taking the elements mod b of the above vector produces the
        # place value expansion coefficients.
        coeffs = math_ops.floor_div(indices, weights)
        coeffs *= 1 - math_ops.cast(weight_mask, dtype)
        coeffs %= radixes
        if not randomized:
            coeffs /= radixes
            return math_ops.reduce_sum(coeffs / weights, axis=-1)
        coeffs = _randomize(coeffs, radixes, seed=seed)
        # Remove the contribution from randomizing the trailing zero for the
        # axes where max_size_by_axes < max_size. This will be accounted
        # for separately below (using zero_correction).
        coeffs *= 1 - math_ops.cast(weight_mask, dtype)
        coeffs /= radixes
        base_values = math_ops.reduce_sum(coeffs / weights, axis=-1)

        # The randomization used in Owen (2017) does not leave 0 invariant. While
        # we have accounted for the randomization of the first `max_size_by_axes`
        # coefficients, we still need to correct for the trailing zeros. Luckily,
        # this is equivalent to adding a uniform random value scaled so the first
        # `max_size_by_axes` coefficients are zero. The following statements perform
        # this correction.
        zero_correction = random_ops.random_uniform([dim, 1],
                                                    seed=seed,
                                                    dtype=dtype)
        zero_correction /= (radixes**max_sizes_by_axes)
        return base_values + array_ops.reshape(zero_correction, [-1])
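
# A NumPy-only sketch of the non-randomized computation above: expand each
# index in the prime base of every axis, then divide the digits by increasing
# powers of the base and sum (the radical inverse). For index 1 this reproduces
# the first sequence element [0.5, 0.333..., 0.2] quoted in the docstring.
# The helper is illustrative, not part of the library.
import numpy as np

def _radical_inverse(index, base, num_digits=8):
    powers = base ** np.arange(num_digits)     # 1, b, b^2, ...
    digits = (index // powers) % base          # place-value digits of `index`
    return np.sum(digits / (base * powers))    # sum_k a_k / b^(k+1)

assert np.isclose(_radical_inverse(1, 2), 0.5)
assert np.isclose(_radical_inverse(1, 3), 1.0 / 3.0)
assert np.isclose(_radical_inverse(1, 5), 0.2)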
예제 #59
0
def conditional_batch_norm(inputs,
                           conditional_layer,
                           var_scope_postfix='',
                           decay=0.999,
                           center=True,
                           scale=False,
                           epsilon=0.001,
                           activation_fn=None,
                           param_initializers=None,
                           param_regularizers=None,
                           updates_collections=tf.GraphKeys.UPDATE_OPS,
                           is_training=True,
                           reuse=None,
                           variables_collections=None,
                           outputs_collections=None,
                           trainable=True,
                           data_format=DATA_FORMAT_NHWC,
                           zero_debias_moving_mean=False,
                           renorm=False,
                           renorm_clipping=None,
                           renorm_momentum=0.99,
                           scope=None):
    """Custom implementation of batch norm  to support the optional `conditional_layer` and `var_scope_postfix`.
  For comments on the other parameters, see tensorflow.contrib.layers.python.layers.batch_norm, where this is copied
  from (tf 1.5 version).

  Args:
    conditional_layer: A tensor with 2 dimensions [batch, channels]. If not None, the beta and gamma parameters will
      be conditioned on the `conditional_layer`.
    var_scope_postfix: A string. Append it to the var scopes of all variables other than the weight and bias. e.g.
      var scope of the `gamma` variable becomes `'gamma' + var_scope_postfix`.
  """

    if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
        raise ValueError('data_format has to be either NCHW or NHWC.')
    if inputs.dtype != tf.float32:
        raise NotImplementedError(
            'This implementation may not be compatible with mixed precision training.'
        )
    with tf.variable_scope(scope, 'BatchNorm', [inputs], reuse=reuse) as sc:

        if conditional_layer is not None:
            conditional_layer = tf.convert_to_tensor(conditional_layer)
            # Normalizing the conditional layer seems to stabilize training a little.
            conditional_layer = tf.nn.l2_normalize(
                conditional_layer, dim=1, name='normalized_conditional_layer')
            conditional_layer_shape = conditional_layer.get_shape()
            conditional_layer_rank = conditional_layer_shape.ndims
            if conditional_layer_rank is None:
                raise ValueError('Conditional layer %s has undefined rank' %
                                 conditional_layer.name)
            elif conditional_layer_rank != 2:
                raise ValueError('Conditional layer %s is not rank 2.' %
                                 conditional_layer.name)

        inputs = tf.convert_to_tensor(inputs)
        original_shape = inputs.get_shape()
        original_inputs = inputs
        original_rank = original_shape.ndims
        if original_rank is None:
            raise ValueError('Inputs %s has undefined rank' % inputs.name)
        elif original_rank not in [2, 4]:
            raise ValueError('Inputs %s has unsupported rank.'
                             ' Expected 2 or 4 but got %d' %
                             (inputs.name, original_rank))
        if original_rank == 2:
            channels = inputs.get_shape()[-1].value
            if channels is None:
                raise ValueError('`C` dimension must be known but is None')
            new_shape = [-1, 1, 1, channels]
            if data_format == DATA_FORMAT_NCHW:
                new_shape = [-1, channels, 1, 1]
            inputs = tf.reshape(inputs, new_shape)
        inputs_shape = inputs.get_shape()
        if data_format == DATA_FORMAT_NHWC:
            params_shape = inputs_shape[-1:]
        else:
            params_shape = inputs_shape[1:2]
        if not params_shape.is_fully_defined():
            raise ValueError('Inputs %s has undefined `C` dimension %s.' %
                             (inputs.name, params_shape))

        # Allocate parameters for the beta and gamma of the normalization.
        beta_collections = utils.get_variable_collections(
            variables_collections, 'beta')
        variable_dtype = inputs.dtype
        if not param_initializers:
            param_initializers = {}
        if not param_regularizers:
            param_regularizers = {}

        if center:
            beta_scope = 'beta' + var_scope_postfix
            if conditional_layer is not None:
                assert not param_initializers, 'param_initializers are not supported with conditional layer.'
                assert not param_regularizers, 'param_regularizers are not supported with conditional layer.'
                beta = get_conditional_batch_norm_param(conditional_layer,
                                                        int(params_shape[-1]),
                                                        scope=beta_scope)
            else:
                # Behaves like normal batch norm.
                beta_collections = utils.get_variable_collections(
                    variables_collections, beta_scope)
                beta_initializer = param_initializers.get(
                    beta_scope, tf.zeros_initializer())
                beta_regularizer = param_regularizers.get('beta')
                beta = variables.model_variable(beta_scope,
                                                shape=params_shape,
                                                dtype=variable_dtype,
                                                initializer=beta_initializer,
                                                regularizer=beta_regularizer,
                                                collections=beta_collections,
                                                trainable=trainable)
        else:
            beta = array_ops.constant(0.0,
                                      dtype=variable_dtype,
                                      shape=params_shape)

        if scale:
            gamma_scope = 'gamma' + var_scope_postfix
            if conditional_layer is not None:
                assert not param_initializers, 'param_initializers are not supported with conditional layer.'
                assert not param_regularizers, 'param_regularizers are not supported with conditional layer.'
                delta_gamma = get_conditional_batch_norm_param(
                    conditional_layer,
                    int(params_shape[-1]),
                    scope=gamma_scope)
                # Per https://arxiv.org/pdf/1707.03017.pdf.
                gamma = tf.constant(
                    1.0,
                    dtype=variable_dtype,
                ) + delta_gamma
            else:
                gamma_collections = utils.get_variable_collections(
                    variables_collections, gamma_scope)
                gamma_initializer = param_initializers.get(
                    gamma_scope, tf.ones_initializer())
                gamma_regularizer = param_regularizers.get('gamma')
                gamma = variables.model_variable(gamma_scope,
                                                 shape=params_shape,
                                                 dtype=variable_dtype,
                                                 initializer=gamma_initializer,
                                                 regularizer=gamma_regularizer,
                                                 collections=gamma_collections,
                                                 trainable=trainable)
        else:
            gamma = tf.constant(1.0, dtype=variable_dtype, shape=params_shape)

        # Create moving_mean and moving_variance variables and add them to the
        # appropriate collections. We disable variable partitioning while creating
        # them, because assign_moving_average is not yet supported for partitioned
        # variables (this needs to be handled carefully, as it may break
        # the checkpoint backward compatibility).
        with tf.variable_scope(tf.get_variable_scope()) as local_scope:
            local_scope.set_partitioner(None)
            moving_mean_scope = 'moving_mean' + var_scope_postfix
            moving_mean_collections = utils.get_variable_collections(
                variables_collections, moving_mean_scope)
            moving_mean_initializer = param_initializers.get(
                moving_mean_scope, tf.zeros_initializer())
            moving_mean = variables.model_variable(
                moving_mean_scope,
                shape=params_shape,
                dtype=tf.float32,
                initializer=moving_mean_initializer,
                trainable=False,
                collections=moving_mean_collections)
            moving_variance_scope = 'moving_variance' + var_scope_postfix
            moving_variance_collections = utils.get_variable_collections(
                variables_collections, moving_variance_scope)
            moving_variance_initializer = param_initializers.get(
                moving_variance_scope, tf.ones_initializer())
            moving_variance = variables.model_variable(
                moving_variance_scope,
                shape=params_shape,
                dtype=tf.float32,
                initializer=moving_variance_initializer,
                trainable=False,
                collections=moving_variance_collections)

            if renorm:
                renorm_clipping = renorm_clipping or {}
                keys = ['rmax', 'rmin', 'dmax']
                if set(renorm_clipping) - set(keys):
                    raise ValueError(
                        'renorm_clipping %s contains keys not in %s' %
                        (renorm_clipping, keys))

                # Create variables to maintain the moving mean and standard deviation.
                # These are used in training and thus are different from the moving
                # averages above. The renorm variables are colocated with moving_mean
                # and moving_variance.
                # NOTE: below, the outer `with device` block causes the current device
                # stack to be cleared. The nested ones use a `lambda` to set the desired
                # device and ignore any devices that may be set by the custom getter.
                def _renorm_variable(name, shape):
                    var = variables.model_variable(
                        name=
                        name,  # renorm variable should be dependent on var_scope_postfix.
                        shape=shape,
                        dtype=tf.float32,
                        initializer=param_initializers.get(
                            name, tf.zeros_initializer()),
                        trainable=False)
                    return var

                with ops.device(None):
                    device = ((lambda _: moving_mean.device)
                              if context.executing_eagerly() else
                              moving_mean.device)
                    with ops.device(device):
                        renorm_mean = _renorm_variable(
                            'renorm_mean' + var_scope_postfix, params_shape)
                        renorm_mean_weight = _renorm_variable(
                            'renorm_mean_weight' + var_scope_postfix, ())
                    # We initialize renorm_stddev to 0, and maintain the (0-initialized)
                    # renorm_stddev_weight. This allows us to (1) mix the average
                    # stddev with the minibatch stddev early in training, and (2) compute
                    # the unbiased average stddev by dividing renorm_stddev by the weight.
                    device = ((lambda _: moving_variance.device)
                              if context.executing_eagerly() else
                              moving_variance.device)
                    with ops.device(device):
                        renorm_stddev = _renorm_variable(
                            'renorm_stddev' + var_scope_postfix, params_shape)
                        renorm_stddev_weight = _renorm_variable(
                            'renorm_stddev_weight' + var_scope_postfix, ())

                class dotdict(dict):
                    """dot.notation access to dictionary attributes"""
                    __getattr__ = dict.get
                    __setattr__ = dict.__setitem__
                    __delattr__ = dict.__delitem__

                renorm_params = dotdict({
                    'renorm_mean': renorm_mean,
                    'renorm_mean_weight': renorm_mean_weight,
                    'renorm_stddev': renorm_stddev,
                    'renorm_stddev_weight': renorm_stddev_weight,
                    'renorm_clipping': renorm_clipping,
                    'renorm_momentum': renorm_momentum,
                    'moving_mean': moving_mean,
                    'moving_variance': moving_variance,
                    'epsilon': epsilon
                })
            else:
                renorm_params = None
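        # Rough sketch of what `_batch_norm_aux` is expected to do with the
        # renorm params during training (following the Batch Renormalization
        # paper; the exact implementation is not shown in this snippet):
        #     r = clip(batch_stddev / moving_stddev, rmin, rmax)
        #     d = clip((batch_mean - moving_mean) / moving_stddev, -dmax, dmax)
        #     x_hat = (x - batch_mean) / batch_stddev * r + d
        #     output = gamma * x_hat + beta
        # r and d are treated as constants (no gradients flow through them), so
        # training normalizes with minibatch statistics while staying close to
        # the inference-time normalization.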

        def _batch_norm_training():
            # return tf.nn.fused_batch_norm(
            return _batch_norm_aux(inputs,
                                   gamma,
                                   beta,
                                   epsilon=epsilon,
                                   data_format=data_format,
                                   renorm=renorm,
                                   renorm_params=renorm_params)

        def _batch_norm_inference():
            # return tf.nn.fused_batch_norm(
            return _batch_norm_aux(inputs,
                                   gamma,
                                   beta,
                                   mean=tf.cast(moving_mean,
                                                dtype=variable_dtype),
                                   variance=tf.cast(moving_variance,
                                                    dtype=variable_dtype),
                                   epsilon=epsilon,
                                   is_training=False,
                                   data_format=data_format,
                                   renorm=renorm,
                                   renorm_params=renorm_params)

        outputs, mean, variance = utils.smart_cond(is_training,
                                                   _batch_norm_training,
                                                   _batch_norm_inference)

        # If `is_training` doesn't have a constant value (because it is a `Tensor`,
        # a `Variable` or a `Placeholder`), then `is_training_value` will be None
        # and `need_updates` will be True.
        is_training_value = utils.constant_value(is_training)
        need_updates = is_training_value is None or is_training_value
        if need_updates:
            if updates_collections is None:
                no_updates = lambda: outputs

                def _force_updates():
                    """Internal function forces updates moving_vars if is_training."""
                    update_moving_mean = moving_averages.assign_moving_average(
                        moving_mean,
                        mean,
                        decay,
                        zero_debias=zero_debias_moving_mean)
                    update_moving_variance = moving_averages.assign_moving_average(
                        moving_variance, variance, decay, zero_debias=False)
                    with tf.control_dependencies(
                        [update_moving_mean, update_moving_variance]):
                        return tf.identity(outputs)

                outputs = utils.smart_cond(is_training, _force_updates,
                                           no_updates)
            else:
                moving_vars_fn = lambda: (moving_mean, moving_variance)

                def _delay_updates():
                    """Internal function that delay updates moving_vars if is_training."""
                    update_moving_mean = moving_averages.assign_moving_average(
                        moving_mean,
                        tf.cast(mean, dtype=moving_mean.dtype),
                        decay,
                        zero_debias=zero_debias_moving_mean)
                    update_moving_variance = moving_averages.assign_moving_average(
                        moving_variance,
                        tf.cast(variance, dtype=moving_variance.dtype),
                        decay,
                        zero_debias=False)
                    return update_moving_mean, update_moving_variance

                update_mean, update_variance = utils.smart_cond(
                    is_training, _delay_updates, moving_vars_fn)
                ops.add_to_collections(updates_collections, update_mean)
                ops.add_to_collections(updates_collections, update_variance)

        outputs.set_shape(inputs_shape)
        if original_shape.ndims == 2:
            outputs = array_ops.reshape(outputs,
                                        array_ops.shape(original_inputs))
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           outputs)
Example #60
0
def leapfrog_integrator(step_size,
                        n_steps,
                        initial_position,
                        initial_momentum,
                        potential_and_grad,
                        initial_grad,
                        name=None):
    """Applies `n_steps` steps of the leapfrog integrator.

  This just wraps `leapfrog_step()` in a `tf.while_loop()`, reusing
  gradient computations where possible.

  Args:
    step_size: Scalar step size or array of step sizes for the
      leapfrog integrator. Broadcasts to the shape of
      `initial_position`. Larger step sizes lead to faster progress, but
      too-large step sizes lead to larger discretization error and
      worse energy conservation.
    n_steps: Number of steps to run the leapfrog integrator.
    initial_position: Tensor containing the value(s) of the position variable(s)
      to update.
    initial_momentum: Tensor containing the value(s) of the momentum variable(s)
      to update.
    potential_and_grad: Python callable that takes a position tensor like
      `initial_position` and returns the potential energy and its gradient at
      that position.
    initial_grad: Tensor with the value of the gradient of the potential energy
      at `initial_position`.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    updated_position: Updated value of the position.
    updated_momentum: Updated value of the momentum.
    new_potential: Potential energy of the new position. Has shape matching
      `potential_and_grad(initial_position)`.
    new_grad: Gradient from potential_and_grad() evaluated at the new position.
      Has shape matching `initial_position`.

  Example: Simple quadratic potential.

  ```python
  def potential_and_grad(position):
    return tf.reduce_sum(0.5 * tf.square(position)), position
  position = tf.placeholder(np.float32)
  momentum = tf.placeholder(np.float32)
  potential, grad = potential_and_grad(position)
  new_position, new_momentum, new_potential, new_grad = hmc.leapfrog_integrator(
    0.1, 3, position, momentum, potential_and_grad, grad)

  sess = tf.Session()
  position_val = np.random.randn(10)
  momentum_val = np.random.randn(10)
  potential_val, grad_val = sess.run([potential, grad],
                                     {position: position_val})
  positions = np.zeros([100, 10])
  for i in range(100):
    position_val, momentum_val, potential_val, grad_val = sess.run(
      [new_position, new_momentum, new_potential, new_grad],
      {position: position_val, momentum: momentum_val})
    positions[i] = position_val
  # Should trace out sinusoidal dynamics.
  plt.plot(positions[:, 0])
  ```
  """
    def leapfrog_wrapper(step_size, x, m, grad, l):
        x, m, _, grad = leapfrog_step(step_size, x, m, potential_and_grad,
                                      grad)
        return step_size, x, m, grad, l + 1

    def counter_fn(a, b, c, d, counter):  # pylint: disable=unused-argument
        return counter < n_steps

    with ops.name_scope(
            name, 'leapfrog_integrator',
            [step_size, n_steps, initial_position, initial_momentum,
             initial_grad]):
        _, new_x, new_m, new_grad, _ = control_flow_ops.while_loop(
            counter_fn,
            leapfrog_wrapper, [
                step_size, initial_position, initial_momentum, initial_grad,
                array_ops.constant(0)
            ],
            back_prop=False)
        # We're counting on the runtime to eliminate this redundant computation.
        new_potential, new_grad = potential_and_grad(new_x)
    return new_x, new_m, new_potential, new_grad
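The `leapfrog_step()` looped over above is not shown in this snippet. As a point of reference, here is a minimal NumPy sketch of the usual leapfrog update (half momentum step, full position step, half momentum step) with the same return convention as the wrapper assumes; treat it as an assumption about `leapfrog_step`'s structure, not a reproduction of it.

```python
import numpy as np


def leapfrog_step_sketch(step_size, position, momentum, potential_and_grad, grad):
    """One leapfrog step: half-kick, drift, half-kick (NumPy sketch)."""
    # Half step for the momentum using the gradient at the current position.
    momentum = momentum - 0.5 * step_size * grad
    # Full step for the position using the updated momentum.
    position = position + step_size * momentum
    # Re-evaluate the potential energy and its gradient at the new position.
    potential, grad = potential_and_grad(position)
    # Second half step for the momentum with the new gradient.
    momentum = momentum - 0.5 * step_size * grad
    return position, momentum, potential, grad


# Example with the quadratic potential from the docstring above:
# potential_and_grad = lambda q: (0.5 * np.sum(q ** 2), q)
```

Each step is volume-preserving and time-reversible, which is why repeated application inside the `while_loop` conserves energy well for reasonable step sizes.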