Example #1
  def testSerialize(self):
    # pylint: disable=g-import-not-at-top
    try:
      import portpicker
    except ImportError:
      return
    with context.graph_mode():
      worker_port = portpicker.pick_unused_port()
      ps_port = portpicker.pick_unused_port()
      cluster_dict = {
          "worker": ["localhost:%s" % worker_port],
          "ps": ["localhost:%s" % ps_port]
      }
      cs = server_lib.ClusterSpec(cluster_dict)

      worker = server_lib.Server(
          cs, job_name="worker", protocol="grpc", task_index=0, start=True)
      unused_ps = server_lib.Server(
          cs, job_name="ps", protocol="grpc", task_index=0, start=True)
      with ops.Graph().as_default(), session.Session(target=worker.target):
        with ops.device("/job:worker"):
          t = constant_op.constant([[1.0], [2.0]])
          l = list_ops.tensor_list_from_tensor(t, element_shape=[1])
        with ops.device("/job:ps"):
          l_ps = array_ops.identity(l)
          l_ps, e = list_ops.tensor_list_pop_back(
              l_ps, element_dtype=dtypes.float32)
        with ops.device("/job:worker"):
          worker_e = array_ops.identity(e)
        self.assertAllEqual(worker_e.eval(), [2.0])
Example #2
  def testCopyToGPU(self):
    if not test_util.is_gpu_available():
      self.skipTest("No GPU available")

    with ops.device("/cpu:0"):
      optional_with_value = optional_ops.Optional.from_value(
          (constant_op.constant(37.0), constant_op.constant("Foo"),
           constant_op.constant(42)))
      optional_none = optional_ops.Optional.none_from_structure(
          structure.TensorStructure(dtypes.float32, []))

    with ops.device("/gpu:0"):
      gpu_optional_with_value = optional_ops._OptionalImpl(
          array_ops.identity(optional_with_value._variant_tensor),
          optional_with_value.value_structure)
      gpu_optional_none = optional_ops._OptionalImpl(
          array_ops.identity(optional_none._variant_tensor),
          optional_none.value_structure)

      gpu_optional_with_value_has_value = gpu_optional_with_value.has_value()
      gpu_optional_with_value_values = gpu_optional_with_value.get_value()

      gpu_optional_none_has_value = gpu_optional_none.has_value()

    self.assertTrue(self.evaluate(gpu_optional_with_value_has_value))
    self.assertEqual((37.0, b"Foo", 42),
                     self.evaluate(gpu_optional_with_value_values))
    self.assertFalse(self.evaluate(gpu_optional_none_has_value))
Example #3
 def test_rank_one_tensor_doesnt_raise_if_rank_just_right_static_rank(self):
   with self.test_session():
     tensor = constant_op.constant([1, 2], name="my_tensor")
     desired_rank = 1
     with ops.control_dependencies(
         [check_ops.assert_rank_at_least(tensor, desired_rank)]):
       array_ops.identity(tensor).eval()
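The same check-then-identity gating pattern can be written against the public TF 2.x API; a minimal sketch, assuming TensorFlow 2.x imported as `tf` (`tf.debugging.assert_rank_at_least` is the public counterpart of `check_ops.assert_rank_at_least`):

import tensorflow as tf

t = tf.constant([1, 2], name="my_tensor")
# In graph mode (e.g. inside tf.function) the identity op will not run until
# the rank assertion has been evaluated; in eager mode the assert runs eagerly.
with tf.control_dependencies([tf.debugging.assert_rank_at_least(t, 1)]):
    checked = tf.identity(t)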
Example #4
 def testBijector(self):
   x_ = np.arange(3 * 4 * 2).astype(np.float32).reshape(3, 4 * 2)
   with self.cached_session() as sess:
     nvp = RealNVP(
         num_masked=4,
         validate_args=True,
         **self._real_nvp_kwargs)
     x = constant_op.constant(x_)
     forward_x = nvp.forward(x)
     # Use identity to invalidate cache.
     inverse_y = nvp.inverse(array_ops.identity(forward_x))
     forward_inverse_y = nvp.forward(inverse_y)
     fldj = nvp.forward_log_det_jacobian(x, event_ndims=1)
     # Use identity to invalidate cache.
     ildj = nvp.inverse_log_det_jacobian(
         array_ops.identity(forward_x), event_ndims=1)
     variables.global_variables_initializer().run()
     [
         forward_x_,
         inverse_y_,
         forward_inverse_y_,
         ildj_,
         fldj_,
     ] = sess.run([
         forward_x,
         inverse_y,
         forward_inverse_y,
         ildj,
         fldj,
     ])
     self.assertEqual("real_nvp", nvp.name)
     self.assertAllClose(forward_x_, forward_inverse_y_, rtol=1e-1, atol=0.)
     self.assertAllClose(x_, inverse_y_, rtol=1e-1, atol=0.)
     self.assertAllClose(ildj_, -fldj_, rtol=1e-6, atol=0.)
Example #5
 def test_rank_one_tensor_doesnt_raise_if_rank_just_right_dynamic_rank(self):
   with self.test_session():
     tensor = array_ops.placeholder(dtypes.float32, name="my_tensor")
     desired_rank = 1
     with ops.control_dependencies(
         [check_ops.assert_rank_at_least(tensor, desired_rank)]):
       array_ops.identity(tensor).eval(feed_dict={tensor: [1, 2]})
Example #6
 def test_rank_one_tensor_doesnt_raise_if_rank_matches_static_rank(self):
   with self.test_session():
     tensor_rank1 = constant_op.constant([42, 43], name="my_tensor")
     for desired_ranks in ((0, 1, 2), (1, 0, 2), (1, 2, 0)):
       with ops.control_dependencies([
           check_ops.assert_rank_in(tensor_rank1, desired_ranks)]):
         array_ops.identity(tensor_rank1).eval()
Example #7
 def test_rank_one_tensor_raises_if_rank_mismatches_static_rank(self):
   with self.test_session():
     tensor_rank1 = constant_op.constant((42, 43), name="my_tensor")
     with self.assertRaisesRegexp(ValueError, "my_tensor.*rank"):
       with ops.control_dependencies([
           check_ops.assert_rank_in(tensor_rank1, (0, 2))]):
         array_ops.identity(tensor_rank1).eval()
Example #8
 def test_rank_zero_tensor_raises_if_rank_mismatch_dynamic_rank(self):
   with self.test_session():
     tensor_rank0 = array_ops.placeholder(dtypes.float32, name="my_tensor")
     with ops.control_dependencies([
         check_ops.assert_rank_in(tensor_rank0, (1, 2), message="fail")]):
       with self.assertRaisesOpError("fail.*my_tensor.*rank"):
         array_ops.identity(tensor_rank0).eval(feed_dict={tensor_rank0: 42.0})
Example #9
 def test_rank_zero_tensor_doesnt_raise_if_rank_matches_dynamic_rank(self):
   with self.test_session():
     tensor_rank0 = array_ops.placeholder(dtypes.float32, name="my_tensor")
     for desired_ranks in ((0, 1, 2), (1, 0, 2), (1, 2, 0)):
       with ops.control_dependencies([
           check_ops.assert_rank_in(tensor_rank0, desired_ranks)]):
         array_ops.identity(tensor_rank0).eval(feed_dict={tensor_rank0: 42.0})
Example #10
  def mark_as_return(self, tensor):
    """Acts like identity but marks the `Tensor` as a return value.

    This will possibly return a copy of the `Tensor`. Usage:

    ```
      with AutomaticControlDependencies() as a:
       ...
       t = a.mark_as_return(t)
      _ = ...(t...)  # i.e. it's safe to use t here
    ```

    Args:
      tensor: the `Tensor` to be marked

    Returns:
      a copy of the `Tensor`.
    """
    if isinstance(tensor, ops.IndexedSlices):
      values = array_ops.identity(tensor.values)
      indices = array_ops.identity(tensor.indices)
      self._returned_tensors.add(indices)
      self._returned_tensors.add(values)
      return ops.IndexedSlices(values, indices, dense_shape=tensor.dense_shape)
    # We want to make the return values depend on the stateful operations, but
    # we don't want to introduce a cycle, so we make the return value the result
    # of a new identity operation that the stateful operations definitely don't
    # depend on.
    tensor = array_ops.identity(tensor)
    self._returned_tensors.add(tensor)
    return tensor
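A minimal sketch of the underlying behaviour, assuming eager TensorFlow 2.x as `tf` (not the private `AutomaticControlDependencies` class): `tf.identity` yields a fresh tensor carrying the same value, so dependencies can be attached to the copy without modifying the original op.

import tensorflow as tf

x = tf.constant([1.0, 2.0])
y = tf.identity(x)                                # new tensor, same value
print(x is y)                                     # False: distinct tensor objects
print(tf.reduce_all(tf.equal(x, y)).numpy())      # True: identical contents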
Example #11
 def _create_estimator_spec(features, mode, logits, labels, train_op_fn):
   del features, labels  # Not used.
   trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
   testcase.assertItemsEqual(
       expected_var_names,
       [var.name for var in trainable_vars])
   loss = constant_op.constant(1.)
   assert_logits = _assert_close(
       expected_logits, logits, message='Failed for mode={}. '.format(mode))
   with ops.control_dependencies([assert_logits]):
     if mode == model_fn.ModeKeys.TRAIN:
       return model_fn.EstimatorSpec(
           mode=mode,
           loss=loss,
           train_op=train_op_fn(loss))
     elif mode == model_fn.ModeKeys.EVAL:
       return model_fn.EstimatorSpec(
           mode=mode,
           loss=array_ops.identity(loss))
     elif mode == model_fn.ModeKeys.PREDICT:
       return model_fn.EstimatorSpec(
           mode=mode,
           predictions={'logits': array_ops.identity(logits)})
     else:
       testcase.fail('Invalid mode: {}'.format(mode))
Example #12
  def testFeedSparseTensor(self):
   with session.Session() as s:
     indices = np.array([[3, 2, 0], [4, 5, 1]]).astype(np.int64)
     values = np.array([1.0, 2.0]).astype(np.float32)
     shape = np.array([7, 9, 2]).astype(np.int64)
     sp = ops.SparseTensor(
         array_ops.placeholder(dtype=np.int64, shape=(2, 3)),
         array_ops.placeholder(dtype=np.float32, shape=(2,)),
         array_ops.placeholder(dtype=np.int64, shape=(3,)),)
     sp_indices = array_ops.identity(sp.indices)
     sp_values = array_ops.identity(sp.values)
     sp_shape = array_ops.identity(sp.shape)
     sp2 = ops.SparseTensor(sp_indices, sp_values, sp_shape)
     # Feed with tuple
     indices_out, values_out, shape_out = s.run(
         [sp_indices, sp_values, sp_shape], {sp: (indices, values, shape)})
     self.assertAllEqual(indices_out, indices)
     self.assertAllEqual(values_out, values)
     self.assertAllEqual(shape_out, shape)
     # Feed with SparseTensorValue
     indices_out, values_out, shape_out = s.run(
         [sp_indices, sp_values, sp_shape],
         {sp: ops.SparseTensorValue(indices, values, shape)})
     self.assertAllEqual(indices_out, indices)
     self.assertAllEqual(values_out, values)
     self.assertAllEqual(shape_out, shape)
     # Feed with SparseTensorValue, fetch SparseTensorValue
     sp2_out = s.run(sp2, {sp: ops.SparseTensorValue(indices, values, shape)})
     self.assertAllEqual(sp2_out.indices, indices)
     self.assertAllEqual(sp2_out.values, values)
     self.assertAllEqual(sp2_out.shape, shape)
Example #13
  def testFeedIndexedSlicesWithoutDenseShape(self):
   with session.Session() as s:
     values = np.array([1.0, 2.0]).astype(np.float32)
     indices = np.array([[3, 2, 0], [4, 5, 1]]).astype(np.int64)
     dense_shape = None
     ind = ops.IndexedSlices(
         array_ops.placeholder(dtype=np.float32,
                               shape=(2,)),
         array_ops.placeholder(dtype=np.int64,
                               shape=(2, 3)),
         None)
     ind_values = array_ops.identity(ind.values)
     ind_indices = array_ops.identity(ind.indices)
     ind2 = ops.IndexedSlices(ind_values, ind_indices)
     # Feed with tuple
     values_out, indices_out = s.run(
         [ind_values, ind_indices], {ind: (values, indices)})
     self.assertAllEqual(values_out, values)
     self.assertAllEqual(indices_out, indices)
     # Feed with IndexedSlicesValue
     values_out, indices_out = s.run(
         [ind_values, ind_indices],
         {ind: ops.IndexedSlicesValue(values, indices, dense_shape)})
     self.assertAllEqual(values_out, values)
     self.assertAllEqual(indices_out, indices)
     # Feed with IndexedSlicesValue, fetch IndexedSlicesValue
     ind2_out = s.run(ind2, {ind: ops.IndexedSlicesValue(values, indices,
                                                         dense_shape)})
     self.assertAllEqual(ind2_out.values, values)
     self.assertAllEqual(ind2_out.indices, indices)
     self.assertAllEqual(ind2_out.dense_shape, dense_shape)
Example #14
    def _concat(self):
        """Returns the overall concatenated value as a `Tensor`.

        This is different from using the partitioned variable directly as a tensor
        (through tensor conversion and `as_tensor`) in that it creates a new set of
        operations that keeps the control dependencies from its scope.

        Returns:
          `Tensor` containing the concatenated value.
        """
        if len(self._variable_list) == 1:
            with ops.name_scope(None):
                return array_ops.identity(self._variable_list[0], name=self._name)

        partition_axes = self._partition_axes()

        if len(partition_axes) > 1:
            raise NotImplementedError(
                "Cannot concatenate along more than one dimension: %s.  "
                "Multi-axis partition concat is not supported" % str(partition_axes)
            )
        partition_ix = partition_axes[0]

        with ops.name_scope(self._name + "/ConcatPartitions/"):
            concatenated = array_ops.concat(partition_ix, self._variable_list)

        with ops.name_scope(None):
            return array_ops.identity(concatenated, name=self._name)
Example #15
  def _LayerWithIdentity(self,
                         input_tensor=None,
                         scope='test',
                         post_activation_bypass=False):
    """Add a basic conv, identity, batch norm with skip to the default graph."""
    batch_size, height, width, depth = 5, 128, 128, 3
    if input_tensor is None:
      input_tensor = array_ops.zeros((batch_size, height, width, depth))
    weight_init = init_ops.truncated_normal_initializer
    with ops.name_scope(scope):
      output = layers.conv2d(
          input_tensor,
          depth, [5, 5],
          padding='SAME',
          weights_initializer=weight_init(0.09),
          activation_fn=None,
          normalizer_fn=None,
          biases_initializer=None)
      output = array_ops.identity(output, name='conv_out')

      output = layers.batch_norm(
          output, center=True, scale=True, decay=1.0 - 0.003, fused=True)

      output = array_ops.identity(output, name='bn_out')
      if post_activation_bypass:
        output += input_tensor
    return output
Example #16
  def as_tensor(self):
    """Returns the overall concatenated value as a `Tensor`.

    Returns:
      `Tensor` containing the concatenated value.
    """
    if self._as_tensor is not None:
      return self._as_tensor

    if len(self._variable_list) == 1:
      with ops.name_scope(None):
        self._as_tensor = array_ops.identity(self._variable_list[0],
                                             name=self._name)
        return self._as_tensor

    if all([p < 2 for p in self._partitions]):
      partition_ix = 0
    else:
      partition_ix = [i for i, p in enumerate(self._partitions) if p > 1][0]
    with ops.name_scope(self._name + "/ConcatPartitions/"):
      concatenated = array_ops.concat(partition_ix, self._variable_list)
    with ops.name_scope(None):
      # Be sure to cache the concatenated tensor to not do extraneous
      # computations.
      self._as_tensor = array_ops.identity(concatenated, name=self._name)
    return self._as_tensor
Example #17
    def initialized_value(self):
        """Returns the value of the initialized variable.

        You should use this instead of the variable itself to initialize another
        variable with a value that depends on the value of this variable.

        ```python
        # Initialize 'v' with a random tensor.
        v = tf.Variable(tf.truncated_normal([10, 40]))
        # Use `initialized_value` to guarantee that `v` has been
        # initialized before its value is used to initialize `w`.
        # The random values are picked only once.
        w = tf.Variable(v.initialized_value() * 2.0)
        ```

        Returns:
          A `Tensor` holding the value of this variable after its initializer
          has run.
        """
        with ops.control_dependencies(None):
            with ops.control_dependencies([self._initializer_op]):
                # TODO(vrv): Change this class to not take caching_device, but
                # to take the op to colocate the snapshot with, so we can use
                # colocation rather than devices.
                if self._caching_device is not None:
                    with ops.device(self._caching_device):
                        return array_ops.identity(self._variable)
                else:
                    with ops.colocate_with(self._variable.op):
                        return array_ops.identity(self._variable)
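In eager TensorFlow 2.x variables are initialized on creation, so the same initialize-from-another-variable pattern no longer needs `initialized_value`; a hedged sketch, assuming `tf` is TensorFlow 2.x:

import tensorflow as tf

v = tf.Variable(tf.random.truncated_normal([10, 40]))
# `v` already holds its initial value here; tf.identity takes a snapshot that
# is detached from any later assignments to `v`.
w = tf.Variable(tf.identity(v) * 2.0)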
Example #18
  def testSumOfTwoReadVariablesWithoutRepeatGrad(self):
    with self.test_session(use_gpu=True) as session:
      a = array_ops.identity(
          np.arange(
              3 * 5, dtype=np.float32).reshape(3, 5) + 1)
      b = array_ops.identity(
          np.arange(
              3 * 5, dtype=np.float32).reshape(3, 5) + 1 + 3 * 5)
      ta = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=2)
      ta = ta.write(0, a, name="write_a")
      ta = ta.write(1, b, name="write_b")
      c = (
          ta.read(
              0, name="read_a_0") +  # a + b
          ta.read(
              1, name="read_b_0"))
      g0 = -(np.arange(3 * 5, dtype=np.float32).reshape(3, 5) + 1)
      grad_a = gradients_impl.gradients([c], [a], [g0])[0]  # d(a+b)/da = 1
      grad_b = gradients_impl.gradients([c], [b], [g0])[0]  # d(a+b)/db = 1

      # Test gradients calculated individually
      grad_a_t, = session.run([grad_a])
      self.assertAllEqual(grad_a_t, g0)

      grad_b_t, = session.run([grad_b])
      self.assertAllEqual(grad_b_t, g0)

      # Test gradients calculated jointly
      joint_grad_a_t, joint_grad_b_t = session.run([grad_a, grad_b])
      self.assertAllEqual(joint_grad_a_t, g0)
      self.assertAllEqual(joint_grad_b_t, g0)
Example #19
 def testBijector(self):
   x_ = np.arange(3 * 4 * 2).astype(np.float32).reshape(3, 4, 2)
   with self.test_session() as sess:
     ma = MaskedAutoregressiveFlow(
         validate_args=True,
         **self._autoregressive_flow_kwargs)
     x = constant_op.constant(x_)
     forward_x = ma.forward(x)
     # Use identity to invalidate cache.
     inverse_y = ma.inverse(array_ops.identity(forward_x))
     fldj = ma.forward_log_det_jacobian(x)
     # Use identity to invalidate cache.
     ildj = ma.inverse_log_det_jacobian(array_ops.identity(forward_x))
     variables.global_variables_initializer().run()
     [
         forward_x_,
         inverse_y_,
         ildj_,
         fldj_,
     ] = sess.run([
         forward_x,
         inverse_y,
         ildj,
         fldj,
     ])
     self.assertEqual("masked_autoregressive_flow", ma.name)
     self.assertAllClose(forward_x_, forward_x_, rtol=1e-6, atol=0.)
     self.assertAllClose(x_, inverse_y_, rtol=1e-5, atol=0.)
     self.assertAllClose(ildj_, -fldj_, rtol=1e-6, atol=0.)
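The same cache-busting round trip can be reproduced with a simpler public bijector; a sketch assuming TensorFlow Probability is installed as `tfp` (the `MaskedAutoregressiveFlow` kwargs above are specific to that test suite):

import tensorflow as tf
import tensorflow_probability as tfp

bij = tfp.bijectors.Exp()
x = tf.constant([0.5, 1.0])
y = bij.forward(x)
# Wrapping y in tf.identity forces the bijector to recompute the inverse
# instead of returning its cached forward input.
x_back = bij.inverse(tf.identity(y))
fldj = bij.forward_log_det_jacobian(x, event_ndims=0)
ildj = bij.inverse_log_det_jacobian(tf.identity(y), event_ndims=0)
# Expect x_back ≈ x and ildj ≈ -fldj.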
Example #20
  def __init__(self,
               a=0.0,
               b=1.0,
               validate_args=True,
               allow_nan_stats=False,
               name="Uniform"):
    """Construct Uniform distributions with `a` and `b`.

    The parameters `a` and `b` must be shaped in a way that supports
    broadcasting (e.g. `b - a` is a valid operation).

    Here are examples without broadcasting:

    ```python
    # Without broadcasting
    u1 = Uniform(3.0, 4.0)  # a single uniform distribution [3, 4]
    u2 = Uniform([1.0, 2.0], [3.0, 4.0])  # 2 distributions [1, 3], [2, 4]
    u3 = Uniform([[1.0, 2.0],
                  [3.0, 4.0]],
                 [[1.5, 2.5],
                  [3.5, 4.5]])  # 4 distributions
    ```

    And with broadcasting:

    ```python
    u1 = Uniform(3.0, [5.0, 6.0, 7.0])  # 3 distributions
    ```

    Args:
      a: Floating point tensor, the minimum endpoint.
      b: Floating point tensor, the maximum endpoint. Must be > `a`.
      validate_args: Whether to assert that `a < b`. If `validate_args` is
        `False` and inputs are invalid, correct behavior is not guaranteed.
      allow_nan_stats:  Boolean, default `False`.  If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member.  If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: The name to prefix Ops created by this distribution class.

    Raises:
      InvalidArgumentError: if `a >= b` and `validate_args=True`.
    """
    self._allow_nan_stats = allow_nan_stats
    self._validate_args = validate_args
    with ops.name_scope(name, values=[a, b]):
      with ops.control_dependencies([check_ops.assert_less(
          a, b, message="uniform not defined when a > b.")] if validate_args
                                    else []):
        a = array_ops.identity(a, name="a")
        b = array_ops.identity(b, name="b")

    self._a = a
    self._b = b
    self._name = name
    self._batch_shape = common_shapes.broadcast_shape(
        self._a.get_shape(), self._b.get_shape())
    self._event_shape = tensor_shape.TensorShape([])

    contrib_tensor_util.assert_same_float_dtype((a, b))
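A hedged sketch of the `validate_args` idiom above using public TF 2.x checks; `make_bounds` is a hypothetical helper, not part of the distribution class:

import tensorflow as tf

def make_bounds(a, b, validate_args=True):
    deps = [tf.debugging.assert_less(a, b, message="a must be < b")] if validate_args else []
    with tf.control_dependencies(deps):
        # The identity ops carry the dependency on the check and give the
        # parameters stable names in graph mode.
        return tf.identity(a, name="a"), tf.identity(b, name="b")

low, high = make_bounds(tf.constant(3.0), tf.constant(4.0))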
Example #21
def _check_shape_dominates(tensor, tensors):
  """Check that broadcasting `tensor` against `tensors` does not expand it.

  Why?  Because I want to be very sure that the samples tensor is not
  accidentally enlarged by broadcasting against tensors that are
  supposed to be describing the distribution(s) sampled from, lest the
  sample counts end up inflated.

  Args:
    tensor: A Tensor whose shape is to be protected against broadcasting.
    tensors: A list of Tensors to check

  Returns:
    tensor: `tf.identity(tensor)` with control dependencies attached;
      be sure to use that downstream.
  """
  def check(t):
    target = array_ops.shape(tensor)[1:]
    result = array_ops.broadcast_dynamic_shape(target, array_ops.shape(t))
    # This rank check ensures that I don't get a wrong answer from the
    # _shapes_ broadcasting against each other.
    gt = check_ops.assert_greater(array_ops.rank(target), array_ops.rank(t))
    eq = check_ops.assert_equal(target, result)
    return gt, eq
  checks = list(itertools.chain(*[check(t) for t in tensors]))
  with ops.control_dependencies(checks):
    return array_ops.identity(array_ops.identity(tensor))
Example #22
  def __init__(self, mu, sigma, name="Normal"):
    """Construct Normal distributions with mean and stddev `mu` and `sigma`.

    The parameters `mu` and `sigma` must be shaped in a way that supports
    broadcasting (e.g. `mu + sigma` is a valid operation).

    Args:
      mu: `float` or `double` tensor, the means of the distribution(s).
      sigma: `float` or `double` tensor, the stddevs of the distribution(s).
        sigma must contain only positive values.
      name: The name to give Ops created by the initializer.

    Raises:
      TypeError: if mu and sigma are different dtypes.
    """
    with ops.op_scope([mu, sigma], name):
      mu = ops.convert_to_tensor(mu)
      sigma = ops.convert_to_tensor(sigma)
      with ops.control_dependencies([check_ops.assert_positive(sigma)]):
        self._name = name
        self._mu = array_ops.identity(mu, name="mu")
        self._sigma = array_ops.identity(sigma, name="sigma")
        self._batch_shape = self._ones().get_shape()
        self._event_shape = tensor_shape.TensorShape([])

    contrib_tensor_util.assert_same_float_dtype((mu, sigma))
Example #23
 def _v1_multi_metagraph_saved_model(self):
   export_graph = ops.Graph()
   with export_graph.as_default():
     start = array_ops.placeholder(
         shape=[None], dtype=dtypes.float32, name="start")
     v = resource_variable_ops.ResourceVariable(21.)
     first_output = array_ops.identity(start * v, name="first_output")
     second_output = array_ops.identity(v, name="second_output")
     with session_lib.Session() as session:
       session.run(v.initializer)
       path = os.path.join(self.get_temp_dir(), "saved_model", str(ops.uid()))
       builder = builder_impl.SavedModelBuilder(path)
       builder.add_meta_graph_and_variables(
           session, tags=["first"],
           signature_def_map={
               "first_key": signature_def_utils.build_signature_def(
                   {"first_start": utils_impl.build_tensor_info(start)},
                   {"first_output": utils_impl.build_tensor_info(
                       first_output)})})
       builder.add_meta_graph(
           tags=["second"],
           signature_def_map={
               "second_key": signature_def_utils.build_signature_def(
                   {"second_start": utils_impl.build_tensor_info(start)},
                   {"second_output": utils_impl.build_tensor_info(
                       second_output)})})
       builder.save()
   return path
Example #24
  def _TestInsertQuantOpForAddAfterConv2d(self, is_training):
    graph = ops.Graph()
    with graph.as_default():
      batch_size, height, width, depth = 5, 128, 128, 3
      input1 = array_ops.zeros((batch_size, height, width, depth))
      input2 = array_ops.zeros((batch_size, height / 2, width / 2, 32))
      conv = conv2d(input1, 32, [5, 5], stride=2, padding='SAME',
                    weights_initializer=self._WeightInit(0.09),
                    activation_fn=None, scope='test/test')
      node = math_ops.add(conv, input2, name='test/add')
      node = nn_ops.relu6(node, name='test/relu6')
      update_barrier = control_flow_ops.no_op(name='update_barrier')
      with ops.control_dependencies([update_barrier]):
        array_ops.identity(node, name='control_dependency')

    quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8)

    quantization_node_name = 'FakeQuantWithMinMaxVars'
    conv_quant = graph.get_operation_by_name('test/test/conv_quant/' +
                                             quantization_node_name)
    self.assertEqual(conv_quant.type, quantization_node_name)

    # Scan through all FakeQuant operations, ensuring that the activation
    # isn't in the consumers of the operation. Since activations are folded
    # into the preceding operation during inference, the FakeQuant operation after
    # the activation is all that is needed.
    for op in graph.get_operations():
      if op.type == quantization_node_name:
        quant_op = graph.get_operation_by_name(op.name)
        consumers = []
        for output in quant_op.outputs:
          consumers.extend(output.consumers())

        self.assertNotIn('test/relu6', [c.name for c in consumers])
Example #25
  def __init__(self, alpha, beta, name="Gamma"):
    """Construct Gamma distributions with parameters `alpha` and `beta`.

    The parameters `alpha` and `beta` must be shaped in a way that supports
    broadcasting (e.g. `alpha + beta` is a valid operation).

    Args:
      alpha: `float` or `double` tensor, the shape params of the
        distribution(s).
        alpha must contain only positive values.
      beta: `float` or `double` tensor, the inverse scale params of the
        distribution(s).
        beta must contain only positive values.
      name: The name to prepend to all ops created by this distribution.

    Raises:
      TypeError: if `alpha` and `beta` are different dtypes.
    """
    with ops.op_scope([alpha, beta], name):
      with ops.control_dependencies([
          check_ops.assert_positive(alpha), check_ops.assert_positive(beta)]):
        alpha = array_ops.identity(alpha, name="alpha")
        beta = array_ops.identity(beta, name="beta")

        contrib_tensor_util.assert_same_float_dtype((alpha, beta))
        self._broadcast_tensor = alpha + beta

    self._get_batch_shape = self._broadcast_tensor.get_shape()
    self._get_event_shape = tensor_shape.TensorShape([])

    self._alpha = alpha
    self._beta = beta
    self._name = name
Example #26
  def __init__(
      self, p, dtype=dtypes.int32, strict=True, strict_statistics=True,
      name="Bernoulli"):
    """Construct Bernoulli distributions.

    Args:
      p: An N-D `Tensor` representing the probability of a positive
          event. Each entry in the `Tensor` parameterizes an independent
          Bernoulli distribution.
      dtype: dtype for samples. Note that other values will take the dtype of p.
      strict: Whether to assert that `0 <= p <= 1`. If not strict, `log_pmf` may
        return nans.
      strict_statistics:  Boolean, default True.  If True, raise an exception if
        a statistic (e.g. mean/mode/etc...) is undefined for any batch member.
        If False, batch members with valid parameters leading to undefined
        statistics will return NaN for this statistic.
      name: A name for this distribution.
    """
    self._strict_statistics = strict_statistics
    self._name = name
    self._dtype = dtype
    self._strict = strict
    check_op = check_ops.assert_less_equal
    with ops.op_scope([p], name):
      with ops.control_dependencies(
          [check_op(p, 1.), check_op(0., p)] if strict else []):
        p = array_ops.identity(p, name="p")
      self._p = p
      self._q = array_ops.identity(1. - p, name="q")
      self._batch_shape = array_ops.shape(self._p)
      self._event_shape = array_ops.constant([], dtype=dtypes.int32)
Example #27
 def _overridden_initial_value_fn(device=d, index=i):
   assert index > 0
   with ops.device(device):
     if context.executing_eagerly():
       return array_ops.identity(value_list[0].value())
     else:
       return array_ops.identity(value_list[0].initial_value)
Example #28
 def custom_getter(getter, *args, **kwargs):
   var = getter(*args, **kwargs)
   if kwargs["reuse"]:
     # This can be used, e.g., for changing the caching device if needed.
     return array_ops.identity(var, name="reused")
   else:
     return array_ops.identity(var, name="not_reused")
Example #29
def _reduce_non_distributed_value(extended, reduce_op, value, destinations):
  """Reduce a non-DistributedValue `value` to `destinations`."""
  if isinstance(value, values.DistributedValues):
    raise ValueError("You are passing a `DistributedValue` to "
                     "`_reduce_non_distributed_value`, which is not allowed.")

  # If the same value is present on all replicas then the PerReplica value will
  # be a single value. We also handle the case when `value` is a single value
  # and equal to 0.
  if value == 0:
    return 0
  # If there is only a single value and the reduce op is MEAN,
  # that value should be on all destinations.
  if reduce_op == reduce_util.ReduceOp.MEAN:
    return value

  cross_device_ops_lib.validate_destinations(destinations)
  # We do not support a reduce op of SUM if the value is the same across
  # all replicas. We call this as part of assign functions for MirroredVariables
  # and summing up identical values across replicas is not clearly defined.
  if (len(extended.worker_devices) != 1 or
      not cross_device_ops_lib.check_destinations(destinations)):
    raise ValueError("A non-DistributedValues value %s cannot be reduced with "
                     "the given reduce op %s." % (value, reduce_op))
  # TODO(anjalisridhar): Move these methods to a device utility file?
  devices = cross_device_ops_lib.get_devices_from(destinations)
  if len(devices) == 1:
    with ops.device(devices[0]):
      return array_ops.identity(value)
  else:
    value_updates = {}
    for d in devices:
      with ops.device(d):
        value_updates[d] = array_ops.identity(value)
    return values.Mirrored(value_updates)
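The per-device branch at the end is just `identity` under a device scope; a hedged standalone sketch (`copy_to_devices` is a hypothetical helper, assuming TF 2.x as `tf`):

import tensorflow as tf

def copy_to_devices(value, devices):
    # Place an identity copy of `value` on each device so every replica
    # reads a local tensor rather than a remote one.
    copies = {}
    for d in devices:
        with tf.device(d):
            copies[d] = tf.identity(value)
    return copies

mirrored = copy_to_devices(tf.constant(1.0), ["/cpu:0"])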
Example #30
  def _fetch(self, val, destination, fn):
    """Return a copy of `val` or `fn(val)` on `destination`."""
    if isinstance(val, values.TowerLocalVariable):
      val = self.reduce(val.reduce_method, val, destinations=destination)
      with ops.device(destination):
        return fn(self.unwrap(val)[0])

    assert isinstance(val, values.Mirrored), (
        "val = %s (type %s)" % (val, val.__class__.__name__))
    if val.on_device(destination):
      with ops.device(destination):
        # Use an identity here to make sure we are returning a tensor
        # instead of e.g. a variable object.
        return array_ops.identity(fn(val.get(destination)))
    device = None
    for d in self._devices:
      if val.on_device(d):
        device = d
        break
    assert device is not None, (
        "Could not find destination %s in list of devices %s." %
        (destination, val.devices))
    with ops.device(device):
      v = fn(val.get(device))
    with ops.device(destination):
      return array_ops.identity(v)
Example #31
    def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
        var_dtype = var.dtype.base_dtype
        lr_t = array_ops.identity(self._get_hyper('learning_rate', var_dtype))
        beta_1_t = array_ops.identity(self._get_hyper('beta_1', var_dtype))
        beta_2_t = array_ops.identity(self._get_hyper('beta_2', var_dtype))
        epsilon_t = ops.convert_to_tensor(self.epsilon, var_dtype)
        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')
        local_step = math_ops.cast(self.iterations + 1, var_dtype)
        next_step = math_ops.cast(self.iterations + 2, var_dtype)
        decay_base = math_ops.cast(0.96, var_dtype)

        # Learning rate multipliers
        if self.lr_multipliers is not None:
            lr_t = _apply_lr_multiplier(self, lr_t, var)

        momentum_cache_t = beta_1_t * (
            1. - 0.5 *
            (math_ops.pow(decay_base, self._initial_decay * local_step)))
        momentum_cache_t_1 = beta_1_t * (
            1. - 0.5 *
            (math_ops.pow(decay_base, self._initial_decay * next_step)))
        m_schedule_new = math_ops.cast(self._m_cache_read,
                                       var_dtype) * momentum_cache_t
        if var_dtype is self._m_cache.dtype:
            m_schedule_new = array_ops.identity(
                state_ops.assign(self._m_cache,
                                 m_schedule_new,
                                 use_locking=self._use_locking))
        m_schedule_next = m_schedule_new * momentum_cache_t_1

        m_scaled_g_values = grad * (1. - beta_1_t)
        m_t = state_ops.assign(m, m * beta_1_t, use_locking=self._use_locking)
        # Switched to tf.control_dependencies here
        with tf.control_dependencies([m_t]):
            m_t = self._resource_scatter_add(m, indices, m_scaled_g_values)
            m_t_slice = array_ops.gather(m_t, indices)

        m_t_prime = m_t_slice / (1. - m_schedule_next)
        g_prime = grad / (1. - m_schedule_new)
        m_t_bar = (1. - momentum_cache_t) * g_prime + (momentum_cache_t_1 *
                                                       m_t_prime)

        v_scaled_g_values = (grad * grad) * (1. - beta_2_t)
        v_t = state_ops.assign(v, v * beta_2_t, use_locking=self._use_locking)

        with tf.control_dependencies([v_t]):
            v_t = self._resource_scatter_add(v, indices, v_scaled_g_values)
            v_t_slice = array_ops.gather(v_t, indices)

        v_t_prime_denominator = 1. - math_ops.pow(beta_2_t, local_step)
        v_t_prime = v_t_slice / v_t_prime_denominator
        v_prime_sqrt_plus_eps = math_ops.sqrt(v_t_prime) + epsilon_t

        var_t = self._resource_scatter_add(
            var, indices, -self.eta_t * lr_t * m_t_bar / v_prime_sqrt_plus_eps)

        # Weight decays
        if var.name in self.weight_decays.keys():
            var_t = _apply_weight_decays(self, var, var_t)

        var_update = state_ops.assign(var,
                                      var_t,
                                      use_locking=self._use_locking)

        # Cosine annealing
        (iteration_done, t_cur_update,
         eta_t_update) = _update_t_cur_eta_t_v2(self, lr_t, var)
        if iteration_done and not self._init_notified:
            self._init_notified = True

        updates = [var_update, m_t_bar, v_t]
        if iteration_done:
            updates += [t_cur_update]
        if self.use_cosine_annealing and iteration_done:
            updates += [eta_t_update]
        return control_flow_ops.group(*updates)
Example #32
def get_logits_and_prob(logits=None,
                        p=None,
                        multidimensional=False,
                        validate_args=False,
                        name="GetLogitsAndProb"):
    """Converts logits to probabilities and vice-versa, and returns both.

    Args:
      logits: Numeric `Tensor` representing log-odds.
      p: Numeric `Tensor` representing probabilities.
      multidimensional: Given `p` a [N1, N2, ... k] dimensional tensor,
        whether the last dimension represents the probability between k classes.
        This will additionally assert that the values in the last dimension
        sum to one. If `False`, will instead assert that each value is in
        `[0, 1]`.
      validate_args: `Boolean`, default `False`.  Whether to assert `0 <= p <= 1`
        if multidimensional is `False`, otherwise that the last dimension of `p`
        sums to one.
      name: A name for this operation (optional).

    Returns:
      Tuple with `logits` and `p`. If `p` has an entry that is `0` or `1`, then
      the corresponding entry in the returned logits will be `-Inf` and `Inf`
      respectively.

    Raises:
      ValueError: if neither `p` nor `logits` were passed in, or both were.
    """
    with ops.name_scope(name, values=[p, logits]):
        if p is None and logits is None:
            raise ValueError("Must pass p or logits.")
        elif p is not None and logits is not None:
            raise ValueError("Must pass either p or logits, not both.")
        elif p is None:
            logits = array_ops.identity(logits, name="logits")
            with ops.name_scope("p"):
                p = math_ops.sigmoid(logits)
        elif logits is None:
            with ops.name_scope("p"):
                p = array_ops.identity(p)
                if validate_args:
                    one = constant_op.constant(1., p.dtype)
                    dependencies = [check_ops.assert_non_negative(p)]
                    if multidimensional:
                        dependencies += [
                            assert_close(math_ops.reduce_sum(
                                p, reduction_indices=[-1]),
                                         one,
                                         message="p does not sum to 1.")
                        ]
                    else:
                        dependencies += [
                            check_ops.assert_less_equal(
                                p,
                                one,
                                message="p has components greater than 1.")
                        ]
                    p = control_flow_ops.with_dependencies(dependencies, p)
            with ops.name_scope("logits"):
                logits = math_ops.log(p) - math_ops.log(1. - p)
        return (logits, p)
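For the binary (non-multidimensional) case the logits/probability round trip reduces to a sigmoid and its inverse; a minimal TF 2.x sketch:

import tensorflow as tf

p = tf.constant([0.1, 0.5, 0.9])
logits = tf.math.log(p) - tf.math.log(1.0 - p)  # log-odds, the inverse of sigmoid
p_back = tf.math.sigmoid(logits)                # recovers p up to float error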
Example #33
def _single_identity_op_at_end():
    inputs = keras.Input(shape=(10, ))
    x = keras.layers.Dense(10)(inputs)
    outputs = array_ops.identity(x)
    return keras.Model(inputs, outputs)
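A hedged usage sketch of the factory above, assuming TF 2.x, where raw ops such as `tf.identity` applied to Keras tensors are folded into the functional graph automatically:

import numpy as np
import tensorflow as tf
from tensorflow import keras

inputs = keras.Input(shape=(10,))
x = keras.layers.Dense(10)(inputs)
model = keras.Model(inputs, tf.identity(x))             # identity as a pass-through output
print(model(np.zeros((1, 10), dtype="float32")).shape)  # (1, 10)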
Example #34
 def _get_cross_tower(self):
     device = device_util.canonicalize(device_util.current())
     if device in self._index:
         return array_ops.identity(self._index[device])
     return array_ops.identity(self._primary_var)
Example #35
def _assign_on_device(device, variable, tensor):
    with ops.device(device):
        return variable.assign(array_ops.identity(tensor))
Example #36
 def GraphFn(self, inp):
     # Can use any op that is converted to TRT with int32 inputs
     inp_transposed = array_ops.transpose(inp, [0, 3, 2, 1],
                                          name='transpose_0')
     return array_ops.identity(inp_transposed, name='output_0')
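Pinning a stable output name with `identity` (so conversion or export tooling can find the node later) works the same way in a TF 2.x graph; a minimal sketch:

import tensorflow as tf

@tf.function
def graph_fn(inp):
    out = tf.transpose(inp, [0, 3, 2, 1], name="transpose_0")
    # The identity op attaches a stable, human-readable node name to the output.
    return tf.identity(out, name="output_0")

print(graph_fn(tf.zeros([1, 4, 3, 2])).shape)  # (1, 2, 3, 4)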
Example #37
        def _model_fn(features, labels, mode, params):
            """A Estimator `model_fn` for NPUEstimator."""
            model_fn_args = function_utils.fn_args(model_fn)
            kwargs = {}
            if 'labels' in model_fn_args:
                kwargs['labels'] = labels
            if 'mode' in model_fn_args:
                kwargs['mode'] = mode
            if 'params' in model_fn_args:
                kwargs['params'] = params
            if 'config' in model_fn_args:
                kwargs['config'] = config
            estimator_spec = model_fn(features=features, **kwargs)
            """
            add hooks:
                NPUInitHook: for all modes, NPUInitHook should be the first session hook
                NPUShutDownHook: for all modes, NPUShutDownHook should be the first session hook
                NPUBroadcastGlobalVariablesHook: train
                NPUCheckpointSaverHook:train
            """
            npu_hooks = []

            if mode == model_fn_lib.ModeKeys.TRAIN:
                if not isinstance(
                        estimator_spec, NPUEstimatorSpec) and not isinstance(
                            estimator_spec, model_fn_lib.EstimatorSpec):
                    raise RuntimeError(
                        'estimator_spec used by NPU train must have type '
                        '`NPUEstimatorSpec` or `EstimatorSpec`. Got {}'.format(
                            type(estimator_spec)))
                # 1. NPUBroadcastGlobalVariablesHook
                rank_size = os.getenv('RANK_SIZE')
                if (rank_size is not None and rank_size.isdigit()
                        and int(rank_size) > 1 and not config.horovod_mode):
                    npu_hooks.append(
                        NPUBroadcastGlobalVariablesHook(
                            self.__device_info._root_rank,
                            self.__device_info._index))

                # 2. NPUCheckpointSaverHook
                if config.save_checkpoints_steps or config.save_checkpoints_secs:
                    npu_hooks.append(
                        NPUCheckpointSaverHook(
                            checkpoint_dir=model_dir,
                            save_secs=config.save_checkpoints_secs,
                            save_steps=config.save_checkpoints_steps))

                if isinstance(estimator_spec, NPUEstimatorSpec):
                    if estimator_spec._host_call is not None:
                        host_call = _OutfeedHostCall(mode)
                        host_call.record(
                            {"host_call": estimator_spec._host_call})
                        # add outfeed enqueue op
                        loss, train_op = estimator_spec.loss, estimator_spec.train_op
                        with ops.control_dependencies([train_op]):
                            host_call_outfeed_op = host_call.create_enqueue_op(
                            )
                            with ops.control_dependencies(
                                [host_call_outfeed_op]):
                                loss = array_ops.identity(loss)
                                estimator_spec = estimator_spec._replace(
                                    loss=loss)
                        # add outfeed dequeue op
                        host_call_ops = host_call.create_npu_hostcall()
                        npu_hooks.append(
                            NPUInfeedOutfeedSessionHook(host_call_ops, mode))
                    npu_hooks.append(NPULogOutfeedSessionHook(sys.stderr))

                # 3. set iterations per loop hook
                if config.iterations_per_loop > 1:
                    npu_hooks.append(
                        SetIterationsVarHook(config.iterations_per_loop))
                    train_op = tf.group(estimator_spec.train_op,
                                        name="IterationOp")
                    estimator_spec = estimator_spec._replace(train_op=train_op)

                train_hooks = estimator_spec.training_hooks
                train_hooks = list(train_hooks or [])
                new_train_hooks = npu_hooks + train_hooks

                estimator_spec = estimator_spec._replace(
                    training_hooks=tuple(new_train_hooks))

            elif mode == model_fn_lib.ModeKeys.EVAL:
                if not isinstance(
                        estimator_spec, NPUEstimatorSpec) and not isinstance(
                            estimator_spec, model_fn_lib.EstimatorSpec):
                    raise RuntimeError(
                        'estimator_spec used by NPU evaluate must have type '
                        '`NPUEstimatorSpec` or `EstimatorSpec`. Got {}'.format(
                            type(estimator_spec)))
                if isinstance(estimator_spec, NPUEstimatorSpec):
                    if estimator_spec._host_call is not None:
                        host_call = _OutfeedHostCall(mode)
                        host_call.record(
                            {"host_call": estimator_spec._host_call})
                        # add outfeed enqueue op
                        loss, train_op = estimator_spec.loss, estimator_spec.train_op
                        with ops.control_dependencies([loss]):
                            host_call_outfeed_op = host_call.create_enqueue_op(
                            )
                            with ops.control_dependencies(
                                [host_call_outfeed_op]):
                                loss = array_ops.identity(loss)
                                estimator_spec = estimator_spec._replace(
                                    loss=loss)
                        # add outfeed dequeue op
                        host_call_ops = host_call.create_npu_hostcall()
                        npu_hooks.append(
                            NPUInfeedOutfeedSessionHook(host_call_ops, mode))
                    npu_hooks.append(NPULogOutfeedSessionHook(sys.stderr))
                if len(npu_hooks) > 0:
                    evaluation_hooks = estimator_spec.evaluation_hooks
                    evaluation_hooks = list(evaluation_hooks or [])
                    new_evaluation_hooks = npu_hooks + evaluation_hooks
                    estimator_spec = estimator_spec._replace(
                        evaluation_hooks=tuple(new_evaluation_hooks))

            elif mode == model_fn_lib.ModeKeys.PREDICT:
                if len(npu_hooks) > 0:
                    prediction_hooks = estimator_spec.prediction_hooks
                    prediction_hooks = list(prediction_hooks or [])
                    new_prediction_hooks = npu_hooks + prediction_hooks

                    estimator_spec = estimator_spec._replace(
                        prediction_hooks=tuple(new_prediction_hooks))
            return estimator_spec
Example #38
 def _prepare(self, var_list):
     # Get the value of the momentum cache before starting to apply gradients.
     self._m_cache_read = array_ops.identity(self._m_cache)
     return super(NadamW, self)._prepare(var_list)
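The `identity` read above snapshots the cache variable before the per-variable updates mutate it; a hedged eager sketch of why that matters (assuming TF 2.x as `tf`):

import tensorflow as tf

cache = tf.Variable(3.0)
snapshot = tf.identity(cache)             # materializes the current value as a tensor
cache.assign_add(1.0)                     # later updates do not affect the snapshot
print(snapshot.numpy(), cache.numpy())    # 3.0 4.0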
Example #39
 def _update_mean_var():
   """Internal function that updates mean and variance during training."""
   with ops.control_dependencies([ema_assign_op]):
     return array_ops_.identity(assign_mean), array_ops_.identity(assign_var)
Example #40
    def _resource_apply_dense(self, grad, var):
        var_dtype = var.dtype.base_dtype
        lr_t = array_ops.identity(self._get_hyper('learning_rate', var_dtype))
        beta_1_t = array_ops.identity(self._get_hyper('beta_1', var_dtype))
        beta_2_t = array_ops.identity(self._get_hyper('beta_2', var_dtype))
        epsilon_t = ops.convert_to_tensor(self.epsilon, var_dtype)
        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')
        local_step = math_ops.cast(self.iterations + 1, var_dtype)
        next_step = math_ops.cast(self.iterations + 2, var_dtype)
        decay_base = math_ops.cast(0.96, var_dtype)

        # Learning rate multipliers
        if self.lr_multipliers is not None:
            lr_t = _apply_lr_multiplier(self, lr_t, var)

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = beta_1_t * (
            1. - 0.5 *
            (math_ops.pow(decay_base, self._initial_decay * local_step)))
        momentum_cache_t_1 = beta_1_t * (
            1. - 0.5 *
            (math_ops.pow(decay_base, self._initial_decay * next_step)))
        m_schedule_new = math_ops.cast(self._m_cache_read,
                                       var_dtype) * momentum_cache_t
        if var_dtype is self._m_cache.dtype:
            m_schedule_new = array_ops.identity(
                state_ops.assign(self._m_cache,
                                 m_schedule_new,
                                 use_locking=self._use_locking))
        m_schedule_next = m_schedule_new * momentum_cache_t_1

        # the following equations given in [1]
        g_prime = grad / (1. - m_schedule_new)
        m_t = beta_1_t * m + (1. - beta_1_t) * grad
        m_t_prime = m_t / (1. - m_schedule_next)
        v_t = beta_2_t * v + (1. - beta_2_t) * math_ops.square(grad)
        v_t_prime = v_t / (1. - math_ops.pow(beta_2_t, local_step))
        m_t_bar = (1. - momentum_cache_t) * g_prime + (momentum_cache_t *
                                                       m_t_prime)

        m_t = state_ops.assign(m, m_t, use_locking=self._use_locking)
        v_t = state_ops.assign(v, v_t, use_locking=self._use_locking)

        var_t = math_ops.sub(
            var, self.eta_t * lr_t * m_t_bar /
            (math_ops.sqrt(v_t_prime + epsilon_t)))

        # Weight decays
        if var.name in self.weight_decays.keys():
            var_t = _apply_weight_decays(self, var, var_t)

        var_update = state_ops.assign(var,
                                      var_t,
                                      use_locking=self._use_locking)

        # Cosine annealing
        (iteration_done, t_cur_update,
         eta_t_update) = _update_t_cur_eta_t_v2(self, lr_t, var)
        if iteration_done and not self._init_notified:
            self._init_notified = True

        updates = [var_update, m_t, v_t]
        if iteration_done:
            updates += [t_cur_update]
        if self.use_cosine_annealing and iteration_done:
            updates += [eta_t_update]
        return control_flow_ops.group(*updates)
Example #41
 def call(self, inputs, keyword=None):
   return array_ops.identity(inputs)
Example #42
    def _resource_apply_sparse(self, grad, var, indices):
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)
        local_step = math_ops.cast(self.iterations + 1, var_dtype)
        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')
        beta_1_t = array_ops.identity(self._get_hyper('beta_1', var_dtype))
        beta_2_t = array_ops.identity(self._get_hyper('beta_2', var_dtype))
        beta_1_power = math_ops.pow(beta_1_t, local_step)
        beta_2_power = math_ops.pow(beta_2_t, local_step)
        epsilon_t = ops.convert_to_tensor(self.epsilon, var_dtype)

        lr_t = lr_t * math_ops.sqrt(1 - beta_2_power) / (1 - beta_1_power)
        # Learning rate multipliers
        if self.lr_multipliers is not None:
            lr_t = _apply_lr_multiplier(self, lr_t, var)

        m_scaled_g_values = grad * (1 - beta_1_t)
        m_t = state_ops.assign(m, m * beta_1_t, use_locking=self._use_locking)

        # Switched to tf.control_dependencies here
        with tf.control_dependencies([m_t]):
            m_t = self._resource_scatter_add(m, indices, m_scaled_g_values)

        v_scaled_g_values = (grad * grad) * (1 - beta_2_t)
        v_t = state_ops.assign(v, v * beta_2_t, use_locking=self._use_locking)
        with tf.control_dependencies([v_t]):
            v_t = self._resource_scatter_add(v, indices, v_scaled_g_values)

        if self.amsgrad:
            vhat = self.get_slot(var, 'vhat')
            vhat_t = state_ops.assign(vhat,
                                      math_ops.maximum(vhat, v_t),
                                      use_locking=self._use_locking)
            var_delta = m_t / (math_ops.sqrt(vhat_t) + epsilon_t)
        else:
            var_delta = m_t / (math_ops.sqrt(v_t) + epsilon_t)
        var_t = math_ops.sub(var, self.eta_t * lr_t * var_delta)

        # Weight decays
        if var.name in self.weight_decays.keys():
            var_t = _apply_weight_decays(self, var, var_t)

        var_update = state_ops.assign(var,
                                      var_t,
                                      use_locking=self._use_locking)

        # Cosine annealing
        (iteration_done, t_cur_update,
         eta_t_update) = _update_t_cur_eta_t_v2(self, lr_t, var)
        if iteration_done and not self._init_notified:
            self._init_notified = True

        updates = [var_update, m_t, v_t]
        if iteration_done:
            updates += [t_cur_update]
        if self.use_cosine_annealing and iteration_done:
            updates += [eta_t_update]
        if self.amsgrad:
            updates.append(vhat_t)
        return control_flow_ops.group(*updates)
Example #43
 def f():
   dataset = dataset_ops.Dataset.range(10)
   dataset = array_ops.identity(dataset)
   return dataset
Example #44
def _flatten_tensor(tensor, sequence_mask, expected_length):
    """Flattens the two first dimensions and reshapes a tensor or sparse tensor.

  If `tensor` is a dense tensor, the sequence_mask is used to infer valid
  inputs.

  Note: If `tensor` is a `SparseTensor` and the indices are not sorted, they
  will be reordered.

  Args:
    tensor: A `Tensor` or `SparseTensor` of dimension at least 2, of shape
      [batch_size, seq_length, D0, D1, ..., DN].
    sequence_mask: A boolean `Tensor` of shape [batch_size, seq_length].
    expected_length: A integer scalar `Tensor` with the expected length of the
      resulting flattenned Tensor.

  Returns:
    A `Tensor` object of shape [expected_length, D0, D1, ..., DN].

  Raises:
    ValueError: If `tensor` has not at least 2 dimensions.
    ValueError: If `tensor` is not a `Tensor` or `SparseTensor` object.
    InvalidArgumentError: If the resulting `Tensor` doesn't have the expected
      length.
  """
    shape = tensor.get_shape()
    if shape.ndims < 2:
        raise ValueError(
            'Input tensor expected to have at least 2 dimensions, '
            'got {} instead.'.format(shape.ndims))
    if isinstance(tensor, sparse_tensor.SparseTensor):
        # What follows depends on the indices ordering. Hence we reorder the indices
        # to ensure correctness.
        flat_tensor = sparse_ops.sparse_reorder(tensor).values
        if shape.ndims > 2:
            new_shape = array_ops.concat([[-1], shape[2:]], axis=0)
            flat_tensor = array_ops.reshape(tensor.values, new_shape)
    elif isinstance(tensor, ops.Tensor):
        flat_tensor = array_ops.boolean_mask_v2(tensor, sequence_mask)
    else:
        raise ValueError(
            '`tensor` expected to be a `Tensor` or  `SparseTensor` '
            'got `{}` instead.'.format(tensor))
    if shape.ndims == 2:
        flat_tensor = array_ops.expand_dims(flat_tensor, -1)
        expected_shape = array_ops.concat([[expected_length], [1]], axis=0)
    else:
        expected_shape = array_ops.concat([[expected_length], shape[2:]],
                                          axis=0)

    # TODO(b/119617064): Unify eager and graph implementations.
    err_message = 'Tensor shape is incompatible with provided mask.'
    if context.executing_eagerly():
        if flat_tensor._shape_tuple() != tuple(expected_shape.numpy()):  # pylint: disable=protected-access
            raise ValueError(err_message)
        return flat_tensor
    with ops.control_dependencies([
            check_ops.assert_equal(array_ops.shape(flat_tensor),
                                   expected_shape,
                                   message=err_message)
    ]):
        return array_ops.identity(flat_tensor)
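For the dense branch, the flattening is just a boolean mask over the first two dimensions; a minimal TF 2.x sketch:

import tensorflow as tf

x = tf.reshape(tf.range(2 * 3 * 4, dtype=tf.float32), [2, 3, 4])
mask = tf.constant([[True, True, False],
                    [True, False, False]])
flat = tf.boolean_mask(x, mask)   # shape [3, 4]: keeps only the unmasked steps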
Example #45
 def read_var(self, replica_local_var):
     """Read the aggregate value of a replica-local variable."""
     if isinstance(replica_local_var, values.ReplicaLocalVariable):
         return replica_local_var._get_cross_replica()  # pylint: disable=protected-access
     assert isinstance(replica_local_var, values.Mirrored)
     return array_ops.identity(replica_local_var.get())
Example #46
 def call(self, inputs, training=True):
   return control_flow_util.smart_cond(training, lambda: inputs * 0,
                                       lambda: array_ops.identity(inputs))
Example #47
 def training_loss(self, features, labels, data_spec=None,
                   name='training_loss'):
   return array_ops.identity(
       self._get_loss(features, labels, data_spec=data_spec), name=name)
Example #48
 def fn(x):
     with ops.control_dependencies([check_ops.assert_equal(x, 0)]):
         return array_ops.identity(x)
Example #49
 def testBatchReduceToDeviceTensors(self, strategy):
   value = strategy.run(lambda: array_ops.identity(1.))
   reduced = strategy.extended.batch_reduce_to(reduce_util.ReduceOp.SUM,
                                               [(value, value),
                                                (value, value)])
   self.assertAllEqual([2., 2.], reduced)
Example #50
 def mode(self, name="mode"):
     """Mode of each batch member."""
     with ops.name_scope(self.name):
         with ops.op_scope([self._mu], name):
             return array_ops.identity(self._mu)
Example #51
 def testBatchReduceToHostTensor(self, strategy):
   value = array_ops.identity(1.)
   reduced = strategy.extended.batch_reduce_to(reduce_util.ReduceOp.SUM,
                                               [(value, value),
                                                (value, value)])
   self.assertAllEqual([2., 2.], reduced)
Ejemplo n.º 52
0
def dynamic_crnn(cell,
                 inputs,
                 gate_vector,
                 sequence_length=None,
                 initial_state=None,
                 dtype=None,
                 parallel_iterations=None,
                 swap_memory=False,
                 time_major=False,
                 scope=None):

    if not _like_rnncell(cell):
        raise TypeError("cell must be an instance of RNNCell")
    flat_input = nest.flatten(inputs)

    if not time_major:
        # (B,T,D) => (T,B,D)
        flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
        flat_input = tuple(
            rnn._transpose_batch_time(input_) for input_ in flat_input)

    parallel_iterations = parallel_iterations or 32
    if sequence_length is not None:
        sequence_length = math_ops.to_int32(sequence_length)
        if sequence_length.get_shape().ndims not in (None, 1):
            raise ValueError(
                "sequence_length must be a vector of length batch_size, "
                "but saw shape: %s" % sequence_length.get_shape())
        sequence_length = array_ops.identity(  # Just to find it in the graph.
            sequence_length,
            name="sequence_length")

    with vs.variable_scope(scope or "rnn") as varscope:
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)
        batch_size = rnn._best_effort_input_batch_size(flat_input)

        if initial_state is not None:
            state = initial_state
        else:
            if not dtype:
                raise ValueError(
                    "If there is no initial_state, you must give a dtype.")
            state = cell.zero_state(batch_size, dtype)

        def _assert_has_shape(x, shape):
            x_shape = array_ops.shape(x)
            packed_shape = array_ops.stack(shape)
            return rnn.control_flow_ops.Assert(
                math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), [
                    "Expected shape for Tensor %s is " % x.name, packed_shape,
                    " but saw shape: ", x_shape
                ])

        if sequence_length is not None:
            # Perform some shape validation
            with ops.control_dependencies(
                [_assert_has_shape(sequence_length, [batch_size])]):
                sequence_length = array_ops.identity(sequence_length,
                                                     name="CheckSeqLen")

        inputs = nest.pack_sequence_as(structure=inputs,
                                       flat_sequence=flat_input)
        (outputs, final_state) = _dynamic_crnn_loop(
            cell,
            inputs,
            state,
            parallel_iterations=parallel_iterations,
            gate_vector=gate_vector,
            swap_memory=swap_memory,
            sequence_length=sequence_length,
            dtype=dtype)
        if not time_major:
            # (T,B,D) => (B,T,D)
            outputs = nest.map_structure(rnn._transpose_batch_time, outputs)
        return (outputs, final_state)
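
The (B,T,D) => (T,B,D) step above relies on rnn._transpose_batch_time, which swaps the batch and time axes and leaves any trailing feature axes untouched. A small sketch of the equivalent with public ops (the input shape is an illustrative assumption):

import tensorflow as tf

# Hypothetical batch-major input: [batch=2, time=3, depth=4].
inputs = tf.random.normal([2, 3, 4])

# Swap the first two axes and keep the rest in place, as _transpose_batch_time does.
rank = tf.rank(inputs)
perm = tf.concat([[1, 0], tf.range(2, rank)], axis=0)
time_major = tf.transpose(inputs, perm)

print(time_major.shape)  # (3, 2, 4)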
Ejemplo n.º 53
0
 def testReduceHostTensor(self, strategy):
   reduced = strategy.reduce(
       reduce_util.ReduceOp.SUM, array_ops.identity(1.), axis=None)
   self.assertEqual(reduced.numpy(), 2.)
Ejemplo n.º 54
0
 def testReduceToDeviceTensors(self, strategy):
   value = strategy.run(lambda: array_ops.identity(1.))
   reduced = strategy.extended.reduce_to(reduce_util.ReduceOp.SUM, value,
                                         value)
   self.assertEqual(reduced.numpy(), 2.)
Ejemplo n.º 55
0
 def _prepare_local(self, var_device, var_dtype, apply_state):
     super(SGD, self)._prepare_local(var_device, var_dtype, apply_state)
     apply_state[(var_device, var_dtype)]["momentum"] = array_ops.identity(
         self._get_hyper("momentum", var_dtype))
Ejemplo n.º 56
0
 def testReduceToHostTensor(self, strategy):
   value = array_ops.identity(1.)
   reduced = strategy.extended.reduce_to(reduce_util.ReduceOp.SUM, value,
                                         value)
   self.assertEqual(reduced.numpy(), 2.)
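
For context on what these reduce tests expect, here is a hedged, self-contained sketch with a hypothetical single-device MirroredStrategy; with one replica the sum of a host tensor comes out as 1.0, whereas the two-replica strategies assumed by the tests above yield 2.0:

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy(["/cpu:0"])  # hypothetical one-replica setup
value = tf.identity(1.)
reduced = strategy.extended.reduce_to(
    tf.distribute.ReduceOp.SUM, value, destinations=value)
print(reduced.numpy())  # 1.0 -- only one replica contributes here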
Ejemplo n.º 57
0
    def _renorm_correction_and_moments(self, mean, variance, training):
        """Returns the correction and update values for renorm."""
        stddev = math_ops.sqrt(variance + self.epsilon)
        # Compute the average mean and standard deviation, as if they were
        # initialized with this batch's moments.
        mixed_renorm_mean = (self.renorm_mean +
                             (1. - self.renorm_mean_weight) * mean)
        mixed_renorm_stddev = (self.renorm_stddev +
                               (1. - self.renorm_stddev_weight) * stddev)
        # Compute the corrections for batch renorm.
        r = stddev / mixed_renorm_stddev
        d = (mean - mixed_renorm_mean) / mixed_renorm_stddev
        # Ensure the corrections use pre-update moving averages.
        with ops.control_dependencies([r, d]):
            mean = array_ops.identity(mean)
            stddev = array_ops.identity(stddev)
        rmin, rmax, dmax = [
            self.renorm_clipping.get(key) for key in ['rmin', 'rmax', 'dmax']
        ]
        if rmin is not None:
            r = math_ops.maximum(r, rmin)
        if rmax is not None:
            r = math_ops.minimum(r, rmax)
        if dmax is not None:
            d = math_ops.maximum(d, -dmax)
            d = math_ops.minimum(d, dmax)
        # When not training, use r=1, d=0.
        r = tf_utils.smart_cond(training, lambda: r,
                                lambda: array_ops.ones_like(r))
        d = tf_utils.smart_cond(training, lambda: d,
                                lambda: array_ops.zeros_like(d))

        def _update_renorm_variable(var, weight, value):
            """Updates a moving average and weight, returns the unbiased value."""
            value = array_ops.identity(value)

            def _do_update():
                """Updates the var and weight, returns their updated ratio."""
                # Update the variables without zero debiasing. The debiasing will be
                # accomplished by dividing the exponential moving average by the weight.
                # For example, after a single update, the moving average would be
                # (1-decay) * value and the weight would be 1-decay, so their ratio
                # gives back the value.
                # Make sure the weight is not updated before r and d are computed.
                with ops.control_dependencies([value]):
                    weight_value = array_ops.constant(1., dtype=weight.dtype)
                new_var = self._assign_moving_average(var, value,
                                                      self.renorm_momentum)
                new_weight = self._assign_moving_average(
                    weight, weight_value, self.renorm_momentum)
                # TODO(yuefengz): the updates to var and weight cannot be batched
                # together if we fetch their updated values here. Consider calculating
                # the new values and delaying the updates.
                return new_var / new_weight

            def _fake_update():
                return array_ops.identity(var)

            return tf_utils.smart_cond(training, _do_update, _fake_update)

        # TODO(yuefengz): colocate the operations
        new_mean = _update_renorm_variable(self.renorm_mean,
                                           self.renorm_mean_weight, mean)
        new_stddev = _update_renorm_variable(self.renorm_stddev,
                                             self.renorm_stddev_weight, stddev)
        # Make sqrt(moving_variance + epsilon) = new_stddev.
        new_variance = math_ops.square(new_stddev) - self.epsilon

        return (r, d, new_mean, new_variance)
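
A short worked example of the correction math with concrete numbers (all values and clipping bounds below are illustrative assumptions): r rescales the output normalized with the batch moments toward the moving moments, and d shifts it.

import numpy as np

# Hypothetical running state and batch moments for a single feature.
renorm_mean, renorm_mean_weight = 0.45, 0.9      # biased moving mean and its weight
renorm_stddev, renorm_stddev_weight = 1.8, 0.9   # biased moving stddev and its weight
batch_mean, batch_stddev = 0.7, 2.2              # this batch's moments

# "As if initialized with this batch's moments" mix, as in the code above.
mixed_mean = renorm_mean + (1. - renorm_mean_weight) * batch_mean          # 0.52
mixed_stddev = renorm_stddev + (1. - renorm_stddev_weight) * batch_stddev  # 2.02

r = batch_stddev / mixed_stddev               # ~1.089
d = (batch_mean - mixed_mean) / mixed_stddev  # ~0.089

# Clip with hypothetical renorm_clipping bounds.
rmin, rmax, dmax = 1. / 3., 3., 5.
r = np.clip(r, rmin, rmax)
d = np.clip(d, -dmax, dmax)
print(r, d)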
Ejemplo n.º 58
0
 def replica_fn():
   value = array_ops.identity(1.0)
   reduced = strategy.extended._replica_ctx_all_reduce(
       reduce_util.ReduceOp.SUM, value)
   return reduced
Ejemplo n.º 59
0
 def send():
   s0 = collective_ops.broadcast_send(
       c * 3, c.shape, c.dtype, group_size=2, group_key=1, instance_key=1)
   with ops.control_dependencies([s0.op]):
     return array_ops.identity(c)
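
broadcast_send only makes sense alongside a matching receive on the other group member. A hedged sketch of what that peer might run, assuming the same constant c and two-member collective group as the enclosing test (group_size, group_key, and instance_key mirror the send call above):

from tensorflow.python.ops import collective_ops

def recv():
  # Hypothetical peer of send(): receives a tensor with c's shape and dtype
  # from the broadcaster in the same collective group.
  return collective_ops.broadcast_recv(
      c.shape, c.dtype, group_size=2, group_key=1, instance_key=1)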
Ejemplo n.º 60
0
 def _fake_update():
     return array_ops.identity(var)