Example #1
    def GraphFn(self, x1, x2):
        x = x1
        q = math_ops.abs(x)
        q = q + 1.0
        q = gen_math_ops.exp(q)
        q = gen_math_ops.log(q)
        q = array_ops.squeeze(q, axis=-2)
        q = math_ops.abs(q)
        q = q + 2.2
        q = gen_math_ops.sqrt(q)
        q = gen_math_ops.rsqrt(q)
        q = math_ops.negative(q)
        q = array_ops.squeeze(q, axis=3)
        q = math_ops.abs(q)
        q = q + 3.0
        a = gen_math_ops.reciprocal(q)

        # this chain of operations has a batch size of 5, which is different from
        # the batch size for the other operations.
        x = constant_op.constant(np.random.randn(5, 8, 12), dtype=x.dtype)
        q = math_ops.abs(x)
        q = q + 2.0
        q = gen_math_ops.exp(q)
        q = gen_math_ops.log(q)
        q = math_ops.abs(q)
        q = q + 2.1
        q = gen_math_ops.sqrt(q)
        q = gen_math_ops.rsqrt(q)
        q = math_ops.negative(q)
        q = math_ops.abs(q)
        q = q + 4.0
        b = gen_math_ops.reciprocal(q)

        # TODO(jie): this one will break, broadcasting on batch.
        x = x2
        q = math_ops.abs(x)
        q = q + 5.0
        q = gen_math_ops.exp(q)
        q = array_ops.squeeze(q, axis=[-1, -2, 3])
        q = gen_math_ops.log(q)
        q = math_ops.abs(q)
        q = q + 5.1
        q = gen_array_ops.reshape(q, [12, 5, 1, 1, 8, 1, 12])
        q = array_ops.squeeze(q, axis=[5, 2, 3])
        q = gen_math_ops.sqrt(q)
        q = math_ops.abs(q)
        q = q + 5.2
        q = gen_math_ops.rsqrt(q)
        q = math_ops.negative(q)
        q = math_ops.abs(q)
        q = q + 5.3
        c = gen_math_ops.reciprocal(q)

        q = a * b
        q = q / c
        return array_ops.squeeze(q, name="output_0")
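The first chain above is a pipeline of shape-preserving unary ops plus two squeezes of size-1 dimensions. A minimal NumPy sketch of the same arithmetic (illustrative only; the [12, 5, 8, 1, 1, 12] input shape is taken from the GetParams variants of this test further below):

import numpy as np

x = np.random.randn(12, 5, 8, 1, 1, 12).astype(np.float32)
q = np.abs(x)
q = q + 1.0
q = np.log(np.exp(q))          # exp followed by log is the identity here
q = np.squeeze(q, axis=-2)     # drops one of the size-1 dimensions
q = np.abs(q)
q = q + 2.2
q = 1.0 / np.sqrt(np.sqrt(q))  # sqrt followed by rsqrt equals q ** -0.25
q = -q
q = np.squeeze(q, axis=3)      # drops the remaining size-1 dimension
q = np.abs(q)
q = q + 3.0
a = 1.0 / q                    # reciprocal
print(a.shape)                 # (12, 5, 8, 12)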
Example #2
    def testCloneSplit(self):
        # a -> b -> c
        #       \-> d
        g = ops.Graph()
        with g.as_default():
            a = array_ops.constant(1., name="a")
            b = math_ops.exp(a, name="b")
            c = math_ops.log(b, name="c")
            d = math_ops.negative(b, name="d")

            b_new = array_ops.constant(math.e**2, name="b_new")
            d_new = array_ops.constant(-math.e**2, name="d_new")

            # case 1
            d_out = meta_graph.clone(d, "copy1")
            self.assertEqual(d_out.name, "copy1/d:0")
            self.assertEqual(d_out.op.inputs[:], [b])

            # case 2
            copies = meta_graph.clone([c, d], "copy2")
            self.assertEqual(copies[0].op.inputs[:], [b])
            self.assertEqual(copies[1].op.inputs[:], [b])

            # case 3
            copies = meta_graph.clone([c, d], "copy3", replace={b: b_new})
            with self.test_session(use_gpu=True) as sess:
                c_out_, d_out_ = sess.run(copies)
            self.assertNear(c_out_, 2., 1e-6)
            self.assertNear(d_out_, -math.e**2, 1e-6)

            # case 4
            c_out = meta_graph.clone(c, "copy4", replace={d: d_new})
            self.assertEqual(c_out.op.inputs[:], [b])
            with self.test_session(use_gpu=True) as sess:
                self.assertNear(sess.run(c_out), 1., 1e-6)
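The expected values in these assertions follow from simple arithmetic on the original graph (a = 1, b = e, c = log b, d = -b). A quick plain-Python check of those numbers (illustrative; it does not exercise meta_graph.clone itself):

import math

b = math.exp(1.0)                              # b = e
assert abs(math.log(b) - 1.0) < 1e-6           # c == 1.
assert abs(-b - (-math.e)) < 1e-6              # d == -e
b_new = math.e ** 2
assert abs(math.log(b_new) - 2.0) < 1e-6       # case 3: cloned c == 2.
assert abs(-b_new - (-math.e ** 2)) < 1e-6     # case 3: cloned d == -e**2
# Case 4 replaces d, which is not upstream of c, so c still evaluates to 1.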
Example #3
    def testInitializerFunction(self):
        value = [[-42], [133.7]]
        shape = [2, 1]
        with self.test_session():
            initializer = lambda: constant_op.constant(value)

            v1 = variables.Variable(initializer, dtype=dtypes.float32)
            self.assertEqual(shape, v1.get_shape())
            self.assertAllClose(value, v1.initial_value.eval())
            with self.assertRaises(errors_impl.FailedPreconditionError):
                v1.eval()

            v2 = variables.Variable(math_ops.negative(v1.initialized_value()),
                                    dtype=dtypes.float32)
            self.assertEqual(v1.get_shape(), v2.get_shape())
            self.assertAllClose(np.negative(value), v2.initial_value.eval())

            # Once v2.initial_value.eval() has been called, v1 has effectively been
            # initialized.
            self.assertAllClose(value, v1.eval())

            with self.assertRaises(errors_impl.FailedPreconditionError):
                v2.eval()
            variables.global_variables_initializer().run()
            self.assertAllClose(np.negative(value), v2.eval())
Example #4
  def testDispatchForUnaryElementwiseAPIs(self):

    @dispatch.dispatch_for_unary_elementwise_apis(MaskedTensor)
    def unary_elementwise_api_handler(api_func, x):
      return MaskedTensor(api_func(x.values), x.mask)

    try:
      x = MaskedTensor([1, -2, -3], [True, True, False])
      # Test calls with positional & keyword argument (& combinations)
      abs_x = math_ops.abs(x)
      sign_x = math_ops.sign(x=x)
      neg_x = math_ops.negative(x, "neg_x")
      invert_x = bitwise_ops.invert(x, name="invert_x")
      ones_like_x = array_ops.ones_like(x, name="ones_like_x")
      ones_like_x_float = array_ops.ones_like(
          x, dtypes.float32, name="ones_like_x_float")
      self.assertAllEqual(abs_x.values, [1, 2, 3])
      self.assertAllEqual(sign_x.values, [1, -1, -1])
      self.assertAllEqual(neg_x.values, [-1, 2, 3])
      self.assertAllEqual(invert_x.values, [-2, 1, 2])
      self.assertAllEqual(ones_like_x.values, [1, 1, 1])
      self.assertAllEqual(ones_like_x_float.values, [1., 1., 1.])
      for result in [
          abs_x, sign_x, neg_x, invert_x, ones_like_x, ones_like_x_float
      ]:
        self.assertAllEqual(result.mask, [True, True, False])
      if not context.executing_eagerly():  # names not defined in eager mode.
        self.assertRegex(neg_x.values.name, r"^neg_x/Neg:.*")
        self.assertRegex(invert_x.values.name, r"^invert_x/.*")
        self.assertRegex(ones_like_x.values.name, r"^ones_like_x/.*")
        self.assertRegex(ones_like_x_float.values.name,
                         r"^ones_like_x_float/.*")

    finally:
      dispatch.unregister_elementwise_api_handler(unary_elementwise_api_handler)
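For reference, a hedged sketch of the same pattern using the public API, assuming TF 2.x where tf.experimental.ExtensionType and tf.experimental.dispatch_for_unary_elementwise_apis are available; SimpleMasked is a made-up stand-in for the MaskedTensor class used above:

import tensorflow as tf

class SimpleMasked(tf.experimental.ExtensionType):
  values: tf.Tensor
  mask: tf.Tensor

@tf.experimental.dispatch_for_unary_elementwise_apis(SimpleMasked)
def _unary_handler(api_func, x):
  # Apply the elementwise op to .values and carry the mask through unchanged.
  return SimpleMasked(api_func(x.values), x.mask)

x = SimpleMasked(tf.constant([1, -2, -3]), tf.constant([True, True, False]))
print(tf.abs(x).values.numpy())           # [1 2 3]
print(tf.math.negative(x).mask.numpy())   # [ True  True False]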
Example #5
  def testSideEffect(self):
    a = constant_op.constant(1)
    b = constant_op.constant(1)
    c = math_ops.add(a, b)
    with ops.control_dependencies([c]):
      d = constant_op.constant(42)
    n = math_ops.negative(c)

    shared = []

    def sub(t):
      shared.append(t)
      return t

    c0 = c
    self.assertTrue(c0.op in d.op.control_inputs)
    c = subscribe.subscribe(c,
                            lambda t: script_ops.py_func(sub, [t], [t.dtype]))
    # Verify that control dependencies are correctly moved to the subscription.
    self.assertFalse(c0.op in d.op.control_inputs)
    self.assertTrue(c.op in d.op.control_inputs)

    with self.cached_session() as sess:
      c_out = self.evaluate([c])
      n_out = self.evaluate([n])
      d_out = self.evaluate([d])

    self.assertEqual(n_out, [-2])
    self.assertEqual(c_out, [2])
    self.assertEqual(d_out, [42])
    self.assertEqual(shared, [2, 2, 2])
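The final assertion holds because d depends on c only through a control dependency, so each of the three evaluations re-runs c's subgraph and fires the py_func hook once. A plain-Python sketch of the wrapping idea (illustrative only; this is not the subscribe API):

shared = []

def record(t):
  # Stand-in for the py_func hook: log the value, return it unchanged.
  shared.append(t)
  return t

c = record(1 + 1)   # "subscribed" c
n = -c
d = 42              # d only depends on c via a control dependency
print(n, c, d, shared)   # -2 2 42 [2]
# In the graph test, each of the three runs (c, n, d) re-executes c's subgraph,
# so the hook fires three times and shared ends up as [2, 2, 2].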
Example #6
  def testInitializerFunction(self):
    value = [[-42], [133.7]]
    shape = [2, 1]
    with self.test_session():
      initializer = lambda: constant_op.constant(value)

      v1 = variables.Variable(initializer, dtype=dtypes.float32)
      self.assertEqual(shape, v1.get_shape())
      self.assertEqual(shape, v1.shape)
      self.assertAllClose(value, v1.initial_value.eval())
      with self.assertRaises(errors_impl.FailedPreconditionError):
        v1.eval()

      v2 = variables.Variable(
          math_ops.negative(v1.initialized_value()), dtype=dtypes.float32)
      self.assertEqual(v1.get_shape(), v2.get_shape())
      self.assertEqual(v1.shape, v2.shape)
      self.assertAllClose(np.negative(value), v2.initial_value.eval())

      # Once v2.initial_value.eval() has been called, v1 has effectively been
      # initialized.
      self.assertAllClose(value, v1.eval())

      with self.assertRaises(errors_impl.FailedPreconditionError):
        v2.eval()
      variables.global_variables_initializer().run()
      self.assertAllClose(np.negative(value), v2.eval())
Example #7
  def testSideEffect(self):
    a = constant_op.constant(1)
    b = constant_op.constant(1)
    c = math_ops.add(a, b)
    with ops.control_dependencies([c]):
      d = constant_op.constant(42)
    n = math_ops.negative(c)

    shared = []

    def sub(t):
      shared.append(t)
      return t

    c = subscribe.subscribe(c,
                            lambda t: script_ops.py_func(sub, [t], [t.dtype]))

    with self.test_session() as sess:
      c_out = sess.run([c])
      n_out = sess.run([n])
      d_out = sess.run([d])

    self.assertEqual(n_out, [-2])
    self.assertEqual(c_out, [2])
    self.assertEqual(d_out, [42])
    self.assertEqual(shared, [2, 2, 2])
Example #8
  def testSideEffect(self):
    a = constant_op.constant(1)
    b = constant_op.constant(1)
    c = math_ops.add(a, b)
    with ops.control_dependencies([c]):
      d = constant_op.constant(42)
    n = math_ops.negative(c)

    shared = []

    def sub(t):
      shared.append(t)
      return t

    c0 = c
    self.assertTrue(c0.op in d.op.control_inputs)
    c = subscribe.subscribe(c,
                            lambda t: script_ops.py_func(sub, [t], [t.dtype]))
    # Verify that control dependencies are correctly moved to the subscription.
    self.assertFalse(c0.op in d.op.control_inputs)
    self.assertTrue(c.op in d.op.control_inputs)

    with self.cached_session() as sess:
      c_out = sess.run([c])
      n_out = sess.run([n])
      d_out = sess.run([d])

    self.assertEqual(n_out, [-2])
    self.assertEqual(c_out, [2])
    self.assertEqual(d_out, [42])
    self.assertEqual(shared, [2, 2, 2])
Example #9
  def setUp(self):
    self.a = variables.VariableV1(2.0, name="a")
    self.b = variables.VariableV1(3.0, name="b")

    self.c = math_ops.multiply(self.a, self.b, name="c")  # Should be 6.0.
    self.d = math_ops.multiply(self.a, self.a, name="d")  # Should be 4.0.

    self.e = math_ops.multiply(self.d, self.c, name="e")  # Should be 24.0.

    self.f_y = constant_op.constant(0.30, name="f_y")
    self.f = math_ops.div(self.b, self.f_y, name="f")  # Should be 10.0.

    # The three nodes x, y and z form a graph with "cross-links" in it. I.e., x
    # and y are both direct inputs to z, but x is also a direct input to y.
    self.x = variables.VariableV1(2.0, name="x")  # Should be 2.0
    self.y = math_ops.negative(self.x, name="y")  # Should be -2.0.

    self.z = math_ops.multiply(self.x, self.y, name="z")  # Should be -4.0.

    rewriter_config = rewriter_config_pb2.RewriterConfig(
        disable_model_pruning=True,
        arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
        constant_folding=rewriter_config_pb2.RewriterConfig.OFF)
    graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
    config = config_pb2.ConfigProto(graph_options=graph_options)
    self.sess = session.Session(config=config)
    self.sess.run(variables.global_variables_initializer())
Example #10
    def testSideEffect(self):
        a = constant_op.constant(1)
        b = constant_op.constant(1)
        c = math_ops.add(a, b)
        with ops.control_dependencies([c]):
            d = constant_op.constant(42)
        n = math_ops.negative(c)

        shared = []

        def sub(t):
            shared.append(t)
            return t

        c = subscribe.subscribe(
            c, lambda t: script_ops.py_func(sub, [t], [t.dtype]))

        with self.test_session() as sess:
            c_out = sess.run([c])
            n_out = sess.run([n])
            d_out = sess.run([d])

        self.assertEqual(n_out, [-2])
        self.assertEqual(c_out, [2])
        self.assertEqual(d_out, [42])
        self.assertEqual(shared, [2, 2, 2])
Example #11
 def wrong_outputs_callback(op_type,
                            inputs,
                            attrs,
                            outputs,
                            op_name=None,
                            graph=None):
   del op_type, inputs, attrs, op_name, graph  # Unused.
   return outputs[0], math_ops.negative(outputs[0])
Example #12
 def decayed_lr():
   """Helper to recompute learning rate; most helpful in eager-mode."""
   global_step_recomp = math_ops.cast(global_step, dtype)
   p = global_step_recomp / decay_steps
   if staircase:
     p = math_ops.floor(p)
   exponent = math_ops.exp(
       math_ops.multiply(math_ops.negative(decay_rate), p))
   return math_ops.multiply(learning_rate, exponent, name=name)
Example #13
    def testTransposeNegate2(self):
        with ops.device("/device:IPU:0"):
            with session_lib.Session() as sess:
                pa = array_ops.placeholder(np.float32, [2, 2, 3], name="a")
                a = array_ops.transpose(pa, [1, 2, 0])
                b = math_ops.negative(a)

                sess.run(variables.global_variables_initializer())

                fd = {pa: [[[1, 2, 3], [3, 4, 5]], [[5, 6, 7], [7, 8, 9]]]}
                result = sess.run(b, fd)
                self.assertAllClose(result, [[[-1, -5], [-2, -6], [-3, -7]],
                                             [[-3, -7], [-4, -8], [-5, -9]]])
Example #14
def _FloorModGrad(op, grad):
  """Returns grad * (1, -floor(x/y))."""
  x = math_ops.conj(op.inputs[0])
  y = math_ops.conj(op.inputs[1])

  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
  floor_xy = math_ops.floor_div(x, y)
  gx = array_ops.reshape(math_ops.reduce_sum(grad, rx), sx)
  gy = array_ops.reshape(
      math_ops.reduce_sum(grad * math_ops.negative(floor_xy), ry), sy)
  return gx, gy
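Since floormod(x, y) = x - floor(x / y) * y, the partials away from the discontinuities of floor are d/dx = 1 and d/dy = -floor(x / y), which is what the gradient above reduces over. A small NumPy finite-difference check (illustrative only):

import numpy as np

def floormod(x, y):
  return x - np.floor(x / y) * y

x, y, eps = 7.3, 2.5, 1e-6
dfdx = (floormod(x + eps, y) - floormod(x - eps, y)) / (2 * eps)
dfdy = (floormod(x, y + eps) - floormod(x, y - eps)) / (2 * eps)
print(round(dfdx, 4))                     # 1.0
print(round(dfdy, 4), -np.floor(x / y))   # -2.0 -2.0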
Example #15
def _FloorModGrad(op, grad):
    """Returns grad * (1, -floor(x/y))."""
    x = math_ops.conj(op.inputs[0])
    y = math_ops.conj(op.inputs[1])

    sx = array_ops.shape(x)
    sy = array_ops.shape(y)
    rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
    floor_xy = math_ops.floor_div(x, y)
    gx = array_ops.reshape(math_ops.reduce_sum(grad, rx), sx)
    gy = array_ops.reshape(
        math_ops.reduce_sum(grad * math_ops.negative(floor_xy), ry), sy)
    return gx, gy
Example #16
def _XDivyGrad(op, grad):
  """Returns gradient of xdivy(x, y) with respect to x and y."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
  with ops.control_dependencies([grad]):
    not_zero_x = math_ops.cast(
        math_ops.not_equal(x, math_ops.cast(0., dtype=x.dtype)), dtype=x.dtype)
    partial_x = gen_math_ops.xdivy(not_zero_x, y)
    partial_y = gen_math_ops.xdivy(math_ops.negative(x), y**2)
    return (array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx),
            array_ops.reshape(math_ops.reduce_sum(partial_y * grad, ry), sy))
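For x != 0, xdivy(x, y) = x / y, so the partials used above are 1 / y with respect to x and -x / y**2 with respect to y. A small NumPy finite-difference check of those formulas (illustrative only):

import numpy as np

x, y, eps = 3.0, 2.0, 1e-6
f = lambda x, y: x / y
dfdx = (f(x + eps, y) - f(x - eps, y)) / (2 * eps)
dfdy = (f(x, y + eps) - f(x, y - eps)) / (2 * eps)
print(np.allclose(dfdx, 1.0 / y))       # True
print(np.allclose(dfdy, -x / y ** 2))   # True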
Example #17
def _XDivyGrad(op, grad):
  """Returns gradient of xdivy(x, y) with respect to x and y."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
  with ops.control_dependencies([grad]):
    not_zero_x = math_ops.cast(
        math_ops.not_equal(x, math_ops.cast(0., dtype=x.dtype)), dtype=x.dtype)
    partial_x = gen_math_ops.xdivy(not_zero_x, y)
    partial_y = gen_math_ops.xdivy(math_ops.negative(x), y**2)
    return (array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx),
            array_ops.reshape(math_ops.reduce_sum(partial_y * grad, ry), sy))
Example #18
  def _createGraph(self):
    """Create graph for testing.

    Returns:
      Python Graph object.
    """
    with ops.Graph().as_default() as graph:
      with ops.device("/job:worker/task:0/cpu:0"):
        self.a = variables.VariableV1(10.0, name="a")
        self.b = variables.VariableV1(100.0, name="b")
        self.inc_a = state_ops.assign_add(self.a, 2.0, name="inc_a")
        self.dec_b = state_ops.assign_add(self.b, -5.0, name="dec_b")
        self.p = math_ops.multiply(self.inc_a, self.dec_b, name="p")
        self.q = math_ops.negative(self.p, name="q")
    return graph
Example #19
    def _createGraph(self):
        """Create graph for testing.

    Returns:
      Python Graph object.
    """
        with ops.Graph().as_default() as graph:
            with ops.device("/job:worker/task:0/cpu:0"):
                self.a = variables.VariableV1(10.0, name="a")
                self.b = variables.VariableV1(100.0, name="b")
                self.inc_a = state_ops.assign_add(self.a, 2.0, name="inc_a")
                self.dec_b = state_ops.assign_add(self.b, -5.0, name="dec_b")
                self.p = math_ops.multiply(self.inc_a, self.dec_b, name="p")
                self.q = math_ops.negative(self.p, name="q")
        return graph
Example #20
  def decayed_lr(learning_rate, global_step, decay_steps, decay_rate, staircase,
                 name):
    """Helper to recompute learning rate; most helpful in eager-mode."""
    with ops.name_scope(name, "NaturalExpDecay",
                        [learning_rate, global_step, decay_rate]) as name:
      learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
      dtype = learning_rate.dtype
      decay_steps = math_ops.cast(decay_steps, dtype)
      decay_rate = math_ops.cast(decay_rate, dtype)

      global_step_recomp = math_ops.cast(global_step, dtype)
      p = global_step_recomp / decay_steps
      if staircase:
        p = math_ops.floor(p)
      exponent = math_ops.exp(
          math_ops.multiply(math_ops.negative(decay_rate), p))
      return math_ops.multiply(learning_rate, exponent, name=name)
Example #21
  def decayed_lr(learning_rate, global_step, decay_steps, decay_rate, staircase,
                 name):
    """Helper to recompute learning rate; most helpful in eager-mode."""
    with ops.name_scope(name, "NaturalExpDecay",
                        [learning_rate, global_step, decay_rate]) as name:
      learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
      dtype = learning_rate.dtype
      decay_steps = math_ops.cast(decay_steps, dtype)
      decay_rate = math_ops.cast(decay_rate, dtype)

      global_step_recomp = math_ops.cast(global_step, dtype)
      p = global_step_recomp / decay_steps
      if staircase:
        p = math_ops.floor(p)
      exponent = math_ops.exp(
          math_ops.multiply(math_ops.negative(decay_rate), p))
      return math_ops.multiply(learning_rate, exponent, name=name)
Example #22
    def testGetBackwardOpsSplit(self):
        # a -> b -> c
        #       \-> d
        a = array_ops.placeholder(dtypes.float32)
        b = math_ops.exp(a)
        c = math_ops.log(b)
        d = math_ops.negative(b)
        self.assertEqual(meta_graph._get_backward_ops([d]), [a.op, b.op, d.op])
        self.assertEqual(meta_graph._get_backward_ops([c]), [a.op, b.op, c.op])
        self.assertEqual(meta_graph._get_backward_ops([c, d]),
                         [a.op, b.op, c.op, d.op])
        self.assertEqual(meta_graph._get_backward_ops([b, d]),
                         [a.op, b.op, d.op])
        self.assertEqual(meta_graph._get_backward_ops([a, d]),
                         [a.op, b.op, d.op])

        self.assertEqual(meta_graph._get_backward_ops([c, d], as_inputs=[b]),
                         [c.op, d.op])
        self.assertEqual(meta_graph._get_backward_ops([c], as_inputs=[d]),
                         [a.op, b.op, c.op])
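A rough pure-Python sketch of the kind of backward walk these assertions describe: starting from the seed ops, collect every ancestor op in topological order, treating as_inputs as already given. This is an illustrative reimplementation, not meta_graph's code; the Op class and its inputs attribute are assumptions made for the sketch.

def get_backward_ops(seed_ops, as_inputs=()):
  stop = set(as_inputs)
  order, seen = [], set()

  def visit(op):
    if op in seen or op in stop:
      return
    seen.add(op)
    for parent in op.inputs:
      visit(parent)
    order.append(op)

  for op in seed_ops:
    visit(op)
  return order

class Op:
  def __init__(self, name, *inputs):
    self.name, self.inputs = name, list(inputs)
  def __repr__(self):
    return self.name

a = Op("a"); b = Op("b", a); c = Op("c", b); d = Op("d", b)
print(get_backward_ops([c, d]))                 # [a, b, c, d]
print(get_backward_ops([c, d], as_inputs=[b]))  # [c, d]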
Example #23
  def setUp(self):
    self.a = variables.Variable(2.0, name="a")
    self.b = variables.Variable(3.0, name="b")

    self.c = math_ops.multiply(self.a, self.b, name="c")  # Should be 6.0.
    self.d = math_ops.multiply(self.a, self.a, name="d")  # Should be 4.0.

    self.e = math_ops.multiply(self.d, self.c, name="e")  # Should be 24.0.

    self.f_y = constant_op.constant(0.30, name="f_y")
    self.f = math_ops.div(self.b, self.f_y, name="f")  # Should be 10.0.

    # The three nodes x, y and z form a graph with "cross-links" in it. I.e., x
    # and y are both direct inputs to z, but x is also a direct input to y.
    self.x = variables.Variable(2.0, name="x")  # Should be 2.0
    self.y = math_ops.negative(self.x, name="y")  # Should be -2.0.

    self.z = math_ops.multiply(self.x, self.y, name="z")  # Should be -4.0.

    self.sess = session.Session()
    self.sess.run(variables.global_variables_initializer())
Example #24
  def setUp(self):
    self.a = variables.Variable(2.0, name="a")
    self.b = variables.Variable(3.0, name="b")

    self.c = math_ops.multiply(self.a, self.b, name="c")  # Should be 6.0.
    self.d = math_ops.multiply(self.a, self.a, name="d")  # Should be 4.0.

    self.e = math_ops.multiply(self.d, self.c, name="e")  # Should be 24.0.

    self.f_y = constant_op.constant(0.30, name="f_y")
    self.f = math_ops.div(self.b, self.f_y, name="f")  # Should be 10.0.

    # The three nodes x, y and z form a graph with "cross-links" in it. I.e., x
    # and y are both direct inputs to z, but x is also a direct input to y.
    self.x = variables.Variable(2.0, name="x")  # Should be 2.0
    self.y = math_ops.negative(self.x, name="y")  # Should be -2.0.

    self.z = math_ops.multiply(self.x, self.y, name="z")  # Should be -4.0.

    self.sess = session.Session()
    self.sess.run(variables.global_variables_initializer())
Example #25
    def testCloneBridge(self):
        # a -> b -> c -> d -> e
        #       \  ---  /
        g = ops.Graph()
        with g.as_default():
            a = array_ops.constant([2], dtype=dtypes.int32, name='a')
            b = array_ops.identity(a, name='b')
            c = math_ops.negative(b, name='c')
            d = array_ops.tile(c, b, name='d')
            e = math_ops.square(d, name='e')

            a_new = array_ops.constant([3], dtype=dtypes.int32, name='a_new')
            b_new = array_ops.constant([4], dtype=dtypes.int32, name='b_new')
            c_new = array_ops.constant([5], dtype=dtypes.int32, name='c_new')
            d_new = array_ops.constant([5, 5, 5], name='d_new')

            # case 1
            copies = meta_graph.clone([d, e],
                                      "copy1",
                                      replace={
                                          a: a_new,
                                          c: c_new
                                      })
            with self.test_session(use_gpu=True) as sess:
                d_out_, e_out_ = sess.run(copies)
            self.assertAllClose(d_out_, np.array([5, 5, 5]))
            self.assertAllClose(e_out_, np.array([25, 25, 25]))

            # case 2
            copies = meta_graph.clone([c, e],
                                      "copy2",
                                      replace={
                                          a: a_new,
                                          b: b_new,
                                          d: d_new
                                      })
            with self.test_session(use_gpu=True) as sess:
                c_out_, e_out_ = sess.run(copies)
            self.assertAllClose(c_out_, [-4])
            self.assertAllClose(e_out_, np.array([25, 25, 25]))
Example #26
  def testInitializerFunction(self):
    value = [[-42], [133.7]]
    shape = [2, 1]
    with self.cached_session():
      initializer = lambda: constant_op.constant(value)

      v1 = variables.Variable(initializer, dtype=dtypes.float32)
      self.assertEqual(shape, v1.get_shape())
      self.assertEqual(shape, v1.shape)
      self.assertAllClose(value, v1.initial_value.eval())
      with self.assertRaises(errors_impl.FailedPreconditionError):
        self.evaluate(v1)

      v2 = variables.Variable(
          math_ops.negative(v1.initialized_value()), dtype=dtypes.float32)
      self.assertEqual(v1.get_shape(), v2.get_shape())
      self.assertEqual(v1.shape, v2.shape)
      self.assertAllClose(np.negative(value), v2.initial_value.eval())

      with self.assertRaises(errors_impl.FailedPreconditionError):
        self.evaluate(v2)
      variables.global_variables_initializer().run()
      self.assertAllClose(np.negative(value), self.evaluate(v2))
Example #27
  def testInitializerFunction(self):
    value = [[-42], [133.7]]
    shape = [2, 1]
    with self.cached_session():
      initializer = lambda: constant_op.constant(value)

      v1 = variables.Variable(initializer, dtype=dtypes.float32)
      self.assertEqual(shape, v1.get_shape())
      self.assertEqual(shape, v1.shape)
      self.assertAllClose(value, self.evaluate(v1.initial_value))
      with self.assertRaises(errors_impl.FailedPreconditionError):
        self.evaluate(v1)

      v2 = variables.Variable(
          math_ops.negative(v1.initialized_value()), dtype=dtypes.float32)
      self.assertEqual(v1.get_shape(), v2.get_shape())
      self.assertEqual(v1.shape, v2.shape)
      self.assertAllClose(np.negative(value), self.evaluate(v2.initial_value))

      with self.assertRaises(errors_impl.FailedPreconditionError):
        self.evaluate(v2)
      self.evaluate(variables.global_variables_initializer())
      self.assertAllClose(np.negative(value), self.evaluate(v2))
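A minimal eager-mode sketch of the same callable-initializer behaviour, assuming TF 2.x (illustrative; the graph-mode FailedPreconditionError checks above have no eager equivalent):

import numpy as np
import tensorflow as tf

value = [[-42.0], [133.7]]
v1 = tf.Variable(lambda: tf.constant(value), dtype=tf.float32)
v2 = tf.Variable(-v1, dtype=tf.float32)   # initialized from v1's current value
print(v1.shape)                                       # (2, 1)
print(np.allclose(v2.numpy(), np.negative(value)))    # True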
Example #28
 def training_loss(self, features, labels, name='training_loss'):
     return math_ops.negative(self.average_size(), name=name)
Example #29
def natural_exp_decay(learning_rate,
                      global_step,
                      decay_steps,
                      decay_rate,
                      staircase=False,
                      name=None):
    """Applies natural exponential decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses.  This function applies an exponential decay function
  to a provided initial learning rate.  It requires a `global_step` value to
  compute the decayed learning rate.  You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate.  It is computed as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * global_step / decay_steps)
  ```

  Example: decay exponentially with a base of 0.96:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 5
  k = 0.5
  learning_rate = tf.train.natural_exp_decay(learning_rate, global_step,
                                             decay_steps, k)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a
      Python number.  The initial learning rate.
    global_step: A Python number.
      Global step to use for the decay computation.  Must not be negative.
    decay_steps: How often to apply decay.
    decay_rate: A Python number.  The decay rate.
    staircase: Whether to apply decay in a discrete staircase, as opposed to
      continuous, fashion.
    name: String.  Optional name of the operation.  Defaults to
      'NaturalExpDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`.  The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.
  """
    if global_step is None:
        raise ValueError("global_step is required for natural_exp_decay.")
    with ops.name_scope(name, "NaturalExpDecay",
                        [learning_rate, global_step, decay_rate]) as name:
        learning_rate = ops.convert_to_tensor(learning_rate,
                                              name="learning_rate")
        dtype = learning_rate.dtype
        global_step = math_ops.cast(global_step, dtype)
        decay_steps = math_ops.cast(decay_steps, dtype)
        decay_rate = math_ops.cast(decay_rate, dtype)
        p = global_step / decay_steps
        if staircase:
            p = math_ops.floor(p)
        exponent = math_ops.exp(
            math_ops.multiply(math_ops.negative(decay_rate), p))
        return math_ops.multiply(learning_rate, exponent, name=name)
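A small NumPy sketch of the schedule this function defines, assuming learning_rate=0.1, decay_rate=0.5 and decay_steps=5 (illustrative only):

import numpy as np

learning_rate, decay_rate, decay_steps = 0.1, 0.5, 5.0
step = np.arange(0, 11)

continuous = learning_rate * np.exp(-decay_rate * step / decay_steps)
staircase = learning_rate * np.exp(-decay_rate * np.floor(step / decay_steps))

print(np.round(continuous[[0, 3, 7]], 4))   # ~[0.1    0.0741 0.0497]
print(np.round(staircase[[0, 3, 7]], 4))    # ~[0.1    0.1    0.0607]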
Example #30
def natural_exp_decay(learning_rate,
                      global_step,
                      decay_steps,
                      decay_rate,
                      staircase=False,
                      name=None):
    """Applies natural exponential decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses.  This function applies an exponential decay function
  to a provided initial learning rate.  It requires a `global_step` value to
  compute the decayed learning rate.  You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate.  It is computed as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * global_step /
  decay_step)
  ```

  or, if `staircase` is `True`, as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * floor(global_step /
  decay_step))
  ```

  Example: decay exponentially with a base of 0.96:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 5
  k = 0.5
  learning_rate = tf.compat.v1.train.natural_exp_decay(learning_rate,
  global_step,
                                             decay_steps, k)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number.
      The initial learning rate.
    global_step: A Python number. Global step to use for the decay computation.
      Must not be negative.
    decay_steps: How often to apply decay.
    decay_rate: A Python number.  The decay rate.
    staircase: Whether to apply decay in a discrete staircase, as opposed to
      continuous, fashion.
    name: String.  Optional name of the operation.  Defaults to
      'ExponentialTimeDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`.  The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for changing
  the learning rate value across different invocations of optimizer functions.
  @end_compatibility
  """
    natural_exp_rate = math_ops.exp(math_ops.negative(decay_rate))
    decayed_lr = learning_rate_schedule.ExponentialDecay(learning_rate,
                                                         decay_steps,
                                                         natural_exp_rate,
                                                         staircase=staircase,
                                                         name=name)

    if not context.executing_eagerly():
        decayed_lr = decayed_lr(global_step)
    else:
        decayed_lr = functools.partial(decayed_lr, global_step)
    return decayed_lr
Example #31
  def GetParams(self):
    """Test for unary operations in TF-TRT."""
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [12, 5, 8, 1, 1, 12]
    input2_name = "input_2"
    input2_dims = [12, 5, 8, 1, 12, 1, 1]
    g = ops.Graph()
    with g.as_default():
      x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name)
      q = math_ops.abs(x)
      q = q + 1.0
      q = gen_math_ops.exp(q)
      q = gen_math_ops.log(q)
      q = array_ops.squeeze(q, axis=-2)
      q = math_ops.abs(q)
      q = q + 2.2
      q = gen_math_ops.sqrt(q)
      q = gen_math_ops.rsqrt(q)
      q = math_ops.negative(q)
      q = array_ops.squeeze(q, axis=3)
      q = math_ops.abs(q)
      q = q + 3.0
      a = gen_math_ops.reciprocal(q)

      x = constant_op.constant(np.random.randn(5, 8, 12), dtype=dtype)
      q = math_ops.abs(x)
      q = q + 2.0
      q = gen_math_ops.exp(q)
      q = gen_math_ops.log(q)
      q = math_ops.abs(q)
      q = q + 2.1
      q = gen_math_ops.sqrt(q)
      q = gen_math_ops.rsqrt(q)
      q = math_ops.negative(q)
      q = math_ops.abs(q)
      q = q + 4.0
      b = gen_math_ops.reciprocal(q)

      # TODO(jie): this one will break, broadcasting on batch.
      x = array_ops.placeholder(
          dtype=dtype, shape=input2_dims, name=input2_name)
      q = math_ops.abs(x)
      q = q + 5.0
      q = gen_math_ops.exp(q)
      q = array_ops.squeeze(q, axis=[-1, -2, 3])
      q = gen_math_ops.log(q)
      q = math_ops.abs(q)
      q = q + 5.1
      q = gen_array_ops.reshape(q, [12, 5, 1, 1, 8, 1, 12])
      q = array_ops.squeeze(q, axis=[5, 2, 3])
      q = gen_math_ops.sqrt(q)
      q = math_ops.abs(q)
      q = q + 5.2
      q = gen_math_ops.rsqrt(q)
      q = math_ops.negative(q)
      q = math_ops.abs(q)
      q = q + 5.3
      c = gen_math_ops.reciprocal(q)

      q = a * b
      q = q / c
      array_ops.squeeze(q, name=self.output_name)
    return trt_test.TfTrtIntegrationTestParams(
        gdef=g.as_graph_def(),
        input_names=[input_name, input2_name],
        input_dims=[input_dims, input2_dims],
        num_expected_engines=5,
        expected_output_dims=(12, 5, 8, 12),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)
Example #32
def attention(query, ):
    """Put attention masks on hidden using hidden_features and query."""
    ds = []  # Results of attention reads will be stored here.
    if nest.is_sequence(query):  # If the query is a tuple, flatten it.
        query_list = nest.flatten(query)
        for q in query_list:  # Check that ndims == 2 if specified.
            ndims = q.get_shape().ndims
            if ndims:
                assert ndims == 2
        query = array_ops.concat(query_list, 1)

    with variable_scope.variable_scope("Attention_%d" % a, dtype=dtype):
    attention_vec_size = attn_size  # Size of query vectors for attention.
    # to calucate wp * ht
    v_p = variable_scope.get_variable("AttnV_p%d" % a, [attention_vec_size])
    qiu = linear(query, attention_vec_size, True)
    qiu = array_ops.reshape(qiu, [batch_size, 1, 1, attention_vec_size])
    tan_v = math_ops.reduce_sum(v_p * math_ops.tanh(qiu),
                                [2, 3])
    # print(tan_v.get_shape())
    pt_sig = math_ops.sigmoid(tan_v)
    # print(pt_sig.get_shape())
    p = attn_length * pt_sig
    # print(p.get_shape())
    # p_t = (array_ops.reshape(p, [-1, attn_length]))
    p_t = math_ops.cast(p, dtype=dtypes.int32)
    p_t = math_ops.cast(p_t, dtype=dtypes.float32)
    # print(p_t.get_shape())
    # print(4)
    # p_t=tf.convert_to_tensor(p_t)

    # print(p_t.shape, attention_states.shape)

    # set a window
    p_t = array_ops.reshape(p_t, [batch_size, ])
    attention_states_windows = []
    D = attn_local_D
    for i in range(attention_states.shape[0]):
        x = tf.constant(D, dtype=dtypes.float32)
        y = math_ops.cast(p_t[i], dtype=dtypes.float32)
        z = tf.constant(attn_length, dtype=dtypes.float32)

        def f1(): return tf.constant(0, dtype=dtypes.int32), math_ops.cast(D - p_t[i], dtype=dtypes.int32)

        def f2():
            return math_ops.cast(p_t[i] - D, dtype=dtypes.int32), tf.constant(0, dtype=dtypes.int32)

        def f3(): return tf.constant(attn_length, dtype=dtypes.int32), math_ops.cast(
            p_t[i] + D + 1 - attn_length, dtype=dtypes.int32)

        def f4(): return math_ops.cast(p_t[i] + D + 1, dtype=dtypes.int32), tf.constant(0, dtype=dtypes.int32)

        begin, pre_num = tf.cond(tf.less(x, y), f2, f1)
        end, last_num = tf.cond(tf.less(y + D + 1, z), f4, f3)

        d = tf.constant(attn_fixed_length, dtype=dtypes.int32)
        # num = tf.cond(tf.less(end - begin, d), f5, f6)
        pre_tmp = tf.zeros([pre_num, attention_vec_size], dtype=dtypes.float32)
        last_tmp = tf.zeros([last_num, attention_vec_size], dtype=dtypes.float32)
        # tmp = tf.zeros([num, attention_vec_size], dtype=dtypes.float32)
        attention_states_window = math_ops.cast(attention_states[i][begin:end], dtype=dtypes.float32)
        attention_states_window = tf.concat([pre_tmp, attention_states_window], 0)
        attention_states_window = tf.concat([attention_states_window, last_tmp], 0)
        attention_states_window = tf.expand_dims(attention_states_window, 0)
        attention_states_windows.append(attention_states_window)

    attention_states_windows = tf.concat(attention_states_windows, 0)
    attention_states_windows = array_ops.reshape(attention_states_windows,
                                                 [batch_size, attn_fixed_length, attention_vec_size])
    # print(attention_states_windows.shape)

    # To calculate W1 * hi we use a 1-by-1 convolution, need to reshape before.
    hidden = array_ops.reshape(attention_states_windows,
                               [batch_size, attn_fixed_length, 1, attn_size])
    k = variable_scope.get_variable("AttnW_%d" % a,
                                    [1, 1, attn_size, attention_vec_size])
    hidden_features = nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME")
    v = variable_scope.get_variable("AttnV_%d" % a, [attention_vec_size])

    with variable_scope.variable_scope("Attention_l_%d" % a, dtype=dtype):
        # w2 * ht
        y = linear(query, attention_vec_size, True)
        y = array_ops.reshape(y, [batch_size, 1, 1, attention_vec_size])
        # Attention mask is a softmax of v^T * tanh(...).
        s = math_ops.reduce_sum(v * math_ops.tanh(hidden_features + y),
                                [2, 3])
        ai = nn_ops.softmax(s)
        ai = tf.reshape(ai, [batch_size, attn_fixed_length, 1])
        # print(5,ai.get_shape())

        # do the p_t part
        center = tf.constant(D, dtype=dtypes.float32, shape=[batch_size, 1])
        extent = tf.ones([1, attn_fixed_length], dtype=dtypes.float32)
        center = center * extent
        center = tf.reshape(center, [batch_size, attn_fixed_length, 1])

        pos = [i for i in range(attn_fixed_length)]
        pos = tf.reshape(pos, [attn_fixed_length, 1])
        pos = math_ops.cast(pos, dtype=dtypes.float32)
        # print((p_t - pos).get_shape(), "jing")

        value = math_ops.square(center - pos) * 2 / (D * D)
        pre = math_ops.exp(math_ops.negative(value))
        # print(pre.get_shape(),"qiu")
        ai = ai * pre

        # Now calculate the attention-weighted vector d.
        d = math_ops.reduce_sum(
            array_ops.reshape(ai, [batch_size, attn_fixed_length, 1, 1]) * hidden, [1, 2])
        ds.append(array_ops.reshape(d, [batch_size, attn_size]))
    return ds
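The `pre` factor above is the Gaussian window of Luong-style local attention, exp(-2 * (s - p_t)**2 / D**2), i.e. a Gaussian with standard deviation D / 2 centred on the predicted position. A NumPy sketch of that window, assuming D = 3 and attn_fixed_length = 2 * D + 1 as in the __call__ example later on this page:

import numpy as np

D = 3                                  # window half-width (attn_local_D above)
attn_fixed_length = 2 * D + 1
positions = np.arange(attn_fixed_length, dtype=np.float32)
center = float(D)                      # the predicted position sits mid-window

window = np.exp(-np.square(center - positions) * 2.0 / (D * D))
print(np.round(window, 3))
# [0.135 0.411 0.801 1.    0.801 0.411 0.135]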
Example #33
def natural_exp_decay(learning_rate,
                      global_step,
                      decay_steps,
                      decay_rate,
                      staircase=False,
                      name=None):
  """Applies natural exponential decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses.  This function applies an exponential decay function
  to a provided initial learning rate.  It requires a `global_step` value to
  compute the decayed learning rate.  You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate.  It is computed as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * global_step /
  decay_step)
  ```

  or, if `staircase` is `True`, as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * floor(global_step /
  decay_step))
  ```

  Example: decay exponentially with a base of 0.96:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 5
  k = 0.5
  learning_rate = tf.compat.v1.train.natural_exp_decay(learning_rate,
  global_step,
                                             decay_steps, k)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number.
      The initial learning rate.
    global_step: A Python number. Global step to use for the decay computation.
      Must not be negative.
    decay_steps: How often to apply decay.
    decay_rate: A Python number.  The decay rate.
    staircase: Whether to apply decay in a discrete staircase, as opposed to
      continuous, fashion.
    name: String.  Optional name of the operation.  Defaults to
      'ExponentialTimeDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`.  The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for changing
  the learning rate value across different invocations of optimizer functions.
  @end_compatibility
  """
  natural_exp_rate = math_ops.exp(math_ops.negative(decay_rate))
  decayed_lr = learning_rate_schedule.ExponentialDecay(
      learning_rate,
      decay_steps,
      natural_exp_rate,
      staircase=staircase,
      name=name)

  if not context.executing_eagerly():
    decayed_lr = decayed_lr(global_step)
  else:
    decayed_lr = functools.partial(decayed_lr, global_step)
  return decayed_lr
Example #34
 def called_member(self, a):
   return math_ops.negative(a)
Example #35
def natural_exp_decay(learning_rate, global_step, decay_steps, decay_rate,
                      staircase=False, name=None):
  """Applies natural exponential decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses.  This function applies an exponential decay function
  to a provided initial learning rate.  It requires a `global_step` value to
  compute the decayed learning rate.  You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate.  It is computed as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * global_step / decay_steps)
  ```

  Example: decay exponentially with a base of 0.96:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 5
  k = 0.5
  learning_rate = tf.train.natural_exp_decay(learning_rate, global_step,
                                             decay_steps, k)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a
      Python number.  The initial learning rate.
    global_step: A Python number.
      Global step to use for the decay computation.  Must not be negative.
    decay_steps: How often to apply decay.
    decay_rate: A Python number.  The decay rate.
    staircase: Whether to apply decay in a discrete staircase, as opposed to
      continuous, fashion.
    name: String.  Optional name of the operation.  Defaults to
      'NaturalExpDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`.  The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.
  """
  if global_step is None:
    raise ValueError("global_step is required for natural_exp_decay.")
  with ops.name_scope(name, "NaturalExpDecay",
                      [learning_rate, global_step, decay_rate]) as name:
    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
    dtype = learning_rate.dtype
    global_step = math_ops.cast(global_step, dtype)
    decay_steps = math_ops.cast(decay_steps, dtype)
    decay_rate = math_ops.cast(decay_rate, dtype)
    p = global_step / decay_steps
    if staircase:
      p = math_ops.floor(p)
    exponent = math_ops.exp(math_ops.multiply(math_ops.negative(decay_rate), p))
    return math_ops.multiply(learning_rate, exponent, name=name)
Example #36
def tfassert_eq(_):
  x = array_ops.placeholder(dtypes.int32, name='x_hold')
  y = array_ops.placeholder(dtypes.int32, name='y_hold')
  control_flow_ops.Assert(
      math_ops.equal(x, y), ['Expected x == y.'], name='assert_eq')
  math_ops.add(x, math_ops.negative(y), name='x_y_diff')
Example #37
def tfassert_eq(_):
    x = array_ops.placeholder(dtypes.int32, name='x_hold')
    y = array_ops.placeholder(dtypes.int32, name='y_hold')
    control_flow_ops.Assert(math_ops.equal(x, y), ['Expected x == y.'],
                            name='assert_eq')
    math_ops.add(x, math_ops.negative(y), name='x_y_diff')
Example #38
 def __neg__(self):
   return math_ops.negative(self)
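A minimal plain-Python sketch of what this overload provides: defining __neg__ routes the unary minus operator to your own negation (math_ops.negative in the snippet above); Wrapped is a made-up example class:

class Wrapped:
  """Made-up example; stands in for a tensor-like class defining __neg__."""

  def __init__(self, value):
    self.value = value

  def __neg__(self):
    # In the snippet above this would be: return math_ops.negative(self)
    return Wrapped(-self.value)

print((-Wrapped(3.0)).value)   # -3.0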
Example #39
  def GetParams(self):
    """Test for unary operations in TF-TRT."""
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [12, 5, 8, 1, 1, 12]
    output_name = "output"
    input2_name = "input_2"
    input2_dims = [12, 5, 8, 1, 12, 1, 1]
    g = ops.Graph()
    with g.as_default():
      x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name)
      q = math_ops.abs(x)
      q = q + 1.0
      q = gen_math_ops.exp(q)
      q = gen_math_ops.log(q)
      q = array_ops.squeeze(q, axis=-2)
      q = math_ops.abs(q)
      q = q + 2.2
      q = gen_math_ops.sqrt(q)
      q = gen_math_ops.rsqrt(q)
      q = math_ops.negative(q)
      q = array_ops.squeeze(q, axis=3)
      q = math_ops.abs(q)
      q = q + 3.0
      a = gen_math_ops.reciprocal(q)

      x = constant_op.constant(np.random.randn(5, 8, 12), dtype=dtype)
      q = math_ops.abs(x)
      q = q + 2.0
      q = gen_math_ops.exp(q)
      q = gen_math_ops.log(q)
      q = math_ops.abs(q)
      q = q + 2.1
      q = gen_math_ops.sqrt(q)
      q = gen_math_ops.rsqrt(q)
      q = math_ops.negative(q)
      q = math_ops.abs(q)
      q = q + 4.0
      b = gen_math_ops.reciprocal(q)

      # TODO(jie): this one will break, broadcasting on batch.
      x = array_ops.placeholder(
          dtype=dtype, shape=input2_dims, name=input2_name)
      q = math_ops.abs(x)
      q = q + 5.0
      q = gen_math_ops.exp(q)
      q = array_ops.squeeze(q, axis=[-1, -2, 3])
      q = gen_math_ops.log(q)
      q = math_ops.abs(q)
      q = q + 5.1
      q = gen_array_ops.reshape(q, [12, 5, 1, 1, 8, 1, 12])
      q = array_ops.squeeze(q, axis=[5, 2, 3])
      q = gen_math_ops.sqrt(q)
      q = math_ops.abs(q)
      q = q + 5.2
      q = gen_math_ops.rsqrt(q)
      q = math_ops.negative(q)
      q = math_ops.abs(q)
      q = q + 5.3
      c = gen_math_ops.reciprocal(q)

      q = a * b
      q = q / c
      array_ops.squeeze(q, name=output_name)
    return trt_test.TfTrtIntegrationTestParams(
        gdef=g.as_graph_def(),
        input_names=[input_name, input2_name],
        input_dims=[input_dims, input2_dims],
        output_names=[output_name],
        expected_output_dims=[(12, 5, 8, 12)])
Example #40
 def called_member(self, a):
   return math_ops.negative(a)
Example #41
 def validation_loss(self, features, labels):
     return math_ops.negative(self.average_size())
Example #42
  def training_graph(self,
                     input_data,
                     input_labels,
                     random_seed,
                     data_spec,
                     sparse_features=None,
                     input_weights=None):

    """Constructs a TF graph for training a random tree.

    Args:
      input_data: A tensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      random_seed: The random number generator seed to use for this tree.  0
        means use the current time as the seed.
      data_spec: A data_ops.TensorForestDataSpec object specifying the
        original feature/columns of the data.
      sparse_features: A tf.SparseTensor for sparse input data.
      input_weights: A float tensor or placeholder holding per-input weights,
        or None if all inputs are to be weighted equally.

    Returns:
      The last op in the random tree training graph.
    """
    epoch = math_ops.to_int32(get_epoch_variable())

    serialized_input_spec = data_spec.SerializeToString()

    if input_weights is None:
      input_weights = []

    if input_data is None:
      input_data = []

    sparse_indices = []
    sparse_values = []
    sparse_shape = []
    if sparse_features is not None:
      sparse_indices = sparse_features.indices
      sparse_values = sparse_features.values
      sparse_shape = sparse_features.dense_shape

    # Count extremely random stats.
    (node_sums, node_squares, splits_indices, splits_sums, splits_squares,
     totals_indices, totals_sums, totals_squares,
     input_leaves) = (tensor_forest_ops.count_extremely_random_stats(
         input_data,
         sparse_indices,
         sparse_values,
         sparse_shape,
         input_labels,
         input_weights,
         self.variables.tree,
         self.variables.tree_thresholds,
         self.variables.node_to_accumulator_map,
         self.variables.candidate_split_features,
         self.variables.candidate_split_thresholds,
         self.variables.start_epoch,
         epoch,
         input_spec=serialized_input_spec,
         num_classes=self.params.num_output_columns,
         regression=self.params.regression))
    node_update_ops = []
    node_update_ops.append(
        state_ops.assign_add(self.variables.node_sums, node_sums))

    splits_update_ops = []
    splits_update_ops.append(
        tensor_forest_ops.scatter_add_ndim(self.variables.candidate_split_sums,
                                           splits_indices, splits_sums))
    splits_update_ops.append(
        tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_sums,
                                           totals_indices, totals_sums))

    if self.params.regression:
      node_update_ops.append(state_ops.assign_add(self.variables.node_squares,
                                                  node_squares))
      splits_update_ops.append(
          tensor_forest_ops.scatter_add_ndim(
              self.variables.candidate_split_squares, splits_indices,
              splits_squares))
      splits_update_ops.append(
          tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_squares,
                                             totals_indices, totals_squares))

    # Sample inputs.
    update_indices, feature_updates, threshold_updates = (
        tensor_forest_ops.sample_inputs(
            input_data,
            sparse_indices,
            sparse_values,
            sparse_shape,
            input_weights,
            self.variables.node_to_accumulator_map,
            input_leaves,
            self.variables.candidate_split_features,
            self.variables.candidate_split_thresholds,
            input_spec=serialized_input_spec,
            split_initializations_per_input=(
                self.params.split_initializations_per_input),
            split_sampling_random_seed=random_seed))
    update_features_op = state_ops.scatter_update(
        self.variables.candidate_split_features, update_indices,
        feature_updates)
    update_thresholds_op = state_ops.scatter_update(
        self.variables.candidate_split_thresholds, update_indices,
        threshold_updates)

    # Calculate finished nodes.
    with ops.control_dependencies(splits_update_ops):
      # Passing input_leaves to finished nodes here means that nodes that
      # have become stale won't be deallocated until an input reaches them,
      # because we're trying to avoid considering every fertile node for
      # performance reasons.
      finished, stale = tensor_forest_ops.finished_nodes(
          input_leaves,
          self.variables.node_to_accumulator_map,
          self.variables.candidate_split_sums,
          self.variables.candidate_split_squares,
          self.variables.accumulator_sums,
          self.variables.accumulator_squares,
          self.variables.start_epoch,
          epoch,
          num_split_after_samples=self.params.split_after_samples,
          min_split_samples=self.params.min_split_samples,
          dominate_method=self.params.dominate_method,
          dominate_fraction=self.params.dominate_fraction)

    # Update leaf scores.
    # TODO(thomaswc): Store the leaf scores in a TopN and only update the
    # scores of the leaves that were touched by this batch of input.
    children = array_ops.squeeze(
        array_ops.slice(self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1])
    is_leaf = math_ops.equal(constants.LEAF_NODE, children)
    leaves = math_ops.to_int32(
        array_ops.squeeze(
            array_ops.where(is_leaf), squeeze_dims=[1]))
    non_fertile_leaves = array_ops.boolean_mask(
        leaves, math_ops.less(array_ops.gather(
            self.variables.node_to_accumulator_map, leaves), 0))

    # TODO(gilberth): It should be possible to limit the number of non
    # fertile leaves we calculate scores for, especially since we can only take
    # at most array_ops.shape(finished)[0] of them.
    with ops.control_dependencies(node_update_ops):
      sums = array_ops.gather(self.variables.node_sums, non_fertile_leaves)
      if self.params.regression:
        squares = array_ops.gather(self.variables.node_squares,
                                   non_fertile_leaves)
        non_fertile_leaf_scores = self._variance(sums, squares)
      else:
        non_fertile_leaf_scores = self._weighted_gini(sums)

    # Calculate best splits.
    with ops.control_dependencies(splits_update_ops):
      split_indices = tensor_forest_ops.best_splits(
          finished,
          self.variables.node_to_accumulator_map,
          self.variables.candidate_split_sums,
          self.variables.candidate_split_squares,
          self.variables.accumulator_sums,
          self.variables.accumulator_squares,
          regression=self.params.regression)

    # Grow tree.
    with ops.control_dependencies([update_features_op, update_thresholds_op,
                                   non_fertile_leaves.op]):
      (tree_update_indices, tree_children_updates, tree_threshold_updates,
       new_eot) = (tensor_forest_ops.grow_tree(
           self.variables.end_of_tree, self.variables.node_to_accumulator_map,
           finished, split_indices, self.variables.candidate_split_features,
           self.variables.candidate_split_thresholds))
      tree_update_op = state_ops.scatter_update(
          self.variables.tree, tree_update_indices, tree_children_updates)
      thresholds_update_op = state_ops.scatter_update(
          self.variables.tree_thresholds, tree_update_indices,
          tree_threshold_updates)
      # TODO(thomaswc): Only update the epoch on the new leaves.
      new_epoch_updates = epoch * array_ops.ones_like(tree_threshold_updates,
                                                      dtype=dtypes.int32)
      epoch_update_op = state_ops.scatter_update(
          self.variables.start_epoch, tree_update_indices,
          new_epoch_updates)

    # Update fertile slots.
    with ops.control_dependencies([tree_update_op]):
      (n2a_map_updates, a2n_map_updates, accumulators_cleared,
       accumulators_allocated) = (tensor_forest_ops.update_fertile_slots(
           finished,
           non_fertile_leaves,
           non_fertile_leaf_scores,
           self.variables.end_of_tree,
           self.variables.accumulator_sums,
           self.variables.node_to_accumulator_map,
           stale,
           self.variables.node_sums,
           regression=self.params.regression))

    # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
    # used it to calculate new leaves.
    with ops.control_dependencies([n2a_map_updates.op]):
      eot_update_op = state_ops.assign(self.variables.end_of_tree, new_eot)

    updates = []
    updates.append(eot_update_op)
    updates.append(tree_update_op)
    updates.append(thresholds_update_op)
    updates.append(epoch_update_op)

    updates.append(
        state_ops.scatter_update(self.variables.node_to_accumulator_map,
                                 n2a_map_updates[0], n2a_map_updates[1]))

    updates.append(
        state_ops.scatter_update(self.variables.accumulator_to_node_map,
                                 a2n_map_updates[0], a2n_map_updates[1]))

    cleared_and_allocated_accumulators = array_ops.concat(
        [accumulators_cleared, accumulators_allocated], 0)

    # Calculate values to put into scatter update for candidate counts.
    # Candidate split counts are always reset back to 0 for both cleared
    # and allocated accumulators. This means some accumulators might be doubly
    # reset to 0 if they were released and not allocated, then later allocated.
    split_values = array_ops.tile(
        array_ops.expand_dims(array_ops.expand_dims(
            array_ops.zeros_like(cleared_and_allocated_accumulators,
                                 dtype=dtypes.float32), 1), 2),
        [1, self.params.num_splits_to_consider, self.params.num_output_columns])
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_sums,
        cleared_and_allocated_accumulators, split_values))
    if self.params.regression:
      updates.append(state_ops.scatter_update(
          self.variables.candidate_split_squares,
          cleared_and_allocated_accumulators, split_values))

    # Calculate values to put into scatter update for total counts.
    total_cleared = array_ops.tile(
        array_ops.expand_dims(
            math_ops.negative(array_ops.ones_like(accumulators_cleared,
                                                  dtype=dtypes.float32)), 1),
        [1, self.params.num_output_columns])
    total_reset = array_ops.tile(
        array_ops.expand_dims(
            array_ops.zeros_like(accumulators_allocated,
                                 dtype=dtypes.float32), 1),
        [1, self.params.num_output_columns])
    accumulator_updates = array_ops.concat([total_cleared, total_reset], 0)
    updates.append(state_ops.scatter_update(
        self.variables.accumulator_sums,
        cleared_and_allocated_accumulators, accumulator_updates))
    if self.params.regression:
      updates.append(state_ops.scatter_update(
          self.variables.accumulator_squares,
          cleared_and_allocated_accumulators, accumulator_updates))

    # Calculate values to put into scatter update for candidate splits.
    split_features_updates = array_ops.tile(
        array_ops.expand_dims(
            math_ops.negative(array_ops.ones_like(
                cleared_and_allocated_accumulators)), 1),
        [1, self.params.num_splits_to_consider])
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_features,
        cleared_and_allocated_accumulators, split_features_updates))

    updates += self.finish_iteration()

    return control_flow_ops.group(*updates)
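The bookkeeping above resets whole rows of large state variables (candidate split counts, accumulator totals, split features) by scattering zeros or -1s into them at the cleared/allocated indices. A minimal, self-contained sketch of that scatter_update pattern, using the public tf.compat.v1 API rather than the internal state_ops module; the variable names and shapes here are illustrative, not from the original:

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# Hypothetical accumulator table: 4 accumulators, 3 output columns.
accumulator_sums = tf.Variable(np.arange(12, dtype=np.float32).reshape(4, 3))
cleared_and_allocated = tf.constant([0, 2])       # rows being cleared or newly allocated
reset_rows = tf.zeros([2, 3], dtype=tf.float32)   # new contents for those rows
reset_op = tf.scatter_update(accumulator_sums, cleared_and_allocated, reset_rows)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(reset_op))  # rows 0 and 2 are zeroed; rows 1 and 3 are unchanged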
Example #43
0
    def training_loss(self, features, labels, name='training_loss'):
        return math_ops.negative(self.average_size(), name=name)
Example #44
0
    def __call__(self, position, query, delta=delta):
        batch_size = 32
        # position = next_position(position, query, self._values, self._memory_sequence_length)
        # local_memory = get_local_matrix(self._memory, position, delta)
        # alignment_bah = bah_attend(self._query_layer(query), local_memory)
        # alignment_gauss = norm * tf.ones([self._batch_size, 1], tf.float32)
        # alignment = alignment_gauss * alignment_bah
        # expand_alignment = tf.expand_dims(alignment, 1)
        # context = tf.matmul(expand_alignment, local_memory)
        # context = tf.squeeze(context, 1)
        # return position, alignment, context
        """Put attention masks on hidden using hidden_features and query."""
        position, l = next_position(position, query)
        attention_states_windows = []
        D = delta
        attn_fixed_length = 2 * delta + 1
        for i in range(batch_size):
            x = tf.constant(D, dtype=dtypes.float32)
            y = math_ops.cast(position[i], dtype=dtypes.float32)

            def f1():
                return tf.constant(0, dtype=dtypes.int32), math_ops.cast(
                    D - position[i] + 1, dtype=dtypes.int32)

            def f2():
                return math_ops.cast(position[i] - D,
                                     dtype=dtypes.int32), tf.constant(
                                         0, dtype=dtypes.int32)

            def f3():
                return self._memory_sequence_length[i], math_ops.cast(
                    position[i] + D + 2 -
                    tf.cast(self._memory_sequence_length[i], tf.float32),
                    dtype=dtypes.int32)

            def f4():
                return math_ops.cast(position[i] + D + 1,
                                     dtype=dtypes.int32), tf.constant(
                                         0, dtype=dtypes.int32)

            begin, pre_num = tf.cond(tf.less(x, y), f2, f1)
            end, last_num = tf.cond(
                y + D + 1 < tf.cast(self._memory_sequence_length[i],
                                    tf.float32), f4, f3)
            # num = tf.cond(tf.less(end - begin, d), f5, f6)
            pre_tmp = tf.zeros([pre_num, self._num_units],
                               dtype=dtypes.float32)
            last_tmp = tf.zeros([last_num, self._num_units],
                                dtype=dtypes.float32)
            # tmp = tf.zeros([num, attention_vec_size], dtype=dtypes.float32)
            attention_states_window = math_ops.cast(self._values[i][begin:end],
                                                    dtype=dtypes.float32)
            attention_states_window = tf.concat(
                [pre_tmp, attention_states_window], 0)
            attention_states_window = tf.concat(
                [attention_states_window, last_tmp], 0)
            attention_states_window = attention_states_window[0:2 * delta + 1]
            attention_states_window = tf.expand_dims(attention_states_window,
                                                     0)
            attention_states_windows.append(attention_states_window)

        attention_states_windows = tf.concat(attention_states_windows, 0)
        attention_states_windows = array_ops.reshape(
            attention_states_windows,
            [batch_size, attn_fixed_length, self._num_units])
        # print(attention_states_windows.shape)

        # The windowed attention states are used directly as hidden_features (no W1 * hi projection here).
        hidden_features = attention_states_windows
        v = variable_scope.get_variable("v", [self._num_units])

        with variable_scope.variable_scope("Attention_l"):
            # w2 * ht
            y = self._query_layer(query)
            y = array_ops.reshape(y, [batch_size, 1, self._num_units])
            # Attention mask is a softmax of v^T * tanh(...).
            s = math_ops.reduce_sum(v * math_ops.tanh(hidden_features + y), 2)
            ai = nn_ops.softmax(s)
            ai = tf.reshape(ai, [batch_size, attn_fixed_length, 1])
            # print(5,ai.get_shape())

            # do the p_t part
            center = tf.constant(D,
                                 dtype=dtypes.float32,
                                 shape=[batch_size, 1])
            extent = tf.ones([1, attn_fixed_length], dtype=dtypes.float32)
            center = center * extent
            center = tf.reshape(center, [batch_size, attn_fixed_length, 1])

            pos = [i for i in xrange(attn_fixed_length)]
            pos = tf.reshape(pos, [attn_fixed_length, 1])
            pos = math_ops.cast(pos, dtype=dtypes.float32)
            # print((p_t - pos).get_shape(), "jing")

            value = math_ops.square(center - pos) * 2 / (D * D)
            pre = math_ops.exp(math_ops.negative(value))
            # print(pre.get_shape(),"qiu")
            l = tf.reshape(l, [batch_size, 1, 1])
            ai = l * ai * pre

            # Now calculate the attention-weighted vector d.
            context = math_ops.reduce_sum(ai * hidden_features, 1)
            ai = tf.squeeze(ai)
        return position, ai, context
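The window weighting in the example above is Luong-style local attention: every slot in the fixed window of length 2 * delta + 1 is down-weighted by a Gaussian centered on the middle of the window, exp(-2 * (center - pos)^2 / D^2), before being multiplied into the softmax scores. A small NumPy sketch of those weights (illustrative only; the value of D is an assumption):

import numpy as np

D = 3                                   # half-window size (delta); illustrative value
attn_fixed_length = 2 * D + 1
pos = np.arange(attn_fixed_length, dtype=np.float32)
center = float(D)                       # the predicted position sits at the window center
weights = np.exp(-np.square(center - pos) * 2.0 / (D * D))
print(weights)                          # 1.0 at the center, decaying toward the window edges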
Example #45
0
    def validation_loss(self, features, labels):
        return math_ops.negative(self.average_size())
Example #46
0
 def __neg__(self):
     return math_ops.negative(self)
        def attention(query):
            """Put attention masks on hidden using hidden_features and query."""
            ds = []  # Results of attention reads will be stored here.
            if nest.is_sequence(query):  # If the query is a tuple, flatten it.
                query_list = nest.flatten(query)
                for q in query_list:  # Check that ndims == 2 if specified.
                    ndims = q.get_shape().ndims
                    if ndims:
                        assert ndims == 2
                query = array_ops.concat(query_list, 1)
            for a in xrange(num_heads):
                with variable_scope.variable_scope("Attention_%d" % a,
                                                   dtype=dtype):
                    attention_vec_size = attn_size  # Size of query vectors for attention.
                    # To calculate w_p * h_t.
                    v_p = variable_scope.get_variable("AttnV_p%d" % a,
                                                      [attention_vec_size])
                    qiu = linear(query, attention_vec_size, True)
                    qiu = array_ops.reshape(qiu,
                                            [-1, 1, 1, attention_vec_size])
                    tan_v = math_ops.reduce_sum(v_p * math_ops.tanh(qiu),
                                                [2, 3])
                    # print(tan_v.get_shape())
                    pt_sig = math_ops.sigmoid(tan_v)
                    # print(pt_sig.get_shape())
                    p = attn_length * pt_sig
                    # print(p.get_shape())
                    # p_t = (array_ops.reshape(p, [-1, attn_length]))
                    p_t = math_ops.cast(p, dtype=dtypes.int32)
                    p_t = math_ops.cast(p_t, dtype=dtypes.float32)
                    # print(p_t.get_shape())
                    # print(4)

                    # To calculate W1 * hi we use a 1-by-1 convolution, need to reshape before.
                    hidden = array_ops.reshape(attention_states,
                                               [-1, attn_length, 1, attn_size])
                    k = variable_scope.get_variable(
                        "AttnW_%d" % a, [1, 1, attn_size, attention_vec_size])
                    hidden_features = nn_ops.conv2d(hidden, k, [1, 1, 1, 1],
                                                    "SAME")
                    v = variable_scope.get_variable("AttnV_%d" % a,
                                                    [attention_vec_size])

                with variable_scope.variable_scope("Attention_l_%d" % a,
                                                   dtype=dtype):
                    # w2 * ht
                    y = linear(query, attention_vec_size, True)
                    y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
                    # Attention mask is a softmax of v^T * tanh(...).
                    s = math_ops.reduce_sum(
                        v * math_ops.tanh(hidden_features + y), [2, 3])
                    ai = nn_ops.softmax(s)
                    ai = tf.reshape(ai, [-1, attn_length, 1])
                    # print(5,ai.get_shape())

                    # do the p_t part
                    extent = tf.ones([1, attn_length], dtype=dtypes.float32)
                    p_t = p_t * extent
                    p_t = tf.reshape(p_t, [-1, attn_length, 1])
                    # print (p_t.get_shape())

                    pos = [i for i in xrange(attn_length)]
                    pos = tf.reshape(pos, [attn_length, 1])
                    pos = math_ops.cast(pos, dtype=dtypes.float32)
                    # print((p_t-pos).get_shape(),"jing")

                    value = math_ops.square(p_t - pos) * 2 / (attn_local_D *
                                                              attn_local_D)
                    pre = math_ops.exp(math_ops.negative(value))
                    # print(pre.get_shape(),"qiu")
                    ai = ai * pre

                    # Now calculate the attention-weighted vector d.
                    d = math_ops.reduce_sum(
                        array_ops.reshape(ai, [-1, attn_length, 1, 1]) *
                        hidden, [1, 2])
                    ds.append(array_ops.reshape(d, [-1, attn_size]))
            return ds
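This second variant does not fix the window center: p_t = attn_length * sigmoid(v_p^T tanh(W_p h_t)) predicts an alignment position from the query, and the same Gaussian factor exp(-2 * (p_t - pos)^2 / D^2) then modulates the softmax scores around it. A rough NumPy sketch of the predicted-position step (names and shapes are assumptions, not from the original):

import numpy as np

def predict_position(query, W_p, v_p, attn_length):
    # score = v_p . tanh(W_p @ query); the sigmoid scales it into (0, attn_length).
    score = np.dot(v_p, np.tanh(W_p @ query))
    return attn_length / (1.0 + np.exp(-score))

rng = np.random.default_rng(0)
vec_size, attn_length = 8, 20
p_t = predict_position(rng.standard_normal(vec_size),
                       rng.standard_normal((vec_size, vec_size)),
                       rng.standard_normal(vec_size),
                       attn_length)
print(p_t)  # a real-valued position inside the source sequence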