def GraphFn(self, x1, x2):
  """Builds three unary-op chains and combines them into one output tensor.

  Chain `a` is driven by `x1`, chain `b` by a freshly sampled random constant
  of shape (5, 8, 12), and chain `c` by `x2`. The result is
  `squeeze(a * b / c)` with the op name "output_0".

  Args:
    x1: Input tensor for the first chain. Its dtype is reused for the random
      constant of the second chain.
      NOTE(review): presumably rank 6 with size-1 axes at -2 and 3, as
      required by the two squeeze calls; confirm against the caller.
    x2: Input tensor for the third chain.
      NOTE(review): must hold 12 * 5 * 8 * 12 elements after the first
      squeeze for the hard-coded reshape to succeed; confirm.

  Returns:
    The squeezed tensor `a * b / c`, named "output_0".
  """
  # Chain a: element-wise unary ops on x1, with two squeezes in between.
  x = x1
  q = math_ops.abs(x)
  q = q + 1.0
  q = gen_math_ops.exp(q)
  q = gen_math_ops.log(q)
  q = array_ops.squeeze(q, axis=-2)
  q = math_ops.abs(q)
  q = q + 2.2
  q = gen_math_ops.sqrt(q)
  q = gen_math_ops.rsqrt(q)
  q = math_ops.negative(q)
  q = array_ops.squeeze(q, axis=3)
  q = math_ops.abs(q)
  q = q + 3.0
  a = gen_math_ops.reciprocal(q)

  # Chain b: this chain of operations has a batch size of 5, which is
  # different from the batch size for the other operations. The constant is
  # sampled with np.random.randn, so its values differ between graph builds.
  x = constant_op.constant(np.random.randn(5, 8, 12), dtype=x.dtype)
  q = math_ops.abs(x)
  q = q + 2.0
  q = gen_math_ops.exp(q)
  q = gen_math_ops.log(q)
  q = math_ops.abs(q)
  q = q + 2.1
  q = gen_math_ops.sqrt(q)
  q = gen_math_ops.rsqrt(q)
  q = math_ops.negative(q)
  q = math_ops.abs(q)
  q = q + 4.0
  b = gen_math_ops.reciprocal(q)

  # Chain c, driven by x2.
  # TODO(jie): this one will break, broadcasting on batch.
  x = x2
  q = math_ops.abs(x)
  q = q + 5.0
  q = gen_math_ops.exp(q)
  q = array_ops.squeeze(q, axis=[-1, -2, 3])
  q = gen_math_ops.log(q)
  q = math_ops.abs(q)
  q = q + 5.1
  # Re-insert size-1 axes via reshape, then drop them again with squeeze.
  q = gen_array_ops.reshape(q, [12, 5, 1, 1, 8, 1, 12])
  q = array_ops.squeeze(q, axis=[5, 2, 3])
  q = gen_math_ops.sqrt(q)
  q = math_ops.abs(q)
  q = q + 5.2
  q = gen_math_ops.rsqrt(q)
  q = math_ops.negative(q)
  q = math_ops.abs(q)
  q = q + 5.3
  c = gen_math_ops.reciprocal(q)

  # Combine the three chains; a * b and the division by c rely on
  # broadcasting.
  q = a * b
  q = q / c
  return array_ops.squeeze(q, name="output_0")
def testCloneSplit(self):
  """Exercises `meta_graph.clone` on a graph where `b` feeds both `c` and `d`."""
  # a -> b -> c
  #       \-> d
  graph = ops.Graph()
  with graph.as_default():
    a = array_ops.constant(1., name="a")
    b = math_ops.exp(a, name="b")
    c = math_ops.log(b, name="c")
    d = math_ops.negative(b, name="d")

    e_squared = math.e**2
    b_new = array_ops.constant(e_squared, name="b_new")
    d_new = array_ops.constant(-e_squared, name="d_new")

    # case 1: a single tensor is cloned under the new scope and still reads b.
    cloned_d = meta_graph.clone(d, "copy1")
    self.assertEqual(cloned_d.name, "copy1/d:0")
    self.assertEqual(cloned_d.op.inputs[:], [b])

    # case 2: cloning both leaves keeps b as the input of each clone.
    cloned_pair = meta_graph.clone([c, d], "copy2")
    self.assertEqual(cloned_pair[0].op.inputs[:], [b])
    self.assertEqual(cloned_pair[1].op.inputs[:], [b])

    # case 3: replacing b changes the value of both clones.
    cloned_pair = meta_graph.clone([c, d], "copy3", replace={b: b_new})
    with self.test_session(use_gpu=True) as sess:
      c_value, d_value = sess.run(cloned_pair)
      self.assertNear(c_value, 2., 1e-6)
      self.assertNear(d_value, -e_squared, 1e-6)

    # case 4: replacing d, which c does not depend on, leaves c untouched.
    cloned_c = meta_graph.clone(c, "copy4", replace={d: d_new})
    self.assertEqual(cloned_c.op.inputs[:], [b])
    with self.test_session(use_gpu=True) as sess:
      self.assertNear(sess.run(cloned_c), 1., 1e-6)
def testInitializerFunction(self):
  """Checks variables created from a callable and from another variable."""
  expected_value = [[-42], [133.7]]
  expected_shape = [2, 1]
  negated_value = np.negative(expected_value)

  def make_initial_value():
    return constant_op.constant(expected_value)

  with self.test_session():
    v1 = variables.Variable(make_initial_value, dtype=dtypes.float32)
    self.assertEqual(expected_shape, v1.get_shape())
    self.assertAllClose(expected_value, v1.initial_value.eval())
    # Reading the variable itself must fail until it has been initialized.
    with self.assertRaises(errors_impl.FailedPreconditionError):
      v1.eval()

    v1_negated = math_ops.negative(v1.initialized_value())
    v2 = variables.Variable(v1_negated, dtype=dtypes.float32)
    self.assertEqual(v1.get_shape(), v2.get_shape())
    self.assertAllClose(negated_value, v2.initial_value.eval())

    # Once v2.initial_value.eval() has been called, v1 has effectively been
    # initialized.
    self.assertAllClose(expected_value, v1.eval())

    with self.assertRaises(errors_impl.FailedPreconditionError):
      v2.eval()
    variables.global_variables_initializer().run()
    self.assertAllClose(negated_value, v2.eval())
def testDispatchForUnaryElementwiseAPIs(self):
  """Checks that unary elementwise APIs dispatch to a MaskedTensor handler."""

  @dispatch.dispatch_for_unary_elementwise_apis(MaskedTensor)
  def unary_elementwise_api_handler(api_func, x):
    return MaskedTensor(api_func(x.values), x.mask)

  try:
    masked = MaskedTensor([1, -2, -3], [True, True, False])

    # Test calls with positional & keyword argument (& combinations)
    abs_x = math_ops.abs(masked)
    sign_x = math_ops.sign(x=masked)
    neg_x = math_ops.negative(masked, "neg_x")
    invert_x = bitwise_ops.invert(masked, name="invert_x")
    ones_like_x = array_ops.ones_like(masked, name="ones_like_x")
    ones_like_x_float = array_ops.ones_like(
        masked, dtypes.float32, name="ones_like_x_float")

    # Each result paired with the values it is expected to carry.
    results_and_values = (
        (abs_x, [1, 2, 3]),
        (sign_x, [1, -1, -1]),
        (neg_x, [-1, 2, 3]),
        (invert_x, [-2, 1, 2]),
        (ones_like_x, [1, 1, 1]),
        (ones_like_x_float, [1., 1., 1.]),
    )
    for result, expected in results_and_values:
      self.assertAllEqual(result.values, expected)
    # The handler passes the mask through unchanged.
    for result, _ in results_and_values:
      self.assertAllEqual(result.mask, [True, True, False])

    if not context.executing_eagerly():  # names not defined in eager mode.
      results_and_name_patterns = (
          (neg_x, r"^neg_x/Neg:.*"),
          (invert_x, r"^invert_x/.*"),
          (ones_like_x, r"^ones_like_x/.*"),
          (ones_like_x_float, r"^ones_like_x_float/.*"),
      )
      for result, pattern in results_and_name_patterns:
        self.assertRegex(result.values.name, pattern)
  finally:
    # Always unregister so the handler does not leak into other tests.
    dispatch.unregister_elementwise_api_handler(unary_elementwise_api_handler)
def testSideEffect(self):
  """Checks that subscribing to a tensor rewires control deps and runs the hook."""
  one_a = constant_op.constant(1)
  one_b = constant_op.constant(1)
  total = math_ops.add(one_a, one_b)
  with ops.control_dependencies([total]):
    d = constant_op.constant(42)
  n = math_ops.negative(total)

  seen = []

  def record(t):
    seen.append(t)
    return t

  original_total = total
  self.assertTrue(original_total.op in d.op.control_inputs)
  total = subscribe.subscribe(
      total, lambda t: script_ops.py_func(record, [t], [t.dtype]))

  # Verify that control dependencies are correctly moved to the subscription.
  self.assertFalse(original_total.op in d.op.control_inputs)
  self.assertTrue(total.op in d.op.control_inputs)

  with self.cached_session():
    total_out = self.evaluate([total])
    n_out = self.evaluate([n])
    d_out = self.evaluate([d])

  self.assertEqual(n_out, [-2])
  self.assertEqual(total_out, [2])
  self.assertEqual(d_out, [42])
  # The hook fires once per evaluation above.
  self.assertEqual(seen, [2, 2, 2])
def testInitializerFunction(self):
  """Checks shapes and values of variables built from initializer callables."""
  expected_value = [[-42], [133.7]]
  expected_shape = [2, 1]
  negated_value = np.negative(expected_value)

  def make_initial_value():
    return constant_op.constant(expected_value)

  with self.test_session():
    v1 = variables.Variable(make_initial_value, dtype=dtypes.float32)
    # Both shape accessors must agree with the initializer's shape.
    self.assertEqual(expected_shape, v1.get_shape())
    self.assertEqual(expected_shape, v1.shape)
    self.assertAllClose(expected_value, v1.initial_value.eval())
    with self.assertRaises(errors_impl.FailedPreconditionError):
      v1.eval()

    v1_negated = math_ops.negative(v1.initialized_value())
    v2 = variables.Variable(v1_negated, dtype=dtypes.float32)
    self.assertEqual(v1.get_shape(), v2.get_shape())
    self.assertEqual(v1.shape, v2.shape)
    self.assertAllClose(negated_value, v2.initial_value.eval())

    # Once v2.initial_value.eval() has been called, v1 has effectively been
    # initialized.
    self.assertAllClose(expected_value, v1.eval())

    with self.assertRaises(errors_impl.FailedPreconditionError):
      v2.eval()
    variables.global_variables_initializer().run()
    self.assertAllClose(negated_value, v2.eval())
def testSideEffect(self):
  """Checks that a subscribed side-effect hook runs on every evaluation.

  `c` is subscribed with a `py_func` that appends the observed value to
  `shared`. Evaluating `c`, `n` (which consumes `c`) and `d` (which has a
  control dependency on `c`) must each trigger the hook once.
  """
  a = constant_op.constant(1)
  b = constant_op.constant(1)
  c = math_ops.add(a, b)
  with ops.control_dependencies([c]):
    d = constant_op.constant(42)
  n = math_ops.negative(c)

  shared = []

  def sub(t):
    shared.append(t)
    return t

  c = subscribe.subscribe(c, lambda t: script_ops.py_func(sub, [t], [t.dtype]))

  with self.test_session() as sess:
    c_out = sess.run([c])
    n_out = sess.run([n])
    d_out = sess.run([d])

  # `assertEquals` is a deprecated alias that was removed in Python 3.12;
  # use the canonical `assertEqual`.
  self.assertEqual(n_out, [-2])
  self.assertEqual(c_out, [2])
  self.assertEqual(d_out, [42])
  self.assertEqual(shared, [2, 2, 2])
def testSideEffect(self):
  """Checks that subscribing to a tensor rewires control deps and runs the hook."""
  one_a = constant_op.constant(1)
  one_b = constant_op.constant(1)
  total = math_ops.add(one_a, one_b)
  with ops.control_dependencies([total]):
    d = constant_op.constant(42)
  n = math_ops.negative(total)

  seen = []

  def record(t):
    seen.append(t)
    return t

  original_total = total
  self.assertTrue(original_total.op in d.op.control_inputs)
  total = subscribe.subscribe(
      total, lambda t: script_ops.py_func(record, [t], [t.dtype]))

  # Verify that control dependencies are correctly moved to the subscription.
  self.assertFalse(original_total.op in d.op.control_inputs)
  self.assertTrue(total.op in d.op.control_inputs)

  with self.cached_session() as session_handle:
    total_out = session_handle.run([total])
    n_out = session_handle.run([n])
    d_out = session_handle.run([d])

  self.assertEqual(n_out, [-2])
  self.assertEqual(total_out, [2])
  self.assertEqual(d_out, [42])
  # The hook fires once per run above.
  self.assertEqual(seen, [2, 2, 2])
def setUp(self):
  """Builds a small arithmetic graph and a session with rewrites disabled."""
  multiply = math_ops.multiply
  optimizer_off = rewriter_config_pb2.RewriterConfig.OFF

  self.a = variables.VariableV1(2.0, name="a")
  self.b = variables.VariableV1(3.0, name="b")

  self.c = multiply(self.a, self.b, name="c")  # Should be 6.0.
  self.d = multiply(self.a, self.a, name="d")  # Should be 4.0.
  self.e = multiply(self.d, self.c, name="e")  # Should be 24.0.

  self.f_y = constant_op.constant(0.30, name="f_y")
  self.f = math_ops.div(self.b, self.f_y, name="f")  # Should be 10.0.

  # The three nodes x, y and z form a graph with "cross-links" in. I.e., x
  # and y are both direct inputs to z, but x is also a direct input to y.
  self.x = variables.VariableV1(2.0, name="x")  # Should be 2.0
  self.y = math_ops.negative(self.x, name="y")  # Should be -2.0.
  self.z = multiply(self.x, self.y, name="z")  # Should be -4.0.

  # Turn off pruning, arithmetic optimization and constant folding for the
  # session's graph rewriter.
  session_config = config_pb2.ConfigProto(
      graph_options=config_pb2.GraphOptions(
          rewrite_options=rewriter_config_pb2.RewriterConfig(
              disable_model_pruning=True,
              arithmetic_optimization=optimizer_off,
              constant_folding=optimizer_off)))
  self.sess = session.Session(config=session_config)
  self.sess.run(variables.global_variables_initializer())
def testSideEffect(self):
  """Checks that a subscribed side-effect hook runs on every evaluation.

  `c` is subscribed with a `py_func` that appends the observed value to
  `shared`. Evaluating `c`, `n` (which consumes `c`) and `d` (which has a
  control dependency on `c`) must each trigger the hook once.
  """
  a = constant_op.constant(1)
  b = constant_op.constant(1)
  c = math_ops.add(a, b)
  with ops.control_dependencies([c]):
    d = constant_op.constant(42)
  n = math_ops.negative(c)

  shared = []

  def sub(t):
    shared.append(t)
    return t

  c = subscribe.subscribe(
      c, lambda t: script_ops.py_func(sub, [t], [t.dtype]))

  with self.test_session() as sess:
    c_out = sess.run([c])
    n_out = sess.run([n])
    d_out = sess.run([d])

  # `assertEquals` is a deprecated alias that was removed in Python 3.12;
  # use the canonical `assertEqual`.
  self.assertEqual(n_out, [-2])
  self.assertEqual(c_out, [2])
  self.assertEqual(d_out, [42])
  self.assertEqual(shared, [2, 2, 2])
def wrong_outputs_callback(op_type,
                           inputs,
                           attrs,
                           outputs,
                           op_name=None,
                           graph=None):
  """Returns the first output paired with its negation.

  Only `outputs` is read; every other argument is accepted and discarded.
  """
  del op_type, inputs, attrs, op_name, graph  # Unused.
  first_output = outputs[0]
  negated_output = math_ops.negative(outputs[0])
  return first_output, negated_output
def decayed_lr():
  """Helper to recompute learning rate; most helpful in eager-mode.

  Reads `global_step`, `dtype`, `decay_steps`, `staircase`, `decay_rate`,
  `learning_rate` and `name`, none of which are defined in this block.
  """
  step = math_ops.cast(global_step, dtype)
  progress = step / decay_steps
  if staircase:
    # Decay in whole-period jumps instead of continuously.
    progress = math_ops.floor(progress)
  scaled_progress = math_ops.multiply(math_ops.negative(decay_rate), progress)
  decay_factor = math_ops.exp(scaled_progress)
  return math_ops.multiply(learning_rate, decay_factor, name=name)
def testTransposeNegate2(self):
  """Negates a [1, 2, 0]-transposed placeholder on the IPU device."""
  feed_value = [[[1, 2, 3], [3, 4, 5]], [[5, 6, 7], [7, 8, 9]]]
  expected = [[[-1, -5], [-2, -6], [-3, -7]],
              [[-3, -7], [-4, -8], [-5, -9]]]

  with ops.device("/device:IPU:0"), session_lib.Session() as sess:
    pa = array_ops.placeholder(np.float32, [2, 2, 3], name="a")
    transposed = array_ops.transpose(pa, [1, 2, 0])
    negated = math_ops.negative(transposed)

    sess.run(variables.global_variables_initializer())

    result = sess.run(negated, {pa: feed_value})
    self.assertAllClose(result, expected)
def _FloorModGrad(op, grad):
  """Returns grad * (1, -floor(x/y)).

  Args:
    op: The op being differentiated; its first two inputs are x and y.
    grad: Incoming gradient with respect to the op's output.

  Returns:
    A pair `(gx, gy)` reduced and reshaped to the shapes of x and y.
  """
  x_conj = math_ops.conj(op.inputs[0])
  y_conj = math_ops.conj(op.inputs[1])
  x_shape = array_ops.shape(x_conj)
  y_shape = array_ops.shape(y_conj)
  # Axes that must be summed out to undo broadcasting for each input.
  reduce_x, reduce_y = gen_array_ops.broadcast_gradient_args(x_shape, y_shape)
  quotient = math_ops.floor_div(x_conj, y_conj)

  summed_x = math_ops.reduce_sum(grad, reduce_x)
  grad_x = array_ops.reshape(summed_x, x_shape)

  weighted = grad * math_ops.negative(quotient)
  summed_y = math_ops.reduce_sum(weighted, reduce_y)
  grad_y = array_ops.reshape(summed_y, y_shape)
  return grad_x, grad_y
def _XDivyGrad(op, grad):
  """Returns gradient of xdivy(x, y) with respect to x and y.

  Args:
    op: The op being differentiated; its first two inputs are x and y.
    grad: Incoming gradient with respect to the op's output.

  Returns:
    A pair of gradients reduced and reshaped to the shapes of x and y.
  """
  x, y = op.inputs[0], op.inputs[1]
  x_shape = array_ops.shape(x)
  y_shape = array_ops.shape(y)
  # Axes that must be summed out to undo broadcasting for each input.
  reduce_x, reduce_y = gen_array_ops.broadcast_gradient_args(x_shape, y_shape)
  with ops.control_dependencies([grad]):
    zero = math_ops.cast(0., dtype=x.dtype)
    # 1 where x != 0 and 0 elsewhere, in x's dtype.
    x_is_nonzero = math_ops.cast(math_ops.not_equal(x, zero), dtype=x.dtype)
    d_dx = gen_math_ops.xdivy(x_is_nonzero, y)
    d_dy = gen_math_ops.xdivy(math_ops.negative(x), y**2)

    summed_x = math_ops.reduce_sum(d_dx * grad, reduce_x)
    grad_x = array_ops.reshape(summed_x, x_shape)
    summed_y = math_ops.reduce_sum(d_dy * grad, reduce_y)
    grad_y = array_ops.reshape(summed_y, y_shape)
    return (grad_x, grad_y)
def _createGraph(self):
  """Create graph for testing.

  The ops are stored on `self` as `a`, `b`, `inc_a`, `dec_b`, `p` and `q`.

  Returns:
    Python Graph object.
  """
  test_graph = ops.Graph()
  with test_graph.as_default(), ops.device("/job:worker/task:0/cpu:0"):
    self.a = variables.VariableV1(10.0, name="a")
    self.b = variables.VariableV1(100.0, name="b")

    self.inc_a = state_ops.assign_add(self.a, 2.0, name="inc_a")
    self.dec_b = state_ops.assign_add(self.b, -5.0, name="dec_b")

    self.p = math_ops.multiply(self.inc_a, self.dec_b, name="p")
    self.q = math_ops.negative(self.p, name="q")
  return test_graph
def decayed_lr(learning_rate, global_step, decay_steps, decay_rate, staircase,
               name):
  """Helper to recompute learning rate; most helpful in eager-mode.

  Computes `learning_rate * exp(-decay_rate * p)`, where `p` is
  `global_step / decay_steps`, floored when `staircase` is truthy.
  """
  scope_values = [learning_rate, global_step, decay_rate]
  with ops.name_scope(name, "NaturalExpDecay", scope_values) as name:
    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
    # All scalars are cast to the learning rate's dtype before arithmetic.
    lr_dtype = learning_rate.dtype
    decay_steps = math_ops.cast(decay_steps, lr_dtype)
    decay_rate = math_ops.cast(decay_rate, lr_dtype)

    step = math_ops.cast(global_step, lr_dtype)
    progress = step / decay_steps
    if staircase:
      progress = math_ops.floor(progress)
    scaled_progress = math_ops.multiply(
        math_ops.negative(decay_rate), progress)
    decay_factor = math_ops.exp(scaled_progress)
    return math_ops.multiply(learning_rate, decay_factor, name=name)
def testGetBackwardOpsSplit(self):
  """Checks `_get_backward_ops` on a graph where `b` feeds both `c` and `d`."""
  # a -> b -> c
  #       \-> d
  a = array_ops.placeholder(dtypes.float32)
  b = math_ops.exp(a)
  c = math_ops.log(b)
  d = math_ops.negative(b)

  backward_ops = meta_graph._get_backward_ops

  # Single seeds only pull in their own branch.
  self.assertEqual(backward_ops([d]), [a.op, b.op, d.op])
  self.assertEqual(backward_ops([c]), [a.op, b.op, c.op])

  # Multiple seeds share the common ancestors.
  self.assertEqual(backward_ops([c, d]), [a.op, b.op, c.op, d.op])
  self.assertEqual(backward_ops([b, d]), [a.op, b.op, d.op])
  self.assertEqual(backward_ops([a, d]), [a.op, b.op, d.op])

  # `as_inputs` cuts the traversal at the given tensors.
  self.assertEqual(backward_ops([c, d], as_inputs=[b]), [c.op, d.op])
  self.assertEqual(backward_ops([c], as_inputs=[d]), [a.op, b.op, c.op])
def setUp(self):
  """Builds a small arithmetic graph and an initialized default session."""
  multiply = math_ops.multiply

  self.a = variables.Variable(2.0, name="a")
  self.b = variables.Variable(3.0, name="b")

  self.c = multiply(self.a, self.b, name="c")  # Should be 6.0.
  self.d = multiply(self.a, self.a, name="d")  # Should be 4.0.
  self.e = multiply(self.d, self.c, name="e")  # Should be 24.0.

  self.f_y = constant_op.constant(0.30, name="f_y")
  self.f = math_ops.div(self.b, self.f_y, name="f")  # Should be 10.0.

  # The three nodes x, y and z form a graph with "cross-links" in. I.e., x
  # and y are both direct inputs to z, but x is also a direct input to y.
  self.x = variables.Variable(2.0, name="x")  # Should be 2.0
  self.y = math_ops.negative(self.x, name="y")  # Should be -2.0.
  self.z = multiply(self.x, self.y, name="z")  # Should be -4.0.

  self.sess = session.Session()
  init_op = variables.global_variables_initializer()
  self.sess.run(init_op)
def testCloneBridge(self):
  """Exercises `meta_graph.clone` when `b` feeds `d` both directly and via `c`."""
  # a -> b -> c -> d -> e
  #       \  ---  /
  graph = ops.Graph()
  with graph.as_default():
    a = array_ops.constant([2], dtype=dtypes.int32, name='a')
    b = array_ops.identity(a, name='b')
    c = math_ops.negative(b, name='c')
    d = array_ops.tile(c, b, name='d')
    e = math_ops.square(d, name='e')

    a_new = array_ops.constant([3], dtype=dtypes.int32, name='a_new')
    b_new = array_ops.constant([4], dtype=dtypes.int32, name='b_new')
    c_new = array_ops.constant([5], dtype=dtypes.int32, name='c_new')
    d_new = array_ops.constant([5, 5, 5], name='d_new')

    fives = np.array([5, 5, 5])
    twenty_fives = np.array([25, 25, 25])

    # case 1: c is overridden, while the tile multiple still comes from a_new.
    replacements = {a: a_new, c: c_new}
    cloned = meta_graph.clone([d, e], "copy1", replace=replacements)
    with self.test_session(use_gpu=True) as sess:
      d_value, e_value = sess.run(cloned)
      self.assertAllClose(d_value, fives)
      self.assertAllClose(e_value, twenty_fives)

    # case 2: b and d are both overridden, so c follows b_new and e follows
    # d_new.
    replacements = {a: a_new, b: b_new, d: d_new}
    cloned = meta_graph.clone([c, e], "copy2", replace=replacements)
    with self.test_session(use_gpu=True) as sess:
      c_value, e_value = sess.run(cloned)
      self.assertAllClose(c_value, [-4])
      self.assertAllClose(e_value, twenty_fives)
def testInitializerFunction(self):
  """Checks shapes and values of variables built from initializer callables."""
  expected_value = [[-42], [133.7]]
  expected_shape = [2, 1]
  negated_value = np.negative(expected_value)

  def make_initial_value():
    return constant_op.constant(expected_value)

  with self.cached_session():
    v1 = variables.Variable(make_initial_value, dtype=dtypes.float32)
    # Both shape accessors must agree with the initializer's shape.
    self.assertEqual(expected_shape, v1.get_shape())
    self.assertEqual(expected_shape, v1.shape)
    self.assertAllClose(expected_value, v1.initial_value.eval())
    # Reading the variable itself must fail until it has been initialized.
    with self.assertRaises(errors_impl.FailedPreconditionError):
      self.evaluate(v1)

    v1_negated = math_ops.negative(v1.initialized_value())
    v2 = variables.Variable(v1_negated, dtype=dtypes.float32)
    self.assertEqual(v1.get_shape(), v2.get_shape())
    self.assertEqual(v1.shape, v2.shape)
    self.assertAllClose(negated_value, v2.initial_value.eval())

    with self.assertRaises(errors_impl.FailedPreconditionError):
      self.evaluate(v2)
    variables.global_variables_initializer().run()
    self.assertAllClose(negated_value, self.evaluate(v2))
def testInitializerFunction(self):
  """Checks shapes and values of variables built from initializer callables."""
  expected_value = [[-42], [133.7]]
  expected_shape = [2, 1]
  negated_value = np.negative(expected_value)

  def make_initial_value():
    return constant_op.constant(expected_value)

  with self.cached_session():
    v1 = variables.Variable(make_initial_value, dtype=dtypes.float32)
    # Both shape accessors must agree with the initializer's shape.
    self.assertEqual(expected_shape, v1.get_shape())
    self.assertEqual(expected_shape, v1.shape)
    self.assertAllClose(expected_value, self.evaluate(v1.initial_value))
    # Reading the variable itself must fail until it has been initialized.
    with self.assertRaises(errors_impl.FailedPreconditionError):
      self.evaluate(v1)

    v1_negated = math_ops.negative(v1.initialized_value())
    v2 = variables.Variable(v1_negated, dtype=dtypes.float32)
    self.assertEqual(v1.get_shape(), v2.get_shape())
    self.assertEqual(v1.shape, v2.shape)
    self.assertAllClose(negated_value, self.evaluate(v2.initial_value))

    with self.assertRaises(errors_impl.FailedPreconditionError):
      self.evaluate(v2)
    self.evaluate(variables.global_variables_initializer())
    self.assertAllClose(negated_value, self.evaluate(v2))
def training_loss(self, features, labels, name='training_loss'):
  """Returns the negated result of `self.average_size()` as the loss.

  `features` and `labels` are accepted but not read by this method.
  """
  mean_size = self.average_size()
  return math_ops.negative(mean_size, name=name)
def natural_exp_decay(learning_rate,
                      global_step,
                      decay_steps,
                      decay_rate,
                      staircase=False,
                      name=None):
  """Applies natural exponential decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses. This function applies an exponential decay function
  to a provided initial learning rate. It requires a `global_step` value to
  compute the decayed learning rate. You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate. It is computed as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * global_step /
                                              decay_steps)
  ```

  or, if `staircase` is `True`, as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate *
                                              floor(global_step / decay_steps))
  ```

  Example: decay with a decay rate of 0.5 over periods of 5 steps:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 5
  k = 0.5
  learning_rate = tf.train.natural_exp_decay(learning_rate, global_step,
                                             decay_steps, k)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number.
      The initial learning rate.
    global_step: A Python number or a TensorFlow variable/tensor. Global step
      to use for the decay computation. Must not be negative.
    decay_steps: How often to apply decay.
    decay_rate: A Python number. The decay rate.
    staircase: Whether to apply decay in a discrete staircase, as opposed to
      continuous, fashion.
    name: String. Optional name of the operation. Defaults to
      'NaturalExpDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`. The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.
  """
  if global_step is None:
    raise ValueError("global_step is required for natural_exp_decay.")
  with ops.name_scope(name, "NaturalExpDecay",
                      [learning_rate, global_step, decay_rate]) as name:
    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
    # Every scalar is cast to the learning rate's dtype before arithmetic.
    dtype = learning_rate.dtype
    global_step = math_ops.cast(global_step, dtype)
    decay_steps = math_ops.cast(decay_steps, dtype)
    decay_rate = math_ops.cast(decay_rate, dtype)
    p = global_step / decay_steps
    if staircase:
      # Decay in whole-period jumps instead of continuously.
      p = math_ops.floor(p)
    exponent = math_ops.exp(
        math_ops.multiply(math_ops.negative(decay_rate), p))
    return math_ops.multiply(learning_rate, exponent, name=name)
def natural_exp_decay(learning_rate,
                      global_step,
                      decay_steps,
                      decay_rate,
                      staircase=False,
                      name=None):
  """Applies natural exponential decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses. This function applies an exponential decay function
  to a provided initial learning rate. It requires a `global_step` value to
  compute the decayed learning rate. You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate. It is computed as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * global_step /
                                              decay_steps)
  ```

  or, if `staircase` is `True`, as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate *
                                              floor(global_step / decay_steps))
  ```

  Example: decay with a decay rate of 0.5 over periods of 5 steps:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 5
  k = 0.5
  learning_rate = tf.compat.v1.train.natural_exp_decay(learning_rate,
                                                       global_step,
                                                       decay_steps, k)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number.
      The initial learning rate.
    global_step: A Python number or a TensorFlow variable/tensor. Global step
      to use for the decay computation. Must not be negative.
    decay_steps: How often to apply decay.
    decay_rate: A Python number. The decay rate.
    staircase: Whether to apply decay in a discrete staircase, as opposed to
      continuous, fashion.
    name: String. Optional name of the operation; forwarded unchanged to
      `learning_rate_schedule.ExponentialDecay`.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`. The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for changing
  the learning rate value across different invocations of optimizer functions.
  @end_compatibility
  """
  # Fail fast, as documented. Without this check a missing step would only
  # surface later, and in eager mode only when the returned callable is invoked.
  if global_step is None:
    raise ValueError("global_step is required for natural_exp_decay.")
  # exp(-decay_rate) ** p == exp(-decay_rate * p), so the generic exponential
  # schedule can be reused with this per-period rate.
  natural_exp_rate = math_ops.exp(math_ops.negative(decay_rate))
  decayed_lr = learning_rate_schedule.ExponentialDecay(
      learning_rate,
      decay_steps,
      natural_exp_rate,
      staircase=staircase,
      name=name)

  if not context.executing_eagerly():
    decayed_lr = decayed_lr(global_step)
  else:
    # Defer evaluation so each call observes the current value of global_step.
    decayed_lr = functools.partial(decayed_lr, global_step)
  return decayed_lr
def GetParams(self):
  """Test for unary operations in TF-TRT.

  Builds a graph with three unary-op chains (two fed by placeholders, one by
  a random constant), combines them as `a * b / c`, and names the squeezed
  result `self.output_name`.

  Returns:
    A `trt_test.TfTrtIntegrationTestParams` describing the graph, its two
    inputs, the expected engine count and output dims, and the comparison
    tolerances.
  """
  dtype = dtypes.float32
  input_name = "input"
  input_dims = [12, 5, 8, 1, 1, 12]
  input2_name = "input_2"
  input2_dims = [12, 5, 8, 1, 12, 1, 1]
  g = ops.Graph()
  with g.as_default():
    # Chain a: element-wise unary ops on the first placeholder, with the two
    # size-1 axes squeezed away along the way.
    x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name)
    q = math_ops.abs(x)
    q = q + 1.0
    q = gen_math_ops.exp(q)
    q = gen_math_ops.log(q)
    q = array_ops.squeeze(q, axis=-2)
    q = math_ops.abs(q)
    q = q + 2.2
    q = gen_math_ops.sqrt(q)
    q = gen_math_ops.rsqrt(q)
    q = math_ops.negative(q)
    q = array_ops.squeeze(q, axis=3)
    q = math_ops.abs(q)
    q = q + 3.0
    a = gen_math_ops.reciprocal(q)

    # Chain b: fed by a constant sampled with np.random.randn, so its values
    # differ between graph builds. It has no leading axis of size 12.
    x = constant_op.constant(np.random.randn(5, 8, 12), dtype=dtype)
    q = math_ops.abs(x)
    q = q + 2.0
    q = gen_math_ops.exp(q)
    q = gen_math_ops.log(q)
    q = math_ops.abs(q)
    q = q + 2.1
    q = gen_math_ops.sqrt(q)
    q = gen_math_ops.rsqrt(q)
    q = math_ops.negative(q)
    q = math_ops.abs(q)
    q = q + 4.0
    b = gen_math_ops.reciprocal(q)

    # Chain c, fed by the second placeholder.
    # TODO(jie): this one will break, broadcasting on batch.
    x = array_ops.placeholder(
        dtype=dtype, shape=input2_dims, name=input2_name)
    q = math_ops.abs(x)
    q = q + 5.0
    q = gen_math_ops.exp(q)
    q = array_ops.squeeze(q, axis=[-1, -2, 3])
    q = gen_math_ops.log(q)
    q = math_ops.abs(q)
    q = q + 5.1
    # Re-insert size-1 axes via reshape, then drop them again with squeeze.
    q = gen_array_ops.reshape(q, [12, 5, 1, 1, 8, 1, 12])
    q = array_ops.squeeze(q, axis=[5, 2, 3])
    q = gen_math_ops.sqrt(q)
    q = math_ops.abs(q)
    q = q + 5.2
    q = gen_math_ops.rsqrt(q)
    q = math_ops.negative(q)
    q = math_ops.abs(q)
    q = q + 5.3
    c = gen_math_ops.reciprocal(q)

    q = a * b
    q = q / c
    # The return value is intentionally unused: the call only adds the named
    # output op to the graph.
    array_ops.squeeze(q, name=self.output_name)
  return trt_test.TfTrtIntegrationTestParams(
      gdef=g.as_graph_def(),
      input_names=[input_name, input2_name],
      input_dims=[input_dims, input2_dims],
      num_expected_engines=5,
      expected_output_dims=(12, 5, 8, 12),
      allclose_atol=1.e-03,
      allclose_rtol=1.e-03)
def attention(query, ):
  """Put attention masks on hidden using hidden_features and query.

  Predicts one integer position per batch row from `query`, cuts a window
  around that position out of `attention_states`, zero-pads it, and then
  applies softmax attention over the window, re-weighted by a Gaussian-shaped
  term.

  The names `a`, `dtype`, `attn_size`, `attn_length`, `attn_local_D`,
  `attn_fixed_length`, `batch_size`, `attention_states` and `linear` are not
  defined in this block.

  NOTE(review): the nesting of the `with`/`for` bodies below was
  reconstructed from whitespace-collapsed source. Confirm where each
  `variable_scope` block ends, since that decides the names of the variables
  created inside it.

  Args:
    query: A tensor, or a nested structure of tensors that is flattened and
      concatenated along axis 1.

  Returns:
    A list holding one tensor reshaped to `[batch_size, attn_size]`.
  """
  ds = []  # Results of attention reads will be stored here.
  if nest.is_sequence(query):  # If the query is a tuple, flatten it.
    query_list = nest.flatten(query)
    for q in query_list:  # Check that ndims == 2 if specified.
      ndims = q.get_shape().ndims
      if ndims:
        assert ndims == 2
    query = array_ops.concat(query_list, 1)
  with variable_scope.variable_scope("Attention_%d" % a, dtype=dtype):
    attention_vec_size = attn_size  # Size of query vectors for attention.
    # Predict the window position: p = attn_length * sigmoid(v_p . tanh(W q)).
    v_p = variable_scope.get_variable("AttnV_p%d" % a, [attention_vec_size])
    qiu = linear(query, attention_vec_size, True)
    qiu = array_ops.reshape(qiu, [batch_size, 1, 1, attention_vec_size])
    tan_v = math_ops.reduce_sum(v_p * math_ops.tanh(qiu), [2, 3])
    pt_sig = math_ops.sigmoid(tan_v)
    p = attn_length * pt_sig
    # Truncate to an integer position, then go back to float for arithmetic.
    p_t = math_ops.cast(p, dtype=dtypes.int32)
    p_t = math_ops.cast(p_t, dtype=dtypes.float32)
    p_t = array_ops.reshape(p_t, [batch_size, ])
  # Build the window [p_t - D, p_t + D + 1) for every batch row, zero-padding
  # whatever falls outside [0, attn_length).
  attention_states_windows = []
  D = attn_local_D
  for i in range(attention_states.shape[0]):
    x = tf.constant(D, dtype=dtypes.float32)
    y = math_ops.cast(p_t[i], dtype=dtypes.float32)
    z = tf.constant(attn_length, dtype=dtypes.float32)

    # Each branch returns (slice bound, number of zero rows to pad).
    def f1():
      # Window starts before index 0: clamp the start and pad in front.
      return tf.constant(0, dtype=dtypes.int32), math_ops.cast(D - p_t[i], dtype=dtypes.int32)

    def f2():
      # Window start is in range: no front padding.
      return math_ops.cast(p_t[i] - D, dtype=dtypes.int32), tf.constant(0, dtype=dtypes.int32)

    def f3():
      # Window runs past the end: clamp the end and pad behind.
      return tf.constant(attn_length, dtype=dtypes.int32), math_ops.cast(
          p_t[i] + D + 1 - attn_length, dtype=dtypes.int32)

    def f4():
      # Window end is in range: no back padding.
      return math_ops.cast(p_t[i] + D + 1, dtype=dtypes.int32), tf.constant(0, dtype=dtypes.int32)

    begin, pre_num = tf.cond(tf.less(x, y), f2, f1)
    end, last_num = tf.cond(tf.less(y + D + 1, z), f4, f3)
    # NOTE(review): this `d` is not read before being reassigned further down.
    d = tf.constant(attn_fixed_length, dtype=dtypes.int32)
    pre_tmp = tf.zeros([pre_num, attention_vec_size], dtype=dtypes.float32)
    last_tmp = tf.zeros([last_num, attention_vec_size], dtype=dtypes.float32)
    attention_states_window = math_ops.cast(attention_states[i][begin:end], dtype=dtypes.float32)
    attention_states_window = tf.concat([pre_tmp, attention_states_window], 0)
    attention_states_window = tf.concat([attention_states_window, last_tmp], 0)
    attention_states_window = tf.expand_dims(attention_states_window, 0)
    attention_states_windows.append(attention_states_window)
  attention_states_windows = tf.concat(attention_states_windows, 0)
  attention_states_windows = array_ops.reshape(attention_states_windows,
                                               [batch_size, attn_fixed_length, attention_vec_size])
  # To calculate W1 * hi we use a 1-by-1 convolution, need to reshape before.
  hidden = array_ops.reshape(attention_states_windows, [batch_size, attn_fixed_length, 1, attn_size])
  k = variable_scope.get_variable("AttnW_%d" % a, [1, 1, attn_size, attention_vec_size])
  hidden_features = nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME")
  v = variable_scope.get_variable("AttnV_%d" % a, [attention_vec_size])
  with variable_scope.variable_scope("Attention_l_%d" % a, dtype=dtype):
    # w2 * ht
    y = linear(query, attention_vec_size, True)
    y = array_ops.reshape(y, [batch_size, 1, 1, attention_vec_size])
    # Attention mask is a softmax of v^T * tanh(...).
    s = math_ops.reduce_sum(v * math_ops.tanh(hidden_features + y), [2, 3])
    ai = nn_ops.softmax(s)
    ai = tf.reshape(ai, [batch_size, attn_fixed_length, 1])
    # Gaussian-shaped re-weighting: exp(-2 * (D - pos)^2 / D^2) per window
    # slot, centred on slot D.
    center = tf.constant(D, dtype=dtypes.float32, shape=[batch_size, 1])
    extent = tf.ones([1, attn_fixed_length], dtype=dtypes.float32)
    center = center * extent
    center = tf.reshape(center, [batch_size, attn_fixed_length, 1])
    # NOTE(review): `xrange` is not a Python 3 builtin; presumably imported
    # elsewhere in the file (e.g. from six.moves) - confirm.
    pos = [i for i in xrange(attn_fixed_length)]
    pos = tf.reshape(pos, [attn_fixed_length, 1])
    pos = math_ops.cast(pos, dtype=dtypes.float32)
    value = math_ops.square(center - pos) * 2 / (D * D)
    pre = math_ops.exp(math_ops.negative(value))
    ai = ai * pre
    # Now calculate the attention-weighted vector d.
    d = math_ops.reduce_sum(
        array_ops.reshape(ai, [batch_size, attn_fixed_length, 1, 1]) * hidden,
        [1, 2])
    ds.append(array_ops.reshape(d, [batch_size, attn_size]))
  return ds
def natural_exp_decay(learning_rate,
                      global_step,
                      decay_steps,
                      decay_rate,
                      staircase=False,
                      name=None):
  """Applies natural exponential decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses. This function applies an exponential decay function
  to a provided initial learning rate. It requires a `global_step` value to
  compute the decayed learning rate. You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate. It is computed as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * global_step /
                                              decay_steps)
  ```

  or, if `staircase` is `True`, as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate *
                                              floor(global_step / decay_steps))
  ```

  Example: decay with a decay rate of 0.5 over periods of 5 steps:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 5
  k = 0.5
  learning_rate = tf.compat.v1.train.natural_exp_decay(learning_rate,
                                                       global_step,
                                                       decay_steps, k)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number.
      The initial learning rate.
    global_step: A Python number or a TensorFlow variable/tensor. Global step
      to use for the decay computation. Must not be negative.
    decay_steps: How often to apply decay.
    decay_rate: A Python number. The decay rate.
    staircase: Whether to apply decay in a discrete staircase, as opposed to
      continuous, fashion.
    name: String. Optional name of the operation; forwarded unchanged to
      `learning_rate_schedule.ExponentialDecay`.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`. The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.

  @compatibility(eager)
  When eager execution is enabled, this function returns a function which in
  turn returns the decayed learning rate Tensor. This can be useful for changing
  the learning rate value across different invocations of optimizer functions.
  @end_compatibility
  """
  # Fail fast, as documented. Without this check a missing step would only
  # surface later, and in eager mode only when the returned callable is invoked.
  if global_step is None:
    raise ValueError("global_step is required for natural_exp_decay.")
  # exp(-decay_rate) ** p == exp(-decay_rate * p), so the generic exponential
  # schedule can be reused with this per-period rate.
  natural_exp_rate = math_ops.exp(math_ops.negative(decay_rate))
  decayed_lr = learning_rate_schedule.ExponentialDecay(
      learning_rate,
      decay_steps,
      natural_exp_rate,
      staircase=staircase,
      name=name)

  if not context.executing_eagerly():
    decayed_lr = decayed_lr(global_step)
  else:
    # Defer evaluation so each call observes the current value of global_step.
    decayed_lr = functools.partial(decayed_lr, global_step)
  return decayed_lr
def called_member(self, a):
  """Returns the element-wise negation of `a`."""
  negated = math_ops.negative(a)
  return negated
def natural_exp_decay(learning_rate,
                      global_step,
                      decay_steps,
                      decay_rate,
                      staircase=False,
                      name=None):
  """Applies natural exponential decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses. This function applies an exponential decay function
  to a provided initial learning rate. It requires a `global_step` value to
  compute the decayed learning rate. You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate. It is computed as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * global_step /
  decay_steps)
  ```

  or, if `staircase` is `True`, as:

  ```python
  decayed_learning_rate = learning_rate * exp(-decay_rate * floor(global_step /
  decay_steps))
  ```

  Example: decay with a rate of 0.5 applied every 5 steps:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 5
  k = 0.5
  learning_rate = tf.train.natural_exp_decay(learning_rate, global_step,
                                             decay_steps, k)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number.
      The initial learning rate.
    global_step: A Python number or a variable such as the one in the example.
      Global step to use for the decay computation. Must not be negative.
      It is cast to the dtype of `learning_rate`.
    decay_steps: How often to apply decay. Cast to the dtype of
      `learning_rate`.
    decay_rate: A Python number. The decay rate. Cast to the dtype of
      `learning_rate`.
    staircase: Whether to apply decay in a discrete staircase, as opposed to
      continuous, fashion.
    name: String. Optional name of the operation. Defaults to
      'NaturalExpDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`. The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.
  """
  if global_step is None:
    raise ValueError("global_step is required for natural_exp_decay.")

  with ops.name_scope(name, "NaturalExpDecay",
                      [learning_rate, global_step, decay_rate]) as scope:
    base_lr = ops.convert_to_tensor(learning_rate, name="learning_rate")
    lr_dtype = base_lr.dtype

    # Bring every operand to the learning rate's dtype before mixing them.
    step = math_ops.cast(global_step, lr_dtype)
    period = math_ops.cast(decay_steps, lr_dtype)
    rate = math_ops.cast(decay_rate, lr_dtype)

    progress = step / period
    if staircase:
      # Hold the rate constant within each `decay_steps` interval.
      progress = math_ops.floor(progress)

    negated_rate = math_ops.negative(rate)
    decay_factor = math_ops.exp(math_ops.multiply(negated_rate, progress))
    return math_ops.multiply(base_lr, decay_factor, name=scope)
def tfassert_eq(_):
  """Builds two int32 placeholders, an equality assert, and their difference.

  The single positional argument is ignored. Nothing is returned; the ops are
  only added to the current default graph under the names 'x_hold', 'y_hold',
  'assert_eq' and 'x_y_diff'.
  """
  lhs = array_ops.placeholder(dtypes.int32, name='x_hold')
  rhs = array_ops.placeholder(dtypes.int32, name='y_hold')

  operands_match = math_ops.equal(lhs, rhs)
  control_flow_ops.Assert(
      operands_match, ['Expected x == y.'], name='assert_eq')

  # x - y expressed as x + (-y).
  negated_rhs = math_ops.negative(rhs)
  math_ops.add(lhs, negated_rhs, name='x_y_diff')
def tfassert_eq(_):
  """Adds an `x == y` assertion and an `x - y` op over int32 placeholders.

  The argument is unused and there is no return value: the function only
  creates graph ops named 'x_hold', 'y_hold', 'assert_eq' and 'x_y_diff'.
  """
  first = array_ops.placeholder(dtypes.int32, name='x_hold')
  second = array_ops.placeholder(dtypes.int32, name='y_hold')

  is_equal = math_ops.equal(first, second)
  control_flow_ops.Assert(is_equal, ['Expected x == y.'], name='assert_eq')

  # The difference is built as first + (-second).
  minus_second = math_ops.negative(second)
  math_ops.add(first, minus_second, name='x_y_diff')
def __neg__(self):
  """Implements unary minus (`-obj`) by delegating to `math_ops.negative`."""
  negated = math_ops.negative(self)
  return negated
def GetParams(self):
  """Test for unary operations in TF-TRT.

  Builds a graph with three chains of unary ops (two fed by placeholders, one
  by a random constant), combines them as `a * b / c`, and squeezes the result
  into the output node.

  Returns:
    A `trt_test.TfTrtIntegrationTestParams` describing the graph, its two
    inputs and the single output with expected dims (12, 5, 8, 12).
  """
  dtype = dtypes.float32
  input_name, input_dims = "input", [12, 5, 8, 1, 1, 12]
  input2_name, input2_dims = "input_2", [12, 5, 8, 1, 12, 1, 1]
  output_name = "output"

  graph = ops.Graph()
  with graph.as_default():
    # Chain "a": first placeholder, squeezed down to rank 4 along the way.
    first_input = array_ops.placeholder(
        dtype=dtype, shape=input_dims, name=input_name)
    chain_a = math_ops.abs(first_input)
    chain_a = chain_a + 1.0
    chain_a = gen_math_ops.exp(chain_a)
    chain_a = gen_math_ops.log(chain_a)
    chain_a = array_ops.squeeze(chain_a, axis=-2)
    chain_a = math_ops.abs(chain_a)
    chain_a = chain_a + 2.2
    chain_a = gen_math_ops.sqrt(chain_a)
    chain_a = gen_math_ops.rsqrt(chain_a)
    chain_a = math_ops.negative(chain_a)
    chain_a = array_ops.squeeze(chain_a, axis=3)
    chain_a = math_ops.abs(chain_a)
    chain_a = chain_a + 3.0
    a = gen_math_ops.reciprocal(chain_a)

    # Chain "b": a random constant of shape (5, 8, 12), i.e. with a leading
    # dimension of 5 rather than the 12 used by the placeholders.
    const_input = constant_op.constant(np.random.randn(5, 8, 12), dtype=dtype)
    chain_b = math_ops.abs(const_input)
    chain_b = chain_b + 2.0
    chain_b = gen_math_ops.exp(chain_b)
    chain_b = gen_math_ops.log(chain_b)
    chain_b = math_ops.abs(chain_b)
    chain_b = chain_b + 2.1
    chain_b = gen_math_ops.sqrt(chain_b)
    chain_b = gen_math_ops.rsqrt(chain_b)
    chain_b = math_ops.negative(chain_b)
    chain_b = math_ops.abs(chain_b)
    chain_b = chain_b + 4.0
    b = gen_math_ops.reciprocal(chain_b)

    # Chain "c": second placeholder, with squeeze/reshape ops interleaved.
    # TODO(jie): this one will break, broadcasting on batch.
    second_input = array_ops.placeholder(
        dtype=dtype, shape=input2_dims, name=input2_name)
    chain_c = math_ops.abs(second_input)
    chain_c = chain_c + 5.0
    chain_c = gen_math_ops.exp(chain_c)
    chain_c = array_ops.squeeze(chain_c, axis=[-1, -2, 3])
    chain_c = gen_math_ops.log(chain_c)
    chain_c = math_ops.abs(chain_c)
    chain_c = chain_c + 5.1
    chain_c = gen_array_ops.reshape(chain_c, [12, 5, 1, 1, 8, 1, 12])
    chain_c = array_ops.squeeze(chain_c, axis=[5, 2, 3])
    chain_c = gen_math_ops.sqrt(chain_c)
    chain_c = math_ops.abs(chain_c)
    chain_c = chain_c + 5.2
    chain_c = gen_math_ops.rsqrt(chain_c)
    chain_c = math_ops.negative(chain_c)
    chain_c = math_ops.abs(chain_c)
    chain_c = chain_c + 5.3
    c = gen_math_ops.reciprocal(chain_c)

    combined = a * b
    combined = combined / c
    array_ops.squeeze(combined, name=output_name)

  return trt_test.TfTrtIntegrationTestParams(
      gdef=graph.as_graph_def(),
      input_names=[input_name, input2_name],
      input_dims=[input_dims, input2_dims],
      output_names=[output_name],
      expected_output_dims=[(12, 5, 8, 12)])
def validation_loss(self, features, labels):
  """Returns the negated `self.average_size()`.

  `features` and `labels` are accepted but not used by this implementation.
  """
  mean_size = self.average_size()
  return math_ops.negative(mean_size)
def training_graph(self,
                   input_data,
                   input_labels,
                   random_seed,
                   data_spec,
                   sparse_features=None,
                   input_weights=None):
  """Constructs a TF graph for training a random tree.

  The graph accumulates per-node and per-candidate-split statistics for the
  batch, samples new candidate splits, determines finished and stale nodes,
  picks best splits, grows the tree, and reassigns accumulator slots. All
  resulting update ops are collected and grouped into a single op.

  Args:
    input_data: A tensor or placeholder for input data. `None` is replaced by
      an empty list before being passed to the ops.
    input_labels: A tensor or placeholder for labels associated with
      input_data.
    random_seed: The random number generator seed to use for this tree.  0
      means use the current time as the seed.
    data_spec: A data_ops.TensorForestDataSpec object specifying the
      original feature/columns of the data. It is serialized with
      `SerializeToString()` and handed to the ops as `input_spec`.
    sparse_features: A tf.SparseTensor for sparse input data. When `None`,
      empty lists are passed for the indices, values and shape.
    input_weights: A float tensor or placeholder holding per-input weights,
      or None if all inputs are to be weighted equally.

  Returns:
    The last op in the random tree training graph: a
    `control_flow_ops.group` over every update op built here plus the ops
    returned by `self.finish_iteration()`.
  """
  epoch = math_ops.to_int32(get_epoch_variable())

  serialized_input_spec = data_spec.SerializeToString()

  # The ops receive empty lists in place of missing optional inputs.
  if input_weights is None:
    input_weights = []

  if input_data is None:
    input_data = []

  sparse_indices = []
  sparse_values = []
  sparse_shape = []
  if sparse_features is not None:
    sparse_indices = sparse_features.indices
    sparse_values = sparse_features.values
    sparse_shape = sparse_features.dense_shape

  # Count extremely random stats.
  (node_sums, node_squares, splits_indices, splits_sums, splits_squares,
   totals_indices, totals_sums, totals_squares,
   input_leaves) = (tensor_forest_ops.count_extremely_random_stats(
       input_data,
       sparse_indices,
       sparse_values,
       sparse_shape,
       input_labels,
       input_weights,
       self.variables.tree,
       self.variables.tree_thresholds,
       self.variables.node_to_accumulator_map,
       self.variables.candidate_split_features,
       self.variables.candidate_split_thresholds,
       self.variables.start_epoch,
       epoch,
       input_spec=serialized_input_spec,
       num_classes=self.params.num_output_columns,
       regression=self.params.regression))
  # Ops that fold this batch's statistics into the per-node variables.
  node_update_ops = []
  node_update_ops.append(
      state_ops.assign_add(self.variables.node_sums, node_sums))

  # Ops that fold this batch's statistics into the candidate-split and
  # accumulator variables; later stages depend on these having run.
  splits_update_ops = []
  splits_update_ops.append(
      tensor_forest_ops.scatter_add_ndim(self.variables.candidate_split_sums,
                                         splits_indices, splits_sums))
  splits_update_ops.append(
      tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_sums,
                                         totals_indices, totals_sums))

  # The squared statistics are only updated in regression mode.
  if self.params.regression:
    node_update_ops.append(state_ops.assign_add(self.variables.node_squares,
                                                node_squares))
    splits_update_ops.append(
        tensor_forest_ops.scatter_add_ndim(
            self.variables.candidate_split_squares, splits_indices,
            splits_squares))
    splits_update_ops.append(
        tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_squares,
                                           totals_indices, totals_squares))

  # Sample inputs.
  update_indices, feature_updates, threshold_updates = (
      tensor_forest_ops.sample_inputs(
          input_data,
          sparse_indices,
          sparse_values,
          sparse_shape,
          input_weights,
          self.variables.node_to_accumulator_map,
          input_leaves,
          self.variables.candidate_split_features,
          self.variables.candidate_split_thresholds,
          input_spec=serialized_input_spec,
          split_initializations_per_input=(
              self.params.split_initializations_per_input),
          split_sampling_random_seed=random_seed))
  update_features_op = state_ops.scatter_update(
      self.variables.candidate_split_features, update_indices,
      feature_updates)
  update_thresholds_op = state_ops.scatter_update(
      self.variables.candidate_split_thresholds, update_indices,
      threshold_updates)

  # Calculate finished nodes.
  with ops.control_dependencies(splits_update_ops):
    # Passing input_leaves to finished nodes here means that nodes that
    # have become stale won't be deallocated until an input reaches them,
    # because we're trying to avoid considering every fertile node for
    # performance reasons.
    finished, stale = tensor_forest_ops.finished_nodes(
        input_leaves,
        self.variables.node_to_accumulator_map,
        self.variables.candidate_split_sums,
        self.variables.candidate_split_squares,
        self.variables.accumulator_sums,
        self.variables.accumulator_squares,
        self.variables.start_epoch,
        epoch,
        num_split_after_samples=self.params.split_after_samples,
        min_split_samples=self.params.min_split_samples,
        dominate_method=self.params.dominate_method,
        dominate_fraction=self.params.dominate_fraction)

  # Update leaf scores.
  # TODO(thomaswc): Store the leaf scores in a TopN and only update the
  # scores of the leaves that were touched by this batch of input.
  # First column of the tree variable, compared against LEAF_NODE to find
  # the leaves.
  children = array_ops.squeeze(
      array_ops.slice(self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1])
  is_leaf = math_ops.equal(constants.LEAF_NODE, children)
  leaves = math_ops.to_int32(
      array_ops.squeeze(
          array_ops.where(is_leaf), squeeze_dims=[1]))
  # Leaves whose node_to_accumulator_map entry is negative.
  non_fertile_leaves = array_ops.boolean_mask(
      leaves, math_ops.less(array_ops.gather(
          self.variables.node_to_accumulator_map, leaves), 0))

  # TODO(gilberth): It should be possible to limit the number of non
  # fertile leaves we calculate scores for, especially since we can only take
  # at most array_ops.shape(finished)[0] of them.
  with ops.control_dependencies(node_update_ops):
    sums = array_ops.gather(self.variables.node_sums, non_fertile_leaves)
    if self.params.regression:
      squares = array_ops.gather(self.variables.node_squares,
                                 non_fertile_leaves)
      non_fertile_leaf_scores = self._variance(sums, squares)
    else:
      non_fertile_leaf_scores = self._weighted_gini(sums)

  # Calculate best splits.
  with ops.control_dependencies(splits_update_ops):
    split_indices = tensor_forest_ops.best_splits(
        finished,
        self.variables.node_to_accumulator_map,
        self.variables.candidate_split_sums,
        self.variables.candidate_split_squares,
        self.variables.accumulator_sums,
        self.variables.accumulator_squares,
        regression=self.params.regression)

  # Grow tree.
  with ops.control_dependencies([update_features_op, update_thresholds_op,
                                 non_fertile_leaves.op]):
    (tree_update_indices, tree_children_updates, tree_threshold_updates,
     new_eot) = (tensor_forest_ops.grow_tree(
         self.variables.end_of_tree, self.variables.node_to_accumulator_map,
         finished, split_indices, self.variables.candidate_split_features,
         self.variables.candidate_split_thresholds))
    tree_update_op = state_ops.scatter_update(
        self.variables.tree, tree_update_indices, tree_children_updates)
    thresholds_update_op = state_ops.scatter_update(
        self.variables.tree_thresholds, tree_update_indices,
        tree_threshold_updates)
    # TODO(thomaswc): Only update the epoch on the new leaves.
    # Every updated tree index gets the current epoch as its start epoch.
    new_epoch_updates = epoch * array_ops.ones_like(tree_threshold_updates,
                                                    dtype=dtypes.int32)
    epoch_update_op = state_ops.scatter_update(
        self.variables.start_epoch, tree_update_indices,
        new_epoch_updates)

  # Update fertile slots.
  with ops.control_dependencies([tree_update_op]):
    (n2a_map_updates, a2n_map_updates, accumulators_cleared,
     accumulators_allocated) = (tensor_forest_ops.update_fertile_slots(
         finished,
         non_fertile_leaves,
         non_fertile_leaf_scores,
         self.variables.end_of_tree,
         self.variables.accumulator_sums,
         self.variables.node_to_accumulator_map,
         stale,
         self.variables.node_sums,
         regression=self.params.regression))

  # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
  # used it to calculate new leaves.
  with ops.control_dependencies([n2a_map_updates.op]):
    eot_update_op = state_ops.assign(self.variables.end_of_tree, new_eot)

  # Collect every op that must run for one training step.
  updates = []
  updates.append(eot_update_op)
  updates.append(tree_update_op)
  updates.append(thresholds_update_op)
  updates.append(epoch_update_op)

  # Row 0 of each map update holds the indices, row 1 the new values.
  updates.append(
      state_ops.scatter_update(self.variables.node_to_accumulator_map,
                               n2a_map_updates[0], n2a_map_updates[1]))

  updates.append(
      state_ops.scatter_update(self.variables.accumulator_to_node_map,
                               a2n_map_updates[0], a2n_map_updates[1]))

  cleared_and_allocated_accumulators = array_ops.concat(
      [accumulators_cleared, accumulators_allocated], 0)

  # Calculate values to put into scatter update for candidate counts.
  # Candidate split counts are always reset back to 0 for both cleared
  # and allocated accumulators. This means some accumulators might be doubly
  # reset to 0 if the were released and not allocated, then later allocated.
  # Zeros of shape [n, num_splits_to_consider, num_output_columns], where n
  # is the number of cleared plus allocated accumulators.
  split_values = array_ops.tile(
      array_ops.expand_dims(array_ops.expand_dims(
          array_ops.zeros_like(cleared_and_allocated_accumulators,
                               dtype=dtypes.float32), 1), 2),
      [1, self.params.num_splits_to_consider, self.params.num_output_columns])
  updates.append(state_ops.scatter_update(
      self.variables.candidate_split_sums,
      cleared_and_allocated_accumulators, split_values))
  if self.params.regression:
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_squares,
        cleared_and_allocated_accumulators, split_values))

  # Calculate values to put into scatter update for total counts.
  # Cleared accumulators are filled with -1, newly allocated ones with 0.
  total_cleared = array_ops.tile(
      array_ops.expand_dims(
          math_ops.negative(array_ops.ones_like(accumulators_cleared,
                                                dtype=dtypes.float32)), 1),
      [1, self.params.num_output_columns])
  total_reset = array_ops.tile(
      array_ops.expand_dims(
          array_ops.zeros_like(accumulators_allocated,
                               dtype=dtypes.float32), 1),
      [1, self.params.num_output_columns])
  accumulator_updates = array_ops.concat([total_cleared, total_reset], 0)
  updates.append(state_ops.scatter_update(
      self.variables.accumulator_sums,
      cleared_and_allocated_accumulators, accumulator_updates))
  if self.params.regression:
    updates.append(state_ops.scatter_update(
        self.variables.accumulator_squares,
        cleared_and_allocated_accumulators, accumulator_updates))

  # Calculate values to put into scatter update for candidate splits.
  # Every candidate feature slot of the affected accumulators is set to -1.
  split_features_updates = array_ops.tile(
      array_ops.expand_dims(
          math_ops.negative(array_ops.ones_like(
              cleared_and_allocated_accumulators)), 1),
      [1, self.params.num_splits_to_consider])
  updates.append(state_ops.scatter_update(
      self.variables.candidate_split_features,
      cleared_and_allocated_accumulators, split_features_updates))

  updates += self.finish_iteration()

  return control_flow_ops.group(*updates)
def __call__(self, position, query, delta=delta):
  """Put attention masks on hidden using hidden_features and query.

  For every batch element a window of `2 * delta + 1` rows is cut out of
  `self._values` around the position returned by `next_position`, zero-padded
  at whichever end runs past the stored sequence. The windows are scored
  against the projected query, softmax-normalised, and re-weighted by a
  Gaussian-shaped factor centred on the middle of the window.

  NOTE(review): the batch size is fixed at 32 inside this method; inputs with
  any other batch size are not supported.

  Args:
    position: Previous position; forwarded to `next_position`.
    query: Query tensor; forwarded to `next_position` and projected with
      `self._query_layer`.
    delta: Half-width `D` of the attention window.

  Returns:
    A tuple `(position, ai, context)` where `position` is the first value
    returned by `next_position`, `ai` holds the squeezed attention weights
    and `context` is the weight-summed window, reduced over the window axis.
  """
  batch_size = 32
  # `scale` is the second value returned by next_position; it multiplies the
  # attention weights per batch element further below.
  position, scale = next_position(position, query)
  D = delta
  attn_fixed_length = 2 * delta + 1

  attention_states_windows = []
  for i in range(batch_size):
    x = tf.constant(D, dtype=dtypes.float32)
    y = math_ops.cast(position[i], dtype=dtypes.float32)

    # The four branch functions below are invoked by tf.cond within this same
    # iteration, so closing over the loop variable `i` is safe.
    def f1():
      # Window starts before the sequence: begin at 0 and pad the front.
      return tf.constant(0, dtype=dtypes.int32), math_ops.cast(
          D - position[i] + 1, dtype=dtypes.int32)

    def f2():
      # Window start lies inside the sequence: no front padding.
      return math_ops.cast(position[i] - D, dtype=dtypes.int32), tf.constant(
          0, dtype=dtypes.int32)

    def f3():
      # Window runs past the sequence end: stop at the end and pad the back.
      return self._memory_sequence_length[i], math_ops.cast(
          position[i] + D + 2 - tf.cast(self._memory_sequence_length[i],
                                        tf.float32),
          dtype=dtypes.int32)

    def f4():
      # Window end lies inside the sequence: no back padding.
      return math_ops.cast(position[i] + D + 1,
                           dtype=dtypes.int32), tf.constant(
                               0, dtype=dtypes.int32)

    begin, pre_num = tf.cond(tf.less(x, y), f2, f1)
    end, last_num = tf.cond(
        y + D + 1 < tf.cast(self._memory_sequence_length[i], tf.float32),
        f4, f3)

    pre_tmp = tf.zeros([pre_num, self._num_units], dtype=dtypes.float32)
    last_tmp = tf.zeros([last_num, self._num_units], dtype=dtypes.float32)

    attention_states_window = math_ops.cast(self._values[i][begin:end],
                                            dtype=dtypes.float32)
    attention_states_window = tf.concat(
        [pre_tmp, attention_states_window], 0)
    attention_states_window = tf.concat(
        [attention_states_window, last_tmp], 0)
    # Padding may overshoot by a row; truncate to the fixed window length.
    attention_states_window = attention_states_window[0:attn_fixed_length]
    attention_states_window = tf.expand_dims(attention_states_window, 0)
    attention_states_windows.append(attention_states_window)

  attention_states_windows = tf.concat(attention_states_windows, 0)
  attention_states_windows = array_ops.reshape(
      attention_states_windows,
      [batch_size, attn_fixed_length, self._num_units])

  hidden_features = attention_states_windows
  v = variable_scope.get_variable("v", [self._num_units])

  with variable_scope.variable_scope("Attention_l"):
    # w2 * ht
    y = self._query_layer(query)
    y = array_ops.reshape(y, [batch_size, 1, self._num_units])

    # Attention mask is a softmax of v^T * tanh(...).
    s = math_ops.reduce_sum(v * math_ops.tanh(hidden_features + y), 2)
    ai = nn_ops.softmax(s)
    ai = tf.reshape(ai, [batch_size, attn_fixed_length, 1])

    # Gaussian-shaped re-weighting: exp(-2 * (D - pos)^2 / D^2), centred on
    # index D, i.e. the middle of the 2 * D + 1 wide window.
    center = tf.constant(D, dtype=dtypes.float32, shape=[batch_size, 1])
    extent = tf.ones([1, attn_fixed_length], dtype=dtypes.float32)
    center = center * extent
    center = tf.reshape(center, [batch_size, attn_fixed_length, 1])

    pos = list(range(attn_fixed_length))
    pos = tf.reshape(pos, [attn_fixed_length, 1])
    pos = math_ops.cast(pos, dtype=dtypes.float32)

    value = math_ops.square(center - pos) * 2 / (D * D)
    pre = math_ops.exp(math_ops.negative(value))

    scale = tf.reshape(scale, [batch_size, 1, 1])
    ai = scale * ai * pre

    # Now calculate the attention-weighted vector d.
    context = math_ops.reduce_sum(ai * hidden_features, 1)
    ai = tf.squeeze(ai)

  return position, ai, context
def attention(query):
  """Put attention masks on hidden using hidden_features and query.

  For each attention head, a centre position is predicted from the query, the
  attention states are scored against the query with a softmax, and the scores
  are re-weighted by a Gaussian-shaped factor around the predicted centre.

  Args:
    query: A tensor, or a nested structure of tensors which is flattened and
      concatenated along axis 1. Parts with a known rank must have rank 2.

  Returns:
    A list with one entry per head, each reshaped to `[-1, attn_size]`.
  """
  reads = []  # Results of attention reads will be stored here.

  if nest.is_sequence(query):  # If the query is a tuple, flatten it.
    flat_parts = nest.flatten(query)
    for part in flat_parts:  # Check that ndims == 2 if specified.
      rank = part.get_shape().ndims
      assert not rank or rank == 2
    query = array_ops.concat(flat_parts, 1)

  for head in xrange(num_heads):
    with variable_scope.variable_scope("Attention_%d" % head, dtype=dtype):
      attention_vec_size = attn_size  # Size of query vectors for attention.

      # Predicted centre: attn_length * sigmoid(v_p^T * tanh(W_p * query)),
      # truncated to a whole number by the int32 round trip.
      center_vec = variable_scope.get_variable("AttnV_p%d" % head,
                                               [attention_vec_size])
      projected = linear(query, attention_vec_size, True)
      projected = array_ops.reshape(projected,
                                    [-1, 1, 1, attention_vec_size])
      center_logit = math_ops.reduce_sum(
          center_vec * math_ops.tanh(projected), [2, 3])
      center_frac = math_ops.sigmoid(center_logit)
      center_pos = attn_length * center_frac
      center = math_ops.cast(center_pos, dtype=dtypes.int32)
      center = math_ops.cast(center, dtype=dtypes.float32)

      # To calculate W1 * hi we use a 1-by-1 convolution, need to reshape
      # before.
      hidden = array_ops.reshape(attention_states,
                                 [-1, attn_length, 1, attn_size])
      conv_kernel = variable_scope.get_variable(
          "AttnW_%d" % head, [1, 1, attn_size, attention_vec_size])
      hidden_features = nn_ops.conv2d(hidden, conv_kernel, [1, 1, 1, 1],
                                      "SAME")
      score_vec = variable_scope.get_variable("AttnV_%d" % head,
                                              [attention_vec_size])

    with variable_scope.variable_scope("Attention_l_%d" % head, dtype=dtype):
      # w2 * ht
      query_proj = linear(query, attention_vec_size, True)
      query_proj = array_ops.reshape(query_proj,
                                     [-1, 1, 1, attention_vec_size])

      # Attention mask is a softmax of v^T * tanh(...).
      scores = math_ops.reduce_sum(
          score_vec * math_ops.tanh(hidden_features + query_proj), [2, 3])
      weights = nn_ops.softmax(scores)
      weights = tf.reshape(weights, [-1, attn_length, 1])

      # Broadcast the centre across the attention positions.
      ones_row = tf.ones([1, attn_length], dtype=dtypes.float32)
      center = center * ones_row
      center = tf.reshape(center, [-1, attn_length, 1])

      positions = list(xrange(attn_length))
      positions = tf.reshape(positions, [attn_length, 1])
      positions = math_ops.cast(positions, dtype=dtypes.float32)

      # Gaussian-shaped factor: exp(-2 * (p_t - pos)^2 / D^2).
      exponent = (math_ops.square(center - positions) * 2 /
                  (attn_local_D * attn_local_D))
      gaussian = math_ops.exp(math_ops.negative(exponent))
      weights = weights * gaussian

      # Now calculate the attention-weighted vector d.
      read = math_ops.reduce_sum(
          array_ops.reshape(weights, [-1, attn_length, 1, 1]) * hidden,
          [1, 2])
      reads.append(array_ops.reshape(read, [-1, attn_size]))

  return reads