def _testTensorArrayEvalEmptyWithDefault(self):
  """Stacks (and, without control flow v2, concats) an empty TensorArray.

  A size-0 float32 array is filled by unstacking zeros of shape [0, 3, 5].
  The compiled size must be 0 and the stacked value must have shape
  [0, 3, 5]. The concat check is skipped when
  `control_flow_util.ENABLE_CONTROL_FLOW_V2` is set.
  """
  with self.cached_session(), self.test_scope():

    def size_and_stack():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, size=0, infer_shape=True)
      initial_size = array.size()
      array = array.unstack(array_ops.zeros([0, 3, 5]))
      return [initial_size, array.stack()]

    size, stack = self.evaluate(xla.compile(size_and_stack))
    self.assertEqual(0, size)
    self.assertAllEqual([0, 3, 5], stack.shape)

    # TODO(b/122315751): Enable this.
    if control_flow_util.ENABLE_CONTROL_FLOW_V2:
      return

    def concat_empty():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, size=0, infer_shape=True)
      return array.unstack(array_ops.zeros([0, 3, 5])).concat()

    # Concatenating zero tensors along their first dimension gives a
    # first dimension of zero
    concat_value = self.evaluate(xla.compile(concat_empty))[0]
    self.assertAllEqual([0, 5], concat_value.shape)
def testTensorArrayReadWrongIndexOrDataTypeFails(self):
  """Reads a TensorArray with a mismatched dtype, then at an unwritten index.

  Two different entries of `self.float_types` are required: the array is
  created with the first and read with the second. With fewer than two float
  types nothing is executed.
  """
  if len(self.float_types) <= 1:
    return
  write_dtype, read_dtype = list(self.float_types)[:2]

  with self.cached_session(), self.test_scope():

    def read_with_wrong_dtype():
      array = tensor_array_ops.TensorArray(
          dtype=write_dtype, tensor_array_name="foo", size=3)
      written = array.write(0, math_ops.cast([[4.0, 5.0]], write_dtype))
      # Test reading wrong datatype.
      return gen_data_flow_ops.tensor_array_read_v3(
          handle=written.handle,
          index=0,
          dtype=read_dtype,
          flow_in=written.flow)

    with self.assertRaisesOpError("TensorArray dtype is "):
      self.evaluate(xla.compile(read_with_wrong_dtype))

    def read_unwritten_index():
      array = tensor_array_ops.TensorArray(
          dtype=write_dtype, tensor_array_name="foo", size=3)
      written = array.write(0, math_ops.cast([[4.0, 5.0]], write_dtype))
      # Test reading from a different index than the one we wrote to
      with ops.control_dependencies([written.read(1)]):
        return 1.0

    # No error is expected here; the result is simply evaluated.
    xla.compile(read_unwritten_index)[0].eval()
def _testTensorArraySplitRead(self, tf_dtype):
  """Splits empty, vector and matrix inputs into three pieces and reads them.

  Args:
    tf_dtype: dtype of the TensorArray; also passed to `_make_converter` to
      build the input and expected values.
  """
  with self.cached_session(), self.test_scope():
    convert = _make_converter(tf_dtype)

    def split_and_read(values, lengths):
      """Compiles a split of `values` by `lengths` and evaluates reads 0..2."""

      def build():
        array = tensor_array_ops.TensorArray(
            dtype=tf_dtype, tensor_array_name="foo", size=3)
        lengths_t = constant_op.constant(lengths)
        pieces = array.split(convert(values), lengths=lengths_t)
        return [pieces.read(index) for index in range(3)]

      return self.evaluate(xla.compile(build))

    # Split an empty vector.
    d0, d1, d2 = split_and_read([], [0, 0, 0])
    for piece in (d0, d1, d2):
      self.assertAllEqual(convert([]), piece)

    # Split a vector.
    d0, d1, d2 = split_and_read([1.0, 2.0, 3.0], [1, 1, 1])
    self.assertAllEqual(convert([1.0]), d0)
    self.assertAllEqual(convert([2.0]), d1)
    self.assertAllEqual(convert([3.0]), d2)

    # Split a matrix.
    d0, d1, d2 = split_and_read(
        [[1.0, 101.0], [2.0, 201.0], [3.0, 301.0]], [1, 1, 1])
    self.assertAllEqual(convert([[1.0, 101.0]]), d0)
    self.assertAllEqual(convert([[2.0, 201.0]]), d1)
    self.assertAllEqual(convert([[3.0, 301.0]]), d2)
def test_xla_compile_eager(self):
  """Tests that xla.compile raises proper exception when used eagerly."""

  def computation():
    return 1

  # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias, which
  # no longer exists in Python 3.12.
  with self.assertRaisesRegex(
      RuntimeError, 'xla.experimental.compile is not supported when eager '
      'execution is enabled. Try use it inside tf.function.'):
    xla.compile(computation)
def _testTensorArrayUnpackRead(self, tf_dtype):
  """Unstacks vector, matrix and empty-matrix inputs and reads each element.

  Args:
    tf_dtype: dtype of the TensorArray; also passed to `_make_converter` to
      build the input and expected values.
  """
  with self.cached_session(), self.test_scope():
    convert = _make_converter(tf_dtype)

    def unstack_and_read(values):
      """Compiles an unstack of `values` and evaluates reads 0..2."""

      def build():
        # A fresh array per case: the element shape differs between cases.
        array = tensor_array_ops.TensorArray(
            dtype=tf_dtype, tensor_array_name="foo", size=3)
        unpacked = array.unstack(convert(values))
        return [unpacked.read(index) for index in range(3)]

      return self.evaluate(xla.compile(build))

    # Unpack a vector into scalars
    d0, d1, d2 = unstack_and_read([1.0, 2.0, 3.0])
    self.assertAllEqual(convert(1.0), d0)
    self.assertAllEqual(convert(2.0), d1)
    self.assertAllEqual(convert(3.0), d2)

    # Unpack a matrix into vectors.
    d0, d1, d2 = unstack_and_read([[1.0, 1.1], [2.0, 2.1], [3.0, 3.1]])
    self.assertAllEqual(convert([1.0, 1.1]), d0)
    self.assertAllEqual(convert([2.0, 2.1]), d1)
    self.assertAllEqual(convert([3.0, 3.1]), d2)

    # Try unpacking an empty matrix, which should not cause an error.
    d0, d1, d2 = unstack_and_read([[], [], []])
    for element in (d0, d1, d2):
      self.assertAllEqual(convert([]), element)
def testTensorArrayWriteWrongIndexOrDataTypeFails(self):
  """Expects an op error when an int32 value is written to a float32 array."""
  expected_error = "TensorArray dtype is float but op has dtype int32"
  with self.cached_session(), self.test_scope():

    def write_int_into_float_array():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, tensor_array_name="foo", size=3)
      bad_write = array.write(-1, np.int32(7))
      return bad_write.flow

    # Test writing the wrong datatype.
    with self.assertRaisesOpError(expected_error):
      xla.compile(write_int_into_float_array)[0].eval()
def _testTensorArrayEvalEmpty(self):
  """Expects InvalidArgumentError when stacking a size-0, shape-less array."""
  with self.cached_session(), self.test_scope():

    def stack_empty():
      empty = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, size=0, infer_shape=False)
      return empty.stack()

    with self.assertRaisesWithPredicateMatch(
        errors.InvalidArgumentError, "Uninitialized TensorArray passed to "
        "TensorArrayStack/TensorArrayGatherV3"):
      xla.compile(stack_empty)[0].eval()
def testSumOfTwoReadVariablesWithoutRepeatGrad(self):
  """Checks gradients of a sum of two TensorArray reads w.r.t. each input.

  Two tensors are written to a size-2 array, read back and added. The
  gradient with respect to either input must equal the upstream gradient
  `g0`, whether the two gradients are evaluated separately or together.
  """
  base = np.arange(3 * 5, dtype=np.float32).reshape(3, 5) + 1
  with self.cached_session(), self.test_scope():
    g0 = -base

    def sum_gradients():
      a = array_ops.identity(base)
      b = array_ops.identity(base + 3 * 5)
      array = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=2)
      array = array.write(0, a, name="write_a")
      array = array.write(1, b, name="write_b")
      read_a = array.read(0, name="read_a_0")
      read_b = array.read(1, name="read_b_0")
      total = read_a + read_b  # a + b
      # d(a+b)/da = 1 and d(a+b)/db = 1, so both gradients equal g0.
      return [
          gradients_impl.gradients([total], [a], [g0])[0],
          gradients_impl.gradients([total], [b], [g0])[0],
      ]

    grad_a, grad_b = xla.compile(sum_gradients)

    # Test gradients calculated individually
    for grad in (grad_a, grad_b):
      grad_t, = self.evaluate([grad])
      self.assertAllEqual(grad_t, g0)

    # Test gradients calculated jointly.
    for joint_grad_t in self.evaluate([grad_a, grad_b]):
      self.assertAllEqual(joint_grad_t, g0)
def testTensorGradArrayWriteRead(self):
  """Writes and reads back both a TensorArray and its "grad" array."""
  values = [[[4.0]], [[1.0]], [[-3.0]]]
  grad_values = [[[5.0]], [[2.0]], [[-2.0]]]
  with self.cached_session(), self.test_scope():

    def write_then_read():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, tensor_array_name="foo", size=3)
      for index, value in enumerate(values):
        array = array.write(index, value)

      grad_array = array.grad("grad")
      for index, value in enumerate(grad_values):
        grad_array = grad_array.write(index, value)

      reads = [array.read(index) for index in range(3)]
      grad_reads = [grad_array.read(index) for index in range(3)]
      return reads + grad_reads

    d0, d1, d2, g_d0, g_d1, g_d2 = self.evaluate(
        xla.compile(write_then_read))
    for expected, actual in zip(values, (d0, d1, d2)):
      self.assertAllEqual(expected, actual)
    for expected, actual in zip(grad_values, (g_d0, g_d1, g_d2)):
      self.assertAllEqual(expected, actual)
def _testTensorArrayGradientUnpackRead(self):
  """Checks that gradients of repeated reads of one element are summed.

  Element 0 is read twice and element 1 once; the gradient with respect to
  the unstacked value must add both upstream gradients for row 0.
  """
  grad_read0 = [2.0, 3.0]
  grad_read0_again = [-1.5, 1.5]
  grad_read1 = [4.0, 5.0]
  with self.cached_session(), self.test_scope():

    def unpack_read_gradient():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32,
          tensor_array_name="foo",
          size=2,
          clear_after_read=False)
      value = constant_op.constant([[1.0, -1.0], [10.0, -10.0]])
      unpacked = array.unstack(value)
      first = unpacked.read(0)
      first_again = unpacked.read(0)
      second = unpacked.read(1)

      # Test combined gradients + aggregation of read(0).
      return gradients_impl.gradients(
          ys=[first, first_again, second],
          xs=[value],
          grad_ys=[grad_read0, grad_read0_again, grad_read1])

    grad_vals = self.evaluate(xla.compile(unpack_read_gradient))
    self.assertEqual(len(grad_vals), 1)
    expected = [[2.0 - 1.5, 3.0 + 1.5], [4.0, 5.0]]
    self.assertAllEqual(expected, grad_vals[0])
def testTensorArrayGradientSplitConcat(self):
  """Checks that the upstream gradient passes unchanged through split+concat."""
  upstream = [[2.0, -2.0], [20.0, -20.0], [200.0, -200.0], [2000.0, -2000.0]]
  with self.cached_session(), self.test_scope():

    def split_concat_gradient():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, tensor_array_name="foo", size=2)
      value = constant_op.constant([[1.0, -1.0], [10.0, -10.0],
                                    [100.0, -100.0], [1000.0, -1000.0]])
      halves = array.split(value, [2, 2])
      joined = halves.concat()

      # Test combined gradients
      return gradients_impl.gradients(
          ys=[joined], xs=[value], grad_ys=[upstream])

    grad_vals = self.evaluate(xla.compile(split_concat_gradient))
    self.assertEqual(len(grad_vals), 1)
    self.assertAllEqual(upstream, grad_vals[0])
def testTensorArrayScatterReadAndGradients(self):
  """Scatters two rows, reads them at fed indices and checks the gradient.

  The read indices are int32 placeholders fed with 1 and 8, matching the
  indices the rows were scattered to.
  """
  with self.cached_session() as session, self.test_scope():
    id0 = array_ops.placeholder(dtypes.int32)
    id1 = array_ops.placeholder(dtypes.int32)

    def scatter_read_gradient():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, tensor_array_name="foo", size=10)
      indices = constant_op.constant([1, 8])
      value = constant_op.constant([[1.0, -1.0], [10.0, -10.0]])
      scattered = array.scatter(indices, value)
      read_first = scattered.read(id0)
      read_second = scattered.read(id1)

      # Test combined gradients + aggregation of read(0).
      grad = gradients_impl.gradients(
          ys=[read_first, read_second],
          xs=[value],
          grad_ys=[[2.0, 3.0], [4.0, 5.0]])
      return [[read_first, read_second], grad]

    feeds = {id0: 1, id1: 8}
    read_vals, grad_vals = session.run(
        xla.compile(scatter_read_gradient), feed_dict=feeds)

    self.assertEqual(len(read_vals), 2)
    self.assertEqual(len(grad_vals), 1)
    self.assertAllEqual([1.0, -1.0], read_vals[0])
    self.assertAllEqual([10.0, -10.0], read_vals[1])
    self.assertAllEqual([[2.0, 3.0], [4.0, 5.0]], grad_vals[0])
def test_xla_compile_eager(self):
  """Tests that xla.compile of a two-argument sum evaluates to the sum.

  The previous docstring claimed an exception is raised; the body instead
  checks that compiling `a + b` with inputs 1 and 2 yields 3.
  """

  def computation(a, b):
    return a + b

  compiled = xla.compile(computation, inputs=[1, 2])
  self.assertEqual(self.evaluate(compiled[0]), 3)
def testTensorArrayWriteGatherAndGradients(self):
  """Unstacks ten rows, gathers rows 1 and 8 and checks values and gradient."""
  with self.cached_session(), self.test_scope():

    def gather_with_gradient():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, tensor_array_name="foo", size=10)
      values = constant_op.constant([[1.0 * x, -1.0 * x] for x in range(10)])
      indices = constant_op.constant([1, 8])
      gathered = array.unstack(values).gather(indices)

      # Test combined gradients + aggregation of read(0).
      grad = gradients_impl.gradients(
          ys=[gathered], xs=[values], grad_ys=[[[2.0, 3.0], [4.0, 5.0]]])
      return [[gathered], grad]

    g_vals, grad_vals = self.evaluate(xla.compile(gather_with_gradient))

    # Gradients for 8 of the 10 unread components are zero.
    expected_grad = np.zeros((10, 2))
    for row, row_grad in ((1, [2.0, 3.0]), (8, [4.0, 5.0])):
      expected_grad[row] = row_grad

    self.assertEqual(len(g_vals), 1)
    self.assertEqual(len(grad_vals), 1)
    self.assertAllEqual([[1.0, -1.0], [8.0, -8.0]], g_vals[0])
    self.assertAllEqual(expected_grad, grad_vals[0])
def testTensorGradAccessTwiceReceiveSameObject(self):
  """Writes through one `grad("grad")` handle and reads through a second one.

  Both gradient arrays are requested from the same TensorArray with the same
  source name; the value written via the first must be readable via the
  second.
  """
  with self.cached_session(), self.test_scope():
    ta_out = {}

    def write_one_read_other():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32,
          tensor_array_name="foo",
          size=3,
          element_shape=[1, 2])
      grad_first = array.grad("grad")
      grad_second = array.grad("grad")

      # Handles are recorded in the enclosing dict; nothing below reads them.
      ta_out[0] = grad_first.handle
      ta_out[1] = grad_second.handle

      write_flow = grad_first.write(0, [[4.0, 5.0]]).flow
      with ops.control_dependencies([write_flow]):
        # Write with one gradient handle, read with another copy of it
        read_back = grad_second.read(0)

      handle_ops = [grad_first.handle.op, grad_second.handle.op]
      with ops.control_dependencies(handle_ops):
        return [read_back]

    [d_r1_0] = self.evaluate(xla.compile(write_one_read_other))
    self.assertAllEqual([[4.0, 5.0]], d_r1_0)
def _testTensorArrayGradientWritePackConcatAndRead(self):
  """Checks gradient accumulation across stack, read(0) and concat.

  Two vectors are written; the gradient for each input must be the sum of
  the upstream gradients coming from every op that consumed it.
  """
  stack_grad = [[2.0, 3.0], [4.0, 5.0]]
  read0_grad = [-0.5, 1.5]
  concat_grad = [20.0, 30.0, 40.0, 50.0]
  with self.cached_session(), self.test_scope():

    def accumulated_gradients():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32,
          tensor_array_name="foo",
          size=2,
          clear_after_read=False)
      value_0 = constant_op.constant([-1.0, 1.0])
      value_1 = constant_op.constant([-10.0, 10.0])
      filled = array.write(0, value_0).write(1, value_1)
      stacked = filled.stack()
      first = filled.read(0)
      concatenated = filled.concat()

      # Test gradient accumulation between read(0), pack(), and concat().
      with ops.control_dependencies([stacked, first, concatenated]):
        return gradients_impl.gradients(
            ys=[stacked, first, concatenated],
            xs=[value_0, value_1],
            grad_ys=[stack_grad, read0_grad, concat_grad])

    # 2 + 2 entries
    grad_vals = self.evaluate(xla.compile(accumulated_gradients))

    self.assertAllClose([2.0 - 0.5 + 20.0, 3.0 + 1.5 + 30.0], grad_vals[0])
    self.assertAllEqual([4.0 + 40.0, 5.0 + 50.0], grad_vals[1])
def func_wrapper():
  """Compiles a computation that adds 1 to `a`, subtracts 2, and reads it.

  `a` is taken from the enclosing scope (not visible in this block).

  Returns:
    The result of `xla.compile` on that computation.
  """

  def update_and_read():
    a.assign_add(1)
    a.assign_sub(2)
    current = a.read_value()
    return current

  compiled = xla.compile(update_and_read)
  return compiled
def testSizeTensorArray(self):
  """Checks that `size()` of a TensorArray created with size=3 is 3."""
  with self.cached_session(), self.test_scope():

    def array_size():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, tensor_array_name="foo", size=3)
      return array.size()

    outputs = self.evaluate(xla.compile(array_size))
    self.assertAllEqual(3, outputs[0])
def testTensorArrayWriteWrongIndexOrDataTypeFails(self):
  """Expects an error when an int32 constant is written to a float32 array.

  Either a ValueError (dtype conversion message) or an InvalidArgumentError
  (TensorArray dtype message) is accepted.
  """
  with self.cached_session(), self.test_scope():

    def fn():
      ta = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, tensor_array_name="foo", size=3)
      return ta.write(-1, constant_op.constant(7)).flow

    # Test writing the wrong datatype.
    # TODO(b/129870929): Remove InvalidArgumentError/second regexp after all
    # callers provide proper init dtype.
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which no longer exists in Python 3.12.
    with self.assertRaisesRegex(
        (ValueError, errors.InvalidArgumentError), r"("
        r"conversion requested dtype float32 for Tensor with dtype int32"
        r"|"
        r"TensorArray dtype is float but op has dtype int32"
        r")"):
      xla.compile(fn)[0].eval()
def testCloseTensorArray(self):
  """Checks that closing a freshly created TensorArray evaluates cleanly."""
  with self.cached_session(), self.test_scope():

    def close_array():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, tensor_array_name="foo", size=3)
      close_op = array.close()
      # The constant is returned only to give the close op a consumer.
      with ops.control_dependencies([close_op]):
        return 1.0

    result = xla.compile(close_array)[0]
    self.evaluate(result)
def testTensorArraySplitIncompatibleShapesFails(self):
  """Checks the errors raised by `split` for four invalid value/lengths pairs."""
  with self.cached_session(), self.test_scope():

    def split_flow(value, lengths, size=3):
      """Returns a computation that splits `value` and returns the flow."""

      def build():
        array = tensor_array_ops.TensorArray(
            dtype=dtypes.float32,
            tensor_array_name="foo",
            size=size,
            infer_shape=False)
        return array.split(value, lengths).flow

      return build

    # Scalar lengths.
    with self.assertRaisesWithPredicateMatch(
        ValueError, r"Shape must be rank 1 but is rank 0"):
      xla.compile(split_flow([1.0, 2.0, 3.0], 1))[0].eval()

    # Unequal lengths.
    with self.assertRaisesOpError(r"lengths must be equal: 1 vs. 2"):
      xla.compile(split_flow([1.0, 2.0, 3.0], [1, 2, 3]))[0].eval()

    # Scalar value.
    with self.assertRaisesOpError(r"value must have rank >= 1"):
      xla.compile(split_flow(1.0, [1]))[0].eval()

    # Array size differs from the number of lengths.
    with self.assertRaisesOpError(
        r"TensorArray's size is not equal to the size of lengths "
        r"\(1 vs. 2\)"):
      xla.compile(split_flow([1.0], [1], size=2))[0].eval()
def testWriteShape(self):
  """Checks static shapes of reads and that a mismatched write is rejected.

  Note that `c0` is rebound to the first compiled output and is then used by
  the later computations through their closures.
  """
  with self.cached_session(), self.test_scope():

    def new_array():
      return tensor_array_ops.TensorArray(
          dtype=dtypes.float32, tensor_array_name="foo", size=3)

    def write_one():
      array = new_array()
      c0 = constant_op.constant([4.0, 5.0])
      written = array.write(0, c0)
      return [c0, written.read(0)]

    c0, r0 = xla.compile(write_one)
    self.assertAllEqual(c0.get_shape(), r0.get_shape())

    def write_two():
      array = new_array()
      c1 = constant_op.constant([6.0, 7.0])
      written = array.write(0, c0).write(1, c1)
      first = written.read(0)
      second = written.read(1)
      return [first, c1, second]

    r0, c1, r1 = xla.compile(write_two)
    self.assertAllEqual(c0.get_shape(), r0.get_shape())
    self.assertAllEqual(c1.get_shape(), r1.get_shape())

    def rewrite_with_other_shape():
      array = new_array()
      written = array.write(0, c0)
      c2 = constant_op.constant([4.0, 5.0, 6.0])
      return written.write(0, c2).flow

    with self.assertRaises(ValueError):
      self.evaluate(xla.compile(rewrite_with_other_shape))
def _testGradientWhenNotAllComponentsRead(self):
  """Checks that the gradient of an unread element is zero."""
  with self.cached_session(), self.test_scope():

    def partial_read_gradient():
      array = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=2)
      x = constant_op.constant([2.0, 3.0])
      first = array.unstack(x).read(0)
      # Calculate (dr0/dx0, dr0/dx1). since r0 = x0, gradients are (1, 0).
      return gradients_impl.gradients(ys=[first], xs=[x], grad_ys=[1.0])

    compiled = xla.compile(partial_read_gradient)
    grad_r0_vals = self.evaluate(compiled)[0]
    self.assertAllEqual(grad_r0_vals, [1.0, 0.0])
def _testTensorArrayWriteGradientAddMultipleAdds(self, dtype):
  """Checks that repeated writes to a gradient array slot are summed.

  Args:
    dtype: TensorArray dtype; its `as_numpy_dtype` is used to build the
      written and expected values.
  """
  with self.cached_session(), self.test_scope():

    def to_numpy(x):
      return np.asarray(x, dtype=dtype.as_numpy_dtype)

    def grad_of_twice_written():
      """Writes slot 2 twice and returns the "grad" array of the result."""
      array = tensor_array_ops.TensorArray(
          dtype=dtype, tensor_array_name="foo", size=3, infer_shape=False)
      written = array.write(2, to_numpy(3.0)).write(2, to_numpy(4.0))
      return written.grad("grad")

    def sum_three_writes():
      grad_array = grad_of_twice_written()
      for addend in (3.0, 4.0, 5.0):
        grad_array = grad_array.write(2, to_numpy(addend))
      return grad_array.read(2)

    # Assert that aggregation works correctly
    expected_sum = to_numpy(12.00)
    self.assertAllEqual(expected_sum, xla.compile(sum_three_writes)[0].eval())

    def write_differing_shapes():
      grad_array = grad_of_twice_written()
      # Using differing shapes causes an exception
      scalar_write = grad_array.write(1, to_numpy(1.0))
      vector_write = scalar_write.write(1, to_numpy([1.0]))
      return vector_write.flow

    with self.assertRaisesOpError(r"Mismatched TensorArray sizes"):
      xla.compile(write_differing_shapes)[0].eval()
def testTensorArrayIdentity(self):
  """Checks `identity()` keeps dtype/contents and honors control dependencies.

  Each identity is created under a control dependency on a variable
  increment, so both variables must read 1.0 after evaluation.
  """
  with self.cached_session(), self.test_scope():
    tensor_arrays = {}
    v0 = resource_variable_ops.ResourceVariable(0.0)
    v1 = resource_variable_ops.ResourceVariable(0.0)

    def identity_arrays():
      float_array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, size=2, infer_shape=False)
      int_array = tensor_array_ops.TensorArray(
          dtype=dtypes.int32, size=4, infer_shape=True)

      float_array = float_array.write(0, 0.)
      int_array = int_array.write(0, 1)

      with ops.control_dependencies([v0.assign_add(1.0)]):
        float_array = float_array.identity()
      with ops.control_dependencies([v1.assign_add(1.0)]):
        int_array = int_array.identity()

      read0 = float_array.read(0)
      read1 = int_array.read(0)
      size0 = float_array.size()
      size1 = int_array.size()

      tensor_arrays[0] = float_array
      tensor_arrays[1] = int_array
      return [read0, read1, size0, size1, v0, v1]

    variables.global_variables_initializer().run()

    outputs = self.evaluate(xla.compile(identity_arrays))
    read0_v, read1_v, size0_v, size1_v, v0_val, v1_val = outputs

    # Tests correct properties on new TensorArrays.
    self.assertEqual(dtypes.float32, tensor_arrays[0].dtype)
    self.assertEqual(dtypes.int32, tensor_arrays[1].dtype)

    # Tests that the control dependencies was added and executed.
    self.assertEqual(1.0, v0_val)
    self.assertEqual(1.0, v1_val)

    # Tests correct TensorArray.
    self.assertEqual(read0_v, 0)
    self.assertEqual(read1_v, 1)
    self.assertEqual(size0_v, 2)
    self.assertEqual(size1_v, 4)
def _testTensorArrayGradientWriteReadType(self, dtype):
  """Checks write/read gradients, alone and combined, for one numpy dtype.

  Args:
    dtype: numpy dtype used for all values; converted with
      `dtypes.as_dtype` for the TensorArray.
  """
  with self.cached_session(), self.test_scope():

    def as_array(x):
      return np.array(x, dtype=dtype)

    def write_read_gradients():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.as_dtype(dtype),
          tensor_array_name="foo",
          size=3,
          infer_shape=False)

      value_0 = constant_op.constant(as_array([[4.0, 5.0]]))
      value_1 = constant_op.constant(as_array([[3.0, 3.5]]))

      filled = array.write(0, value_0).write(1, value_1)
      r0 = filled.read(0)
      r1 = filled.read(1)
      r0_2 = filled.read(0)

      # Test individual components' gradients
      grad_just_r0 = gradients_impl.gradients(
          ys=[r0], xs=[value_0], grad_ys=[as_array([[2.0, 3.0]])])
      grad_r0_r0_2 = gradients_impl.gradients(
          ys=[r0, r0_2],
          xs=[value_0],
          grad_ys=[as_array([[2.0, 3.0]]), as_array([[1.0, -1.0]])])
      grad_just_r1 = gradients_impl.gradients(
          ys=[r1], xs=[value_1], grad_ys=[as_array([[-2.0, -4.0]])])

      # Test combined gradients
      grad = gradients_impl.gradients(
          ys=[r0, r0_2, r1],
          xs=[value_0, value_1],
          grad_ys=[
              as_array([[2.0, 3.0]]),
              as_array([[1.0, -1.0]]),
              as_array([[-2.0, -10.0]]),
          ])

      return [grad_just_r0, grad_r0_r0_2, grad_just_r1, grad]

    outputs = self.evaluate(xla.compile(write_read_gradients))
    [grad_just_r0_vals, grad_r0_r0_2_vals, grad_just_r1_vals,
     grad_vals] = outputs

    self.assertAllEqual(as_array([[2.0, 3.0]]), grad_just_r0_vals[0])
    self.assertAllEqual(as_array([[3.0, 2.0]]), grad_r0_r0_2_vals[0])
    self.assertAllEqual(as_array([[-2.0, -4.0]]), grad_just_r1_vals[0])

    self.assertEqual(len(grad_vals), 2)
    self.assertAllEqual(as_array([[3.0, 2.0]]), grad_vals[0])
    self.assertAllEqual(as_array([[-2.0, -10.0]]), grad_vals[1])
def testEmptyTensorArrayPack(self):
  """Checks that stacking three (0, 1)-shaped elements gives shape [3, 0, 1]."""
  with self.cached_session(), self.test_scope():

    def stack_empty_elements():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, tensor_array_name="foo", size=3)
      empty_element = np.zeros((0, 1), dtype=np.float32)
      for index in range(3):
        array = array.write(index, empty_element)
      return array.stack()

    stacked = self.evaluate(xla.compile(stack_empty_elements)[0])
    self.assertAllEqual([3, 0, 1], stacked.shape)
def testWriteCloseTensorArray(self):
  """Checks that a TensorArray can be closed after two writes."""
  with self.cached_session(), self.test_scope():

    def write_then_close():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32,
          tensor_array_name="foo",
          size=3,
          infer_shape=False)
      filled = array.write(0, [[4.0, 5.0]]).write(1, [[3.0, 1.0]])
      close_op = filled.close()
      # The constant is returned only to give the close op a consumer.
      with ops.control_dependencies([close_op]):
        return 1.0

    compiled = xla.compile(write_then_close)
    self.evaluate(compiled)
def testMultiTensorArray(self):
  """Checks that two TensorArrays in one computation hold separate values."""
  with self.cached_session(), self.test_scope():

    def sum_of_two_arrays():
      foo = tensor_array_ops.TensorArray(
          size=1, dtype=dtypes.float32, tensor_array_name="foo")
      foo_value = foo.write(0, 4.0).read(0)

      bar = tensor_array_ops.TensorArray(
          size=1, dtype=dtypes.float32, tensor_array_name="bar")
      bar_value = bar.write(0, 5.0).read(0)

      return foo_value + bar_value

    total = self.evaluate(xla.compile(sum_of_two_arrays)[0])
    self.assertAllClose(9.0, total)
def _testTensorArrayWritePack(self, tf_dtype):
  """Writes three 1x2 elements and checks the stacked result.

  Args:
    tf_dtype: dtype of the TensorArray; also passed to `_make_converter` to
      build the written and expected values.
  """
  elements = [[[4.0, 5.0]], [[6.0, 7.0]], [[8.0, 9.0]]]
  with self.cached_session(), self.test_scope():
    convert = _make_converter(tf_dtype)

    def write_and_stack():
      array = tensor_array_ops.TensorArray(
          dtype=tf_dtype, tensor_array_name="foo", size=3)
      for index, element in enumerate(elements):
        array = array.write(index, convert(element))
      return array.stack()

    expected = convert(elements)
    self.assertAllEqual(expected,
                        self.evaluate(xla.compile(write_and_stack)[0]))
def _testTensorArrayWriteConcat(self, tf_dtype):
  """Writes three 2x2 elements and checks the concatenated result.

  Args:
    tf_dtype: dtype of the TensorArray; also passed to `_make_converter` to
      build the written and expected values.
  """
  elements = [
      [[4.0, 5.0], [104.0, 105.0]],
      [[6.0, 7.0], [106.0, 107.0]],
      [[8.0, 9.0], [204.0, 205.0]],
  ]
  with self.session(), self.test_scope():
    convert = _make_converter(tf_dtype)

    def write_and_concat():
      array = tensor_array_ops.TensorArray(
          dtype=tf_dtype, tensor_array_name="foo", size=3)
      for index, element in enumerate(elements):
        array = array.write(index, convert(element))
      return array.concat()

    # Concatenation joins the elements along their first dimension.
    expected = convert([[4.0, 5.0], [104.0, 105.0], [6.0, 7.0],
                        [106.0, 107.0], [8.0, 9.0], [204.0, 205.0]])
    self.assertAllEqual(expected,
                        self.evaluate(xla.compile(write_and_concat)[0]))
def testTensorArrayReadTwice(self):
  """Reads element 0 twice from an array created with clear_after_read=False.

  Only the first of the two returned reads is compared with the expected
  row.
  """
  with self.session(), self.test_scope():

    def read_twice():
      value = constant_op.constant([[1.0, -1.0], [10.0, -10.0]])
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32,
          tensor_array_name="foo",
          size=2,
          clear_after_read=False)
      unpacked = array.unstack(value)
      first_read = unpacked.read(0)
      # Sequence the second read after the first.
      with ops.control_dependencies([first_read]):
        second_read = unpacked.read(0)
      return [first_read, second_read]

    outputs = self.evaluate(xla.compile(read_twice))
    self.assertAllEqual([1.0, -1.0], outputs[0])
def testMultiTensorArray(self):
  """Writes 4.0 and 5.0 to two separate TensorArrays and checks their sum."""
  with self.session(), self.test_scope():

    def add_reads():
      first_array = tensor_array_ops.TensorArray(
          size=1, dtype=dtypes.float32, tensor_array_name="foo")
      first_written = first_array.write(0, 4.0)
      first_read = first_written.read(0)

      second_array = tensor_array_ops.TensorArray(
          size=1, dtype=dtypes.float32, tensor_array_name="bar")
      second_written = second_array.write(0, 5.0)
      second_read = second_written.read(0)

      return first_read + second_read

    compiled_sum = xla.compile(add_reads)[0]
    self.assertAllClose(9.0, self.evaluate(compiled_sum))
def testCondAndTensorArray_xlaCompile(self):
  """Writes to a TensorArray inside a `cond` under xla.compile.

  Currently skipped unconditionally (b/127846988). The XLA control-flow
  context is exited in a `finally` block so that it cannot stay entered if
  the body fails once the skip is removed.
  """
  # Fails with "Uninitialized arguments" in XlaIfOp::Compile
  self.skipTest("b/127846988")
  with self.session(), self.test_scope():
    xla_context = control_flow_ops.XLAControlFlowContext()
    xla_context.Enter()
    try:

      def f():
        ta = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=1)
        output = control_flow_ops.cond(
            constant_op.constant(True),
            lambda: ta.write(0, 5.),
            lambda: ta.write(0, 10.))
        return output.stack()

      output_t, = xla.compile(f)
      # The predicate is constant True, so the first branch's value is kept.
      self.assertAllEqual([5.], self.evaluate(output_t))
    finally:
      xla_context.Exit()
def _test_loop_fn(self, loop_fn, iters, force_xla=False):
  """Compares a pfor run of `loop_fn` with its jit-scoped and XLA variants.

  Args:
    loop_fn: function passed to `pfor_control_flow_ops.pfor`.
    iters: iteration count passed to `pfor`.
    force_xla: when true, additionally compares against a
      `jit_compile=True` function and against `xla.compile`.
  """

  def f():
    return pfor_control_flow_ops.pfor(loop_fn, iters)

  @def_function.function
  def jit_f():
    with jit.experimental_jit_scope():
      return f()

  out = f()
  jit_out = jit_f()
  self.run_and_assert_equal(out, jit_out)

  # TODO(agarwal): The following may complain about uncompilable nodes. Hence
  # these are currently not enabled for all tests.
  if not force_xla:
    return

  must_compile_f = def_function.function(jit_compile=True)(f)
  out_exp_compile_f = must_compile_f()
  self.run_and_assert_equal(out, out_exp_compile_f)

  out_xla_compile_f = xla.compile(f, inputs=[])
  self.run_and_assert_equal(out, out_xla_compile_f)
def testTensorGradArrayDynamicWriteRead(self):
  """Writes/reads a TensorArray and its gradient array and checks both sizes."""
  values = [[[4.0]], [[1.0]], [[-3.0]]]
  grad_values = [[[5.0]], [[2.0]], [[-2.0]]]
  with self.session(), self.test_scope():

    def write_read_and_size():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, tensor_array_name="foo", size=3)
      for index, value in enumerate(values):
        array = array.write(index, value)

      # Get gradient array here so we know the shape
      grad_array = array.grad("grad")

      size = array.size()
      grad_size = grad_array.size()

      for index, value in enumerate(grad_values):
        grad_array = grad_array.write(index, value)

      reads = [array.read(index) for index in range(3)]
      grad_reads = [grad_array.read(index) for index in range(3)]
      return reads + grad_reads + [size, grad_size]

    d0, d1, d2, g_d0, g_d1, g_d2, vs, g_vs = self.evaluate(
        xla.compile(write_read_and_size))
    for expected, actual in zip(values, (d0, d1, d2)):
      self.assertAllEqual(expected, actual)
    for expected, actual in zip(grad_values, (g_d0, g_d1, g_d2)):
      self.assertAllEqual(expected, actual)
    self.assertAllEqual(3, vs)
    self.assertAllEqual(3, g_vs)
def testMultiStack(self):
  """Pushes to and pops from two stacks and checks the sum of the pops.

  The first stack receives a placeholder fed with 4.0, the second the
  constant 5.0.
  """
  with self.session(), self.test_scope():
    v = array_ops.placeholder(dtypes.float32)

    def push_pop_two_stacks():
      foo = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="foo")
      foo_push = gen_data_flow_ops.stack_push_v2(foo, v)
      with ops.control_dependencies([foo_push]):
        foo_pop = gen_data_flow_ops.stack_pop_v2(foo, dtypes.float32)

      bar = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="bar")
      bar_push = gen_data_flow_ops.stack_push_v2(bar, 5.0)
      with ops.control_dependencies([bar_push]):
        bar_pop = gen_data_flow_ops.stack_pop_v2(bar, dtypes.float32)

      return foo_pop + bar_pop

    total = xla.compile(push_pop_two_stacks)[0].eval({v: 4.0})
    self.assertAllClose(9.0, total)
def test_xla_while_loop(self):
  """Runs `vectorized_map` inside a ten-iteration while loop under XLA.

  Every iteration gathers one [5, 3] slice of an all-ones input, reduces it
  to its [5, 1] mean and accumulates it, so the result is ones([5, 1]) * 10.
  """

  def compute(x):
    return math_ops.reduce_mean(x, axis=0, keepdims=True)

  def vectorized_compute(x, i):
    gathered = array_ops.gather(x, i)
    mapped = pfor_control_flow_ops.vectorized_map(compute, gathered)
    mapped.set_shape([5, 1])
    return mapped

  def while_compute(x):

    def keep_going(i, _):
      return i < 10

    def accumulate(i, y):
      return (i + 1, y + vectorized_compute(x, i))

    initial = (0, array_ops.zeros([5, 1]))
    loop_outputs = control_flow_ops.while_loop_v2(keep_going, accumulate,
                                                  initial)
    return loop_outputs[1]

  result = xla.compile(while_compute, inputs=[array_ops.ones((10, 5, 3))])
  expected = array_ops.ones([5, 1]) * 10
  self.run_and_assert_equal(expected, result)
def testSwitchCaseAndTensorArray_xlaCompile(self):
  """Writes to a TensorArray inside a `switch_case` under xla.compile.

  Currently skipped unconditionally (b/127846988). The XLA control-flow
  context is exited in a `finally` block so that it cannot stay entered if
  the body fails once the skip is removed.
  """
  self.skipTest("b/127846988")
  with self.session(), self.test_scope():
    xla_context = control_flow_ops.XLAControlFlowContext()
    xla_context.Enter()
    try:

      def f():
        ta = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=1)
        output = control_flow_ops.switch_case(
            constant_op.constant(1), {
                0: lambda: ta.write(0, 5.),
                1: lambda: ta.write(0, 10.),
                2: lambda: ta.write(0, 15.),
            })
        return output.stack()

      output_t, = xla.compile(f)
      # Branch index 1 is selected, so the value written is 10.
      self.assertAllEqual([10.], self.evaluate(output_t))
    finally:
      xla_context.Exit()
def testInplaceTuple(self):
  """Runs a single-iteration tanh while loop on the IPU and checks its report.

  The same placeholder is fed in as both loop-carried tensors; both outputs
  must equal tanh(2), and the report must list the expected compute sets.
  """
  with self.session() as sess:

    def my_net(x):

      def keep_going(i, _x, _y):
        return i < 1

      def step(i, x, y):
        # Order matters: the expected compute sets below name the two tanh
        # ops "Tanh" and "Tanh_1".
        i = i + 1
        x = nn.tanh(x)
        y = nn.tanh(y)
        return (i, x, y)

      loop_outputs = control_flow_ops.while_loop(
          keep_going, step, (0, x, x), name='')
      return loop_outputs[1:]

    with ops.device('cpu'):
      x = array_ops.placeholder(np.float32, [4])

    report = tu.ReportJSON(self, sess)

    with ops.device("/device:IPU:0"):
      r = xla.compile(my_net, inputs=[x])

    report.reset()

    out_x, out_y = sess.run(r, {x: np.full([4], 2)})
    expected = np.full([4], np.tanh(2))
    self.assertAllClose(out_x, expected)
    self.assertAllClose(out_y, np.full([4], np.tanh(2)))

    report.parse_log(assert_len=4)

    ok = [
        '__seed*', 'Copy_*_to_*', 'Tanh/tanh*/Op/Tanh', 'Tanh_1/tanh*/Op/Tanh'
    ]
    report.assert_all_compute_sets_and_list(ok)
def testInplaceTuple(self):
  """Runs a single-iteration tanh while loop on the IPU and checks its trace.

  The same placeholder is fed in as both loop-carried tensors; both outputs
  must equal tanh(2), the event trace must hold three entries, and the
  compute sets extracted from it must match the expected patterns.
  """

  def my_net(x):

    def cond(i, x, y):
      return i < 1

    def body(i, x, y):
      i = i + 1
      x = nn.tanh(x)
      y = nn.tanh(y)
      return (i, x, y)

    i = 0
    return control_flow_ops.while_loop(cond, body, (i, x, x))[1:]

  with ops.device('cpu'):
    x = array_ops.placeholder(np.float32, [4])
    report = gen_ipu_ops.ipu_event_trace()

  tu.configure_ipu_system()

  with ops.device("/device:IPU:0"):
    r = xla.compile(my_net, inputs=[x])

  with tu.ipu_session() as sess:
    # First run of the trace op; its result is discarded (presumably to clear
    # earlier events -- confirm against the op's documentation).
    sess.run(report)

    # Distinct names keep the placeholder `x` from being shadowed.
    out_x, out_y = sess.run(r, {x: np.full([4], 2)})
    self.assertAllClose(out_x, np.full([4], np.tanh(2)))
    self.assertAllClose(out_y, np.full([4], np.tanh(2)))

    result = sess.run(report)
    # assertEqual reports the actual length on failure, unlike assertTrue.
    self.assertEqual(len(result), 3)

    s = tu.extract_all_strings_from_event_trace(result)
    cs_list = tu.get_compute_sets_from_report(s)

    ok = [
        '__seed*', 'Copy_*_to_*', 'while/Tanh/tanh*/Op/Tanh',
        'while/Tanh_1/tanh*/Op/Tanh'
    ]
    self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
def testTensorArrayWriteRead(self):
  """Writes three 1x2 elements, reads them back and checks the flow shape."""
  elements = [[[4.0, 5.0]], [[1.0, 3.0]], [[7.0, -8.5]]]
  with self.session(), self.test_scope():

    def write_then_read():
      array = tensor_array_ops.TensorArray(
          dtype=dtypes.float32, tensor_array_name="foo", size=3)
      for index, element in enumerate(elements):
        array = array.write(index, element)
      reads = [array.read(index) for index in range(3)]
      return reads + [array.flow]

    d0, d1, d2, flow_val = self.evaluate(xla.compile(write_then_read))
    for expected, actual in zip(elements, (d0, d1, d2)):
      self.assertAllEqual(expected, actual)
    # The evaluated flow value has an empty shape, i.e. it is a scalar.
    self.assertAllEqual([], flow_val.shape)
def testSameNameStacks(self):
  """Different stacks with the same name do not interfere."""
  with self.session() as sess, self.test_scope():
    v1 = array_ops.placeholder(dtypes.float32)
    v2 = array_ops.placeholder(dtypes.float32)

    def push_then_pop_both():
      first = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="foo")
      second = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="foo")

      # Chain the ops: push to first, then push to second, then pop both.
      first_push = gen_data_flow_ops.stack_push_v2(first, v1)
      with ops.control_dependencies([first_push]):
        second_push = gen_data_flow_ops.stack_push_v2(second, v2)
      with ops.control_dependencies([second_push]):
        first_pop = gen_data_flow_ops.stack_pop_v2(first, dtypes.float32)
        second_pop = gen_data_flow_ops.stack_pop_v2(second, dtypes.float32)
      return [first_pop, second_pop]

    [pop1_compiled, pop2_compiled] = xla.compile(push_then_pop_both)
    feeds = {v1: 4.0, v2: 5.0}
    out1, out2 = sess.run([pop1_compiled, pop2_compiled], feeds)
    self.assertAllClose(out1, 4.0)
    self.assertAllClose(out2, 5.0)
def _testTensorArrayScatterRead(self, tf_dtype):
  """Scatters two rows to indices 1 and 8 and reads them at fed indices.

  Args:
    tf_dtype: dtype of the TensorArray; also passed to `_make_converter` to
      build the scattered and expected values.
  """
  with self.session() as session, self.test_scope():
    convert = _make_converter(tf_dtype)
    id0 = array_ops.placeholder(dtypes.int32)
    id1 = array_ops.placeholder(dtypes.int32)

    def scatter_then_read():
      array = tensor_array_ops.TensorArray(
          dtype=tf_dtype, tensor_array_name="foo", size=10)
      indices = constant_op.constant([1, 8])
      value = constant_op.constant(convert([[1.0, -1.0], [10.0, -10.0]]))
      scattered = array.scatter(indices, value)
      return [scattered.read(id0), scattered.read(id1)]

    # Test aggregation of read
    compiled = xla.compile(scatter_then_read)
    read_vals = session.run(compiled, feed_dict={id0: 1, id1: 8})
    self.assertAllEqual(convert([1.0, -1.0]), read_vals[0])
    self.assertAllEqual(convert([10.0, -10.0]), read_vals[1])
def testCondConstPropagation_errorMsg_xlaCompile(self):
  """Expects a compile-time-constant error for a random slice bound in `cond`.

  Both branches depend on the random int32 scalar `p`, which is not a
  compile-time constant. The XLA control-flow context is exited in a
  `finally` block so that a failing assertion cannot leave it entered.
  """
  with self.session() as sess, self.test_scope():
    xla_context = control_flow_ops.XLAControlFlowContext()
    xla_context.Enter()
    try:
      x = array_ops.placeholder(dtypes.float32)
      p = random_ops.random_uniform([], minval=1, maxval=3, dtype=dtypes.int32)
      condition = math_ops.cast(
          random_ops.random_uniform([], minval=0, maxval=2, dtype=dtypes.int32),
          dtypes.bool)

      def f():
        # TODO(b/129021699): Wrapping this in a tf.function does not work.
        def if_true():
          # This emits a StridedSlice op which expects the index to be a
          # compile-time const.
          return x[:p]

        def if_false():
          return array_ops.fill([p], 5.)

        return control_flow_ops.cond(condition, if_true, if_false)

      output = xla.compile(f)

      with self.assertRaisesRegex(errors.InvalidArgumentError,
                                  "must be a compile-time constant"):
        sess.run(
            output, feed_dict={
                x: [0., 1., 2.],
            })
    finally:
      xla_context.Exit()
def testLoopRepeatCountDoesntDivide(self):
  """Expects a failure for gradient accumulation inside a mismatched loop.

  The while loop runs 10 iterations while the accumulation op is built with
  `num_mini_batches=4`; running the compiled network must raise
  `FailedPreconditionError` with the message checked below.
  """
  with self.session() as sess:
    dtype = np.float32

    def my_net(y):

      def keep_going(step, unused_x, unused_y):
        return step < 10

      def accumulate(step, x, y):
        accumulated = gen_poputil_ops.ipu_stateful_gradient_accumulate(
            array_ops.ones_like(x), num_mini_batches=4, verify_usage=False)
        next_x = x + accumulated
        # The increment for `y` is shaped after the already-updated `x`.
        next_y = y + array_ops.ones_like(next_x)
        return (step + 1, next_x, next_y)

      return control_flow_ops.while_loop(keep_going, accumulate, (0, y, y))

    with ops.device('cpu'):
      y = array_ops.placeholder(dtype, [1])

    ipu_options = utils.create_ipu_config()
    utils.configure_ipu_system(ipu_options)

    with ops.device("/device:IPU:0"):
      compiled = xla.compile(my_net, inputs=[y])

    with self.assertRaisesRegex(
        errors.FailedPreconditionError,
        "Detected a gradient accumulation operation with 4 number of mini "
        "batches inside a loop with 10 iterations."):
      sess.run(compiled, {y: [10]})
def testSumOfTwoReadVariablesWithoutRepeatGrad(self):
  """Checks gradients of a sum of two TensorArray reads.

  The gradients with respect to each written value are evaluated one at a
  time and then together; every result must equal the upstream gradient.
  """
  with self.session() as session, self.test_scope():
    base = np.arange(3 * 5, dtype=np.float32).reshape(3, 5) + 1
    upstream_grad = -base

    def fn():
      a = array_ops.identity(base)
      b = array_ops.identity(base + 3 * 5)
      array = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=2)
      array = array.write(0, a, name="write_a")
      array = array.write(1, b, name="write_b")
      read_a = array.read(0, name="read_a_0")
      read_b = array.read(1, name="read_b_0")
      total = read_a + read_b  # a + b
      # d(a+b)/da = 1
      grad_wrt_a = gradients_impl.gradients([total], [a], [upstream_grad])[0]
      # d(a+b)/db = 1
      grad_wrt_b = gradients_impl.gradients([total], [b], [upstream_grad])[0]
      return [grad_wrt_a, grad_wrt_b]

    grad_a, grad_b = xla.compile(fn)

    # Test gradients calculated individually
    (grad_a_value,) = self.evaluate([grad_a])
    self.assertAllEqual(grad_a_value, upstream_grad)

    (grad_b_value,) = self.evaluate([grad_b])
    self.assertAllEqual(grad_b_value, upstream_grad)

    # Test gradients calculated jointly.
    joint_a_value, joint_b_value = self.evaluate([grad_a, grad_b])
    self.assertAllEqual(joint_a_value, upstream_grad)
    self.assertAllEqual(joint_b_value, upstream_grad)
def testUnsortedSegmentSumVariableLR(self):
  """Runs one gradient-descent step with a fed learning rate on the IPU.

  The network gathers rows of `w1` twice, sums them, and applies the gradient
  of that sum back to `w1`. The test then checks the value returned by the
  compiled computation and the compute sets recorded in the report.
  """
  with self.session() as sess:

    def network(x, y1, y2, lr):
      # `x` is part of the compiled signature but is not used by the network.
      del x
      with variable_scope.variable_scope("vs", use_resource=True):
        w1 = variable_scope.get_variable(
            "w1",
            shape=[10, 200],
            dtype=np.float32,
            initializer=init_ops.constant_initializer(1))
        g1 = array_ops.gather(w1, y1)
        g2 = array_ops.gather(w1, y2)

        a = math_ops.reduce_sum(g1 + g2)

      # NOTE(review): the optimizer ops are assumed to sit outside the "vs"
      # scope, matching the un-prefixed 'ExpandDims/...' entry expected
      # below - confirm against the original layout.
      optimizer = gradient_descent.GradientDescentOptimizer(lr)
      # The single gradient is stacked and averaged before being applied.
      grads = [a]
      grads = [
          gradients_impl.gradients(g, variables.trainable_variables())[0]
          for g in grads
      ]
      grads = [array_ops.expand_dims(g, 0) for g in grads]
      grad = array_ops.concat(grads, axis=0)
      grad = math_ops.reduce_mean(grad, 0)
      train = optimizer.apply_gradients([(grad, w1)])
      return a, train

    # Inputs are created on the CPU; only the compiled network targets the IPU.
    with ops.device('cpu'):
      x = array_ops.placeholder(np.float32, shape=[10, 200])
      y1 = array_ops.placeholder(np.int32, shape=[10])
      y2 = array_ops.placeholder(np.int32, shape=[10])
      lr = array_ops.placeholder(np.float32, shape=[])

    report = tu.ReportJSON(self, sess)

    with ops.device("/device:IPU:0"):
      r = xla.compile(network, inputs=[x, y1, y2, lr])

    sess.run(variables.global_variables_initializer())
    # Reset the report after variable initialisation and before the run
    # whose compute sets are checked.
    report.reset()
    out = sess.run(
        r, {
            x: np.ones(x.shape),
            y1: np.ones(y1.shape),
            y2: np.ones(y2.shape),
            lr: 0.1,
        })
    self.assertAllClose(out, [-4000.0])

    report.parse_log()

    # Compute-set name patterns expected in the report.
    ok = [
        '__seed*',
        'host-exchange-local-copy-*/OnTileCopy-0',
        '/negate/Op/Negate',
        'ExpandDims/input/multi-update-add.3/multiUpdateAdd',
        'Copy_*/OnTileCopy-0',
        'vs/Gather*/gather.*/multiSlice',
        'vs/add/add*/Add',
        'vs/Sum/reduce*/Reduce',
    ]
    report.assert_all_compute_sets_and_list(ok)
def testScatterMultipleLookupsWithReshape(self):
  """Trains one step on two embedding lookups into a reshaped variable.

  The variable `w` is reshaped, looked up twice with `embedding_lookup`, and
  the concatenated lookups are compared with the labels through an
  absolute-difference loss. The test checks the returned loss and the compute
  sets recorded in the report.
  """
  with self.session() as sess:

    def network(x, y1, y2, la, lr):
      # `x` is part of the compiled signature but is not used by the network.
      del x
      with variable_scope.variable_scope("vs", use_resource=True):
        w = variable_scope.get_variable(
            "w",
            shape=[200, 10],
            dtype=np.float32,
            initializer=init_ops.constant_initializer(2.))
        y = array_ops.reshape(w, [10, 200])
        g1 = nn.embedding_lookup(y, y1)
        g2 = nn.embedding_lookup(y, y2)
        g = array_ops.concat([g1, g2], axis=1)

        ce = losses.absolute_difference(labels=la, predictions=g)
        loss = math_ops.reduce_mean(ce)

      # NOTE(review): the optimizer is assumed to sit outside the "vs" scope,
      # matching the 'gradients/vs/...' entries expected below - confirm
      # against the original layout.
      optimizer = gradient_descent.GradientDescentOptimizer(lr)
      train = optimizer.minimize(loss)
      return loss, train

    # Inputs are created on the CPU; only the compiled network targets the IPU.
    with ops.device('cpu'):
      x = array_ops.placeholder(np.float32, shape=[1, 100, 100, 2])
      y1 = array_ops.placeholder(np.int32, shape=[10])
      y2 = array_ops.placeholder(np.int32, shape=[10])
      la = array_ops.placeholder(np.float32, shape=[10, 400])
      lr = array_ops.placeholder(np.float32, shape=[])

    report = tu.ReportJSON(self, sess)

    with ops.device("/device:IPU:0"):
      r = xla.compile(network, inputs=[x, y1, y2, la, lr])

    sess.run(variables.global_variables_initializer())
    # Reset the report after variable initialisation and before the run
    # whose compute sets are checked.
    report.reset()
    out = sess.run(
        r, {
            x: np.ones(x.shape),
            y1: np.ones(y1.shape),
            y2: np.ones(y2.shape),
            la: np.ones(la.shape),
            lr: 0.1,
        })
    self.assertAllClose(out, [1.0])

    report.parse_log()

    # Compute-set name patterns expected in the report.
    # pylint: disable=line-too-long
    ok = [
        '__seed*',
        'host-exchange-local-copy-*/OnTileCopy-0',
        'gradients/vs/absolute_difference/Abs_grad/Sign',
        'gradients/vs/absolute_difference/Abs_grad/mul/fusion',
        '/negate/Op/Negate',
        'gradients/vs/Reshape_grad/Reshape/tensor/multi-update-add*/multiUpdateAdd',
        'vs/embedding_lookup/gather.*/multiSlice',
        'vs/embedding_lookup_1/gather.*/multiSlice',
        'vs/absolute_difference/Sub/subtract.*/Subtract',
        'vs/absolute_difference/Abs/abs.*/Op/Absolute',
        'vs/absolute_difference/Sum/reduce',
        'vs/absolute_difference/value/multiply',
    ]
    # pylint: enable=line-too-long
    report.assert_all_compute_sets_and_list(ok)
def f(x):
  """Compiles a scalar stateless-normal sampler that uses `x` as its seed."""

  def sample(seed):
    return stateless.stateless_random_normal([], seed=seed)

  return xla.compile(sample, [x])
def compile(computation, inputs=None):
  """Builds an operator that compiles and runs `computation` with the Graphcore
  IPU XLA backend.

  Args:
    computation: A Python function that builds a computation to apply to the
      input. If the function takes n inputs, `inputs` should be a list of n
      tensors.

      `computation` may return a list of operations and tensors. Tensors must
      come before operations in the returned list. The return value of
      `compile` is a list of tensors corresponding to the tensors from the
      output of `computation`.

      All operations returned from `computation` will be executed when
      evaluating any of the returned output tensors.
    inputs: A list of inputs or `None` (equivalent to an empty list). Each
      input can be a nested structure containing values that are convertible
      to tensors. Note that passing an N-dimension list of compatible values
      will result in an N-dimension list of scalar tensors rather than a
      single rank-N tensor. If you need different behaviour, convert part of
      inputs to tensors with `tf.convert_to_tensor`.

  Returns:
    Same data structure as if `computation(inputs)` is called directly with
    some exceptions for correctness.

    1. None output. a NoOp would be returned which control-depends on
       computation.
    2. Single value output. A tuple containing the value would be returned.
    3. Operation-only outputs. a NoOp would be returned which control-depends
       on computation.

  Raises:
    Exception: If the computation was not compiled for an IPU device.
    ValueError: If compilation failed and the outputs of the computation
      contain an EstimatorSpec or IPUEstimatorSpec object.
  """
  old_op_list = ops.get_default_graph().get_operations()
  try:
    with ipu_scope.ipu_jit_scope(0):
      result = xla.compile(computation, inputs)

    new_op_list = ops.get_default_graph().get_operations()

    # Only the operations created by this call matter, i.e. the ones present
    # now that were not present before.
    added_ops = set(new_op_list) - set(old_op_list)

    # Go over all the new added ops, check that they have been placed on an
    # IPU device. A computation that only added NoOps is not reported.
    placed_on_ipu = any(
        tf_device.DeviceSpec.from_string(o.device).device_type == 'IPU'
        for o in added_ops)

    if not placed_on_ipu and not all(o.type == 'NoOp' for o in added_ops):
      raise Exception("""\
A computation has been compiled, however it was not placed on an IPU device. \
This computation will not be executed on an IPU.
To execute it on an IPU use the `ipu_scope` from `tensorflow.python.ipu.scopes`, \
for example:

  with ipu_scope('/device:IPU:0'):
    result = ipu_compiler.compile(comp, inputs)
""")
    return result

  except Exception as e:
    is_estimator = False
    try:
      # Retrieve the outputs of the computation from the trace
      outputs = inspect.trace()[-1][0].f_locals['outputs']
      is_estimator = _is_estimatorspec(outputs)
    except Exception:
      # The innermost frame has no usable `outputs`; surface the original
      # error. Catching `Exception` rather than using a bare `except` lets
      # KeyboardInterrupt and SystemExit propagate instead of being replaced.
      raise e from None
    if is_estimator:
      raise ValueError("""\
Your computation output contains an EstimatorSpec or IPUEstimatorSpec object.
When you use an IPUEstimator, it already handles all the xla compilation
and no manual call to compile() is needed.
""")
    raise
def func_wrapper(a):
  """Passes an add-one computation over `a` through `xla.compile`."""
  return xla.compile(lambda value: value + 1, inputs=[a])