import numpy as np

from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.eager import def_function
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.platform import test


# NOTE: the class name here is a placeholder; the tests reference
# `self.device`, which is assumed to be supplied by the concrete test
# harness (e.g. a parameterized base class iterating over CPU/GPU/TPU).
class GraphCollectionTest(test.TestCase):

  def testAutoclusteringWithTfFunction(self):
    if 'tpu' in self.device.lower():
      self.skipTest('Autoclustering does not run on TPU')

    with ops.device('device:{}:0'.format(self.device)):

      @def_function.function(experimental_compile=False)
      def outer(a, b, c):
        return a * inner(b, c) + c

      @def_function.function(experimental_compile=True)
      def inner(b, c):
        return b + c * b

      i1 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0])
      i2 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0])
      i3 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0])

      with context.collect_graphs(optimized=True) as graphs:
        outer(i1, i2, i3)

      # The explicitly compiled `inner` should show up as an _XlaRun op in
      # the optimized graph, but only when XLA support is built in.
      if test_util.is_xla_enabled():
        self.assertIn('_XlaRun', [n.op for n in graphs[0].node])
      else:
        self.assertNotIn('_XlaRun', [n.op for n in graphs[0].node])
  def testGradientGraphOptimization(self):

    @def_function.function
    def f(x, y):
      with backprop.GradientTape() as tape:
        z = math_ops.mul(x, array_ops.zeros_like(x))
        l = math_ops.add(z, y)
        l = math_ops.reduce_sum(l)
      gx, gy = tape.gradient(l, [x, y])
      x.assign_add(gx)
      y.assign_add(gy)
      return x + y

    # XLA completely optimizes away the variable reads and
    # assignments, so skip the test.
    if test_util.is_xla_enabled():
      self.skipTest('Not relevant for XLA')

    with context.eager_mode():
      x = resource_variable_ops.ResourceVariable(
          np.random.uniform(size=[2, 2]), dtype=dtypes.float32)
      y = resource_variable_ops.ResourceVariable(
          np.random.uniform(size=[2, 2]), dtype=dtypes.float32)
      with context.collect_graphs(optimized=True) as graphs:
        f(x, y).numpy()
      self.assertLen(graphs, 1)
      assign_count = 0
      for node in graphs[0].node:
        if node.op == 'AssignAddVariableOp':
          self.assertEqual(node.input[0], 'y')
          assign_count += 1
      # Make sure that the only variable update that remains after
      # grappler optimization is that of y.
      self.assertEqual(assign_count, 1)
      self.assertLen(graphs[0].node, 11)
  def testGraphCollectionAfterDevicePlacement(self):

    @def_function.function
    def f(x):
      return x + constant_op.constant(1.)

    with context.collect_graphs() as graphs:
      with ops.device('CPU:0'):
        f(constant_op.constant(1.))

    self.assertLen(graphs, 1)
    graph, = graphs
    self.assertIn('CPU:0', graph.node[0].device)
  def testSimpleGraphCollection(self):

    @def_function.function
    def f(x):
      with ops.device('CPU:0'):
        return x + constant_op.constant(1.)

    with context.collect_graphs() as graphs:
      with ops.device('CPU:0'):
        x = constant_op.constant(1.)
      f(x)

    self.assertLen(graphs, 1)
    graph, = graphs
    self.assertIn('CPU:0', graph.node[1].device)
  def testFunctionArgShapeInference(self):

    @def_function.function
    def f(x, y):
      return math_ops.matmul(
          x, array_ops.reshape(array_ops.transpose(y), [384, 1536]))

    with context.eager_mode():
      x = array_ops.ones((1, 384))
      y = array_ops.ones((1536, 384))
      with context.collect_graphs(optimized=True) as graphs:
        f(x, y).numpy()
      self.assertLen(graphs, 1)
      self.assertLen(graphs[0].node, 4)
      # With argument shapes known, grappler's arithmetic optimizer folds
      # the transpose into the matmul.
      self.assertEqual(
          graphs[0].node[2].name,
          'ArithmeticOptimizer/FoldTransposeIntoMatMul_MatMul')
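
# Standard TF test entry point (a minimal sketch assuming this module is run
# directly; the original file's footer is not shown): enable eager execution
# and run the test cases defined above.
if __name__ == '__main__':
  ops.enable_eager_execution()
  test.main()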