def testConv8x8_WithBias(self):
  """Run an 8x8, stride-4 convolution followed by a bias add.

  Executed once for every entry of ``self.data_formats``. All inputs are
  zeros, so the output is expected to be zeros; afterwards the expected
  compute-set patterns are handed to the report for checking.
  """
  for fmt in self.data_formats:
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        inp = array_ops.placeholder(
            np.float32, self._ip_shp([1, 84, 84, 4], fmt), name="inp")
        wei = array_ops.placeholder(np.float32, [8, 8, 4, 16], name="wei")
        bia = array_ops.placeholder(np.float32, [16], name="bia")
        conv = nn_ops.conv2d(
            inp,
            wei,
            strides=self._ip_shp([1, 4, 4, 1], fmt),
            padding="VALID",
            data_format=fmt,
            name='cnv4')
        biased = nn_ops.bias_add(conv, bia, data_format=fmt, name='ba4')

      report = tu.ReportJSON(self, sess)
      report.reset()

      feed = {}
      feed[inp] = np.zeros(self._ip_shp([1, 84, 84, 4], fmt))
      feed[wei] = np.zeros([8, 8, 4, 16])
      feed[bia] = np.zeros([16])

      actual = sess.run(biased, feed)
      self.assertAllClose(
          actual, np.zeros(self._ip_shp([1, 20, 20, 16], fmt)))

      report.parse_log()

      expected_compute_sets = [
          '__seed*',
          'host-exchange-local-copy-',
          'Copy_{*_input,*_weights}_to_{*actsRearranged,*weightsRearranged}',
          'cnv4*/convolution.*/Conv_8x8_stride4x4',
          'ba4*/fusion/Op/Add',
      ]
      report.assert_all_compute_sets_and_list(expected_compute_sets)
def testScaledAddTo(self):
  """Evaluate ``pa + pb * const`` in float16 and check the compute sets."""
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      lhs = array_ops.placeholder(np.float16, [3])
      rhs = array_ops.placeholder(np.float16, [3])
      scale = array_ops.constant(2.0, np.float16)
      total = lhs + rhs * scale

    report = tu.ReportJSON(self, sess)
    report.reset()

    feed = {lhs: [2.0, 0.5, 1.0], rhs: [1.0, 2.0, 3.0]}
    self.assertAllClose(sess.run(total, feed), [4.0, 4.5, 7.0])

    report.parse_log(assert_len=4)
    report.assert_all_compute_sets_and_list(
        ['__seed*', 'host-exchange-local-copy-', 'add/fusion/AddTo'])
def testScaledSubtractFromVariable(self):
  """Evaluate ``pa - pc * pb`` where the scale ``pc`` is itself a placeholder."""
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      minuend = array_ops.placeholder(np.float16, [3])
      operand = array_ops.placeholder(np.float16, [3])
      scale = array_ops.placeholder(np.float16, [1])
      diff = minuend - scale * operand

    report = tu.ReportJSON(self, sess)
    report.reset()

    feed = {
        minuend: [2.0, 0.5, 1.0],
        operand: [1.0, 2.0, 3.0],
        scale: [2.0],
    }
    self.assertAllClose(sess.run(diff, feed), [0.0, -3.5, -5.0])

    report.parse_log(assert_len=4)
    report.assert_all_compute_sets_and_list(
        ['__seed*', 'host-exchange-local-copy-', 'sub/fusion/AddTo'])
def testConv3x3_WithBias(self):
  """Run a 3x3 SAME convolution followed by a bias add on zero inputs.

  Executed once for every entry of ``self.data_formats``.
  """
  for fmt in self.data_formats:
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        acts = array_ops.placeholder(
            np.float32, self._ip_shp([1, 14, 14, 64], fmt), name="a")
        kernel = array_ops.placeholder(
            np.float32, [3, 3, 64, 128], name="b")
        # NOTE(review): this placeholder requests the same name ("b") as the
        # kernel placeholder above; kept exactly as in the original.
        bias = array_ops.placeholder(np.float32, [128], name="b")
        conv = nn_ops.convolution(
            acts, kernel, padding="SAME", data_format=fmt, name='cnv3')
        biased = nn_ops.bias_add(conv, bias, data_format=fmt, name='ba3')

      report = tu.ReportJSON(self, sess)
      report.reset()

      feed = {}
      feed[acts] = np.zeros(self._ip_shp([1, 14, 14, 64], fmt))
      feed[kernel] = np.zeros([3, 3, 64, 128])
      feed[bias] = np.zeros([128])

      actual = sess.run(biased, feed)
      self.assertAllClose(
          actual, np.zeros(self._ip_shp([1, 14, 14, 128], fmt)))

      report.parse_log()

      expected_compute_sets = [
          '__seed*',
          'Copy_*actsRearranged',
          'host-exchange-local-copy-',
          'cnv3*/convolution.*/Conv_3x3',
          'ba3*/fusion/Op/Add',
      ]
      report.assert_all_compute_sets_and_list(expected_compute_sets)
def testScaledAddaXbY(self):
  """Evaluate ``a * x + b * y`` in float16 with constant scales a=2 and b=3."""
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      x_in = array_ops.placeholder(np.float16, [3])
      y_in = array_ops.placeholder(np.float16, [3])
      scale_x = array_ops.constant(2.0, np.float16)
      scale_y = array_ops.constant(3.0, np.float16)
      combined = scale_x * x_in + scale_y * y_in

    report = tu.ReportJSON(self, sess)
    report.reset()

    feed = {x_in: [2.0, 0.5, 1.0], y_in: [1.0, 2.0, 3.0]}
    self.assertAllClose(sess.run(combined, feed), [7.0, 7.0, 11.0])

    report.parse_log(assert_len=4)
    report.assert_all_compute_sets_and_list(
        ['__seed*', 'host-exchange-local-copy-', 'add/fusion/AddTo'])
def testNamedOperations(self):
  """Check that an add created in a name scope is reported under that name."""
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      lhs = array_ops.placeholder(np.float32, [2, 2], name="a")
      rhs = array_ops.placeholder(np.float32, [2, 2], name="b")
      with ops.name_scope('my_ops'):
        summed = math_ops.add(lhs, rhs, 'my_add_op')

    report = tu.ReportJSON(self, sess)

    feed = {
        lhs: [[1., 1.], [2., 3.]],
        rhs: [[0., 1.], [4., 5.]],
    }
    report.reset()

    self.assertAllClose(sess.run(summed, feed), [[1., 2.], [6., 8.]])

    report.parse_log()
    report.assert_all_compute_sets_and_list(
        ['__seed*', 'my_ops/my_add_op/add'])
def testNonModifiedResourceIsNotOverwrittenInPlaceOp(self):
  """Check an unmodified resource variable survives repeated in-place use.

  The resource variable ``w`` is marked as not modified; a copy should be
  inserted so that it is not overwritten between executions when an inplace
  op consumes it.
  """
  w_val = [1, 2, 3, 4]

  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      with variable_scope.variable_scope("vs", use_resource=True):
        w_init = init_ops.constant_initializer(
            np.array(w_val, dtype=np.float32))
        w = variable_scope.get_variable(
            "w", shape=[4], dtype=np.float32, initializer=w_init)

      px = array_ops.placeholder(np.float32, shape=[4])
      y = w + px

    report = tu.ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())
    report.reset()

    inputs = [
        np.array(values, dtype=np.float32)
        for values in ([7, 3, 5, 9], [1, 8, 3, 4], [9, 2, 2, 6])
    ]
    # Every run must see the original value of w.
    for x in inputs:
      self.assertAllClose(sess.run(y, {px: x}), x + w_val)

    report.parse_log()

    w_dl = "1.0"
    io_msg = ("w should be copied to device once and "
              "that should be the only io event")
    report.assert_host_to_device_event_names([w_dl], io_msg)
    report.assert_device_to_host_event_names([], io_msg)
def testConv1x1_WithBias(self):
  """Run a 1x1 VALID convolution followed by a bias add on zero inputs.

  Executed once for every entry of ``self.data_formats``.
  """
  for fmt in self.data_formats:
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        inp = array_ops.placeholder(
            np.float32, self._ip_shp([1, 1, 1, 4], fmt), name="inp")
        wei = array_ops.placeholder(np.float32, [1, 1, 4, 16], name="wei")
        bia = array_ops.placeholder(np.float32, [16], name="bia")
        conv = nn_ops.conv2d(
            inp,
            wei,
            strides=[1, 1, 1, 1],
            padding="VALID",
            data_format=fmt,
            name='cnv5')
        biased = nn_ops.bias_add(conv, bia, data_format=fmt, name='ba5')

      report = tu.ReportJSON(self, sess)
      report.reset()

      feed = {}
      feed[inp] = np.zeros(self._ip_shp([1, 1, 1, 4], fmt))
      feed[wei] = np.zeros([1, 1, 4, 16])
      feed[bia] = np.zeros([16])

      actual = sess.run(biased, feed)
      self.assertAllClose(actual,
                          np.zeros(self._ip_shp([1, 1, 1, 16], fmt)))

      report.parse_log()

      expected_compute_sets = [
          '__seed*',
          'Copy_',
          'cnv5*/convolution.*/Conv_1x1',
          'ba5*/fusion/Op/Add',
      ]
      report.assert_all_compute_sets_and_list(expected_compute_sets)
def testFwdAndBwdMaxPool(self):
  """Run a 2x2 max pool and its gradient in a single session call."""
  with self.session() as sess:
    input_values = np.arange(16).reshape(1, 4, 4, 1)
    output_grad = np.full((1, 2, 2, 1), 0.1)
    window = [1, 2, 2, 1]

    with ops.device("/device:IPU:0"):
      acts = array_ops.placeholder(np.float32, [1, 4, 4, 1], name="a")
      grad_in = array_ops.placeholder(np.float32, [1, 2, 2, 1], name="b")
      pooled = nn.max_pool(
          acts,
          ksize=list(window),
          strides=list(window),
          data_format='NCHW',
          padding='SAME')
      pooled_grad = gen_nn_ops.max_pool_grad(
          acts,
          pooled,
          grad_in,
          ksize=list(window),
          strides=list(window),
          data_format='NCHW',
          padding='SAME')

    report = tu.ReportJSON(self, sess)
    report.reset()

    feed = {acts: input_values, grad_in: output_grad}
    output, input_grad = sess.run((pooled, pooled_grad), feed)

    self.assertAllClose(output, [[[[5.], [7.]], [[13.], [15.]]]])
    expected_grad = [[
        [[0.], [0.], [0.], [0.]],
        [[0.], [0.1], [0.], [0.1]],
        [[0.], [0.], [0.], [0.]],
        [[0.], [0.1], [0.], [0.1]],
    ]]
    self.assertAllClose(input_grad, expected_grad)

    report.parse_log(assert_len=4)

    expected_compute_sets = [
        '__seed*',
        'Copy_*',
        'MaxPool/max-pool*/maxPool2x2/',
        'MaxPoolGrad/max-pool-grad*/maxPool2x2',
    ]
    report.assert_all_compute_sets_and_list(expected_compute_sets)
def testConvolutionsDontMatchDifferentTypes(self):
  """Two 1x1 convolutions of different dtypes appear as two compute sets.

  The first convolution is float32 and the second float16, with a cast in
  between; the expected compute-set list names both convolutions.
  """
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

      with variable_scope.variable_scope("vs", use_resource=True):
        conv_f32 = layers.Conv2D(
            2,
            1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer(),
            dtype=np.float32)
        y = conv_f32(x)
        y = math_ops.cast(y, np.float16)
        conv_f16 = layers.Conv2D(
            2,
            1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer(),
            dtype=np.float16)
        y = conv_f16(y)

    report = tu.ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())
    report.reset()

    sess.run(y, {x: np.zeros([1, 4, 4, 2])})

    report.parse_log()

    # Matches two convolutions
    expected_compute_sets = [
        '__seed*',
        'Copy_*weightsRearranged',
        'Copy_',
        'Copy_vs/*/OnTileCopy-0',
        'vs/conv2d/Conv2D/convolution.*/Conv_1x1',
        'vs/Cast/convert.*/Cast',
        'vs/conv2d_1/Conv2D/convolution.*/Conv_1x1',
    ]
    report.assert_all_compute_sets_and_list(expected_compute_sets)

    self.assertAllEqual(report.get_ml_type_counts(), [2, 0, 0, 0])
def testScaledSubtractFrom(self):
  """Evaluate ``pa - const * pb`` in float16 and check the compute sets."""
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      minuend = array_ops.placeholder(np.float16, [3])
      operand = array_ops.placeholder(np.float16, [3])
      scale = array_ops.constant(2.0, np.float16)
      # note how const operand index varies compared to testScaledAddTo
      # still should match as it will be reordered
      diff = minuend - scale * operand

    report = tu.ReportJSON(self, sess)
    report.reset()

    feed = {minuend: [2.0, 0.5, 1.0], operand: [1.0, 2.0, 3.0]}
    self.assertAllClose(sess.run(diff, feed), [0.0, -3.5, -5.0])

    report.parse_log(assert_len=4)
    report.assert_all_compute_sets_and_list(
        ['__seed*', 'host-exchange-local-copy-', 'sub/fusion/AddTo'])
def testMatmulAndEmbedding(self, transpose_matmul):
  """Compile the matmul-and-embedding model and check total tile memory.

  Args:
    transpose_matmul: forwarded unchanged to ``_matmulAndEmbeddingFwd``.
  """
  with self.session() as sess:

    def model(ids):
      return _matmulAndEmbeddingFwd(ids, transpose_matmul=transpose_matmul)

    ids_ph = array_ops.placeholder(np.int32, shape=[50])
    with ipu.scopes.ipu_scope("/device:IPU:0"):
      compiled = ipu.ipu_compiler.compile(model, [ids_ph])

    report = tu.ReportJSON(self, sess, compile_ipu_code=True)
    tu.move_variable_initialization_to_cpu()
    sess.run(variables.global_variables_initializer())
    report.reset()

    feed = {ids_ph: np.ones([50])}
    sess.run(compiled, feed)

    report.parse_log()
    report.assert_total_tile_memory(49281058)
def testInplaceTuple(self):
  """Run a one-iteration while loop applying tanh to two loop-carried values.

  Both loop-carried tensors start from the same placeholder, so both outputs
  are expected to equal ``tanh`` of the fed value.
  """
  with self.session() as sess:

    def my_net(x):
      def keep_going(i, x, y):
        del x
        del y
        return i < 1

      def step(i, x, y):
        i = i + 1
        x = nn.tanh(x)
        y = nn.tanh(y)
        return (i, x, y)

      i = 0
      loop_out = control_flow_ops.while_loop(
          keep_going, step, (i, x, x), name='')
      # Drop the loop counter; only the two tensors are returned.
      return loop_out[1:]

    with ops.device('cpu'):
      x_ph = array_ops.placeholder(np.float32, [4])

    report = tu.ReportJSON(self, sess)

    with ops.device("/device:IPU:0"):
      compiled = xla.compile(my_net, inputs=[x_ph])

    report.reset()

    first, second = sess.run(compiled, {x_ph: np.full([4], 2)})
    self.assertAllClose(first, np.full([4], np.tanh(2)))
    self.assertAllClose(second, np.full([4], np.tanh(2)))

    report.parse_log(assert_len=4)

    expected_compute_sets = [
        '__seed*',
        'Copy_*_to_*',
        'Tanh/tanh*/Op/Tanh',
        'Tanh_1/tanh*/Op/Tanh',
    ]
    report.assert_all_compute_sets_and_list(expected_compute_sets)
def testAvgPoolSameWithReshape(self):
  """Run a 5x5, stride-2 SAME average pool on seeded random data."""
  with self.session() as sess:
    np.random.seed(0)
    shape = [1, 10, 10, 1]
    data = np.random.uniform(0, 1, shape)

    # The expected answer was generated using TF on the cpu
    expected = [[
        [[0.64431685], [0.51738459], [0.49705142], [0.60235918],
         [0.73694557]],
        [[0.57755166], [0.47387227], [0.40451217], [0.4876942],
         [0.55843753]],
        [[0.49037799], [0.4466258], [0.35829377], [0.40070742],
         [0.37205362]],
        [[0.47563809], [0.4075647], [0.34894851], [0.35470542],
         [0.3322109]],
        [[0.52914065], [0.45464769], [0.38156652], [0.32455513],
         [0.33199897]],
    ]]

    with ops.device("/device:IPU:0"):
      acts = array_ops.placeholder(np.float32, shape, name="a")
      pooled = nn.avg_pool(
          acts,
          ksize=[1, 5, 5, 1],
          strides=[1, 2, 2, 1],
          data_format='NHWC',
          padding='SAME',
          name="avg")

    report = tu.ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())
    report.reset()

    actual = sess.run(pooled, {acts: data})
    self.assertAllClose(actual, expected)

    report.parse_log(assert_len=4)
    report.assert_all_compute_sets_and_list(
        ['__seed*', 'avg/avg-pool*/avgPool5x5'])
def testBiasApplyVariableLR(self):
  """Train one step with a placeholder learning rate and check the bias.

  After a single gradient-descent step the variable named ``vs/a/bias:0``
  must exist and hold the expected value.
  """
  with self.session() as sess:
    input_values = np.ones((1, 4, 4, 2))

    with ops.device("/device:IPU:0"):
      x = array_ops.placeholder(np.float16, shape=[1, 4, 4, 2])
      lr = array_ops.placeholder(np.float16, shape=[])

      with variable_scope.variable_scope("vs", use_resource=True):
        conv = layers.Conv2D(
            2,
            1,
            use_bias=True,
            kernel_initializer=init_ops.ones_initializer(),
            bias_initializer=init_ops.ones_initializer(),
            name="a")
        y = conv(x)
        y = nn.relu(y)

      loss = math_ops.reduce_sum(y)
      optimizer = gradient_descent.GradientDescentOptimizer(lr)
      train = optimizer.minimize(loss)

    report = tu.ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())
    report.reset()

    sess.run((loss, train), {x: input_values, lr: 0.1})

    tvars = variables.global_variables()
    tvars_vals = sess.run(tvars)

    bias_values = [
        val for var, val in zip(tvars, tvars_vals)
        if var.name == "vs/a/bias:0"
    ]
    for val in bias_values:
      # Value computed using the CPU backend
      self.assertAllClose(val, [-0.6, -0.6], atol=0.001)
    self.assertTrue(len(bias_values) > 0)
def testConvolutionBiasApply(self):
  """Train two biased 1x1 convolutions and look for the bias-update reduce."""
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

      with variable_scope.variable_scope("vs", use_resource=True):
        first = layers.Conv2D(
            2,
            1,
            use_bias=True,
            kernel_initializer=init_ops.ones_initializer())
        y = first(x)
        second = layers.Conv2D(
            2,
            1,
            use_bias=True,
            kernel_initializer=init_ops.ones_initializer())
        y = second(y)

      loss = math_ops.reduce_sum(y)
      optimizer = gradient_descent.GradientDescentOptimizer(0.1)
      train = optimizer.minimize(loss)

    report = tu.ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())
    report.reset()

    sess.run([train, loss], {x: np.zeros([1, 4, 4, 2])})

    report.parse_log(
        assert_len=6,
        assert_msg=
        "Expected 2x compile, 1x upload, 1x load, 1x download, 1x execute")

    # pylint: disable=line-too-long
    expected_compute_sets = [
        '__seed*',
        'GradientDescent/update_vs/conv2d/bias/ResourceApplyGradientDescent/fusion.*/Reduce',
    ]
    # pylint: enable=line-too-long
    report.assert_compute_sets_contain_list(expected_compute_sets)
def executeModel(inputs, expected):
  """Run a one-hot model on the CPU or the IPU.

  When ``expected`` is None the model is placed on the CPU and its result is
  returned. Otherwise it is placed on the IPU and its result is asserted to
  be close to ``expected``.

  Args:
    inputs: mapping read for the keys "on", "off", "n_classes", "axis",
      "shape" and "in_values".
    expected: reference result, or None to request the CPU run.
  """
  with self.session() as sess:
    # The output dtype follows the dtype of the "on" value.
    data_type = inputs["on"].dtype

    # The model under test: a single one-hot op built from ``inputs``.
    def model(a):
      return array_ops.one_hot(
          a,
          inputs["n_classes"],
          dtype=data_type,
          on_value=inputs["on"],
          off_value=inputs["off"],
          axis=inputs["axis"])

    # The CPU run yields the reference; the IPU run is compared against it.
    run_on_cpu = expected is None

    with ops.device('cpu'):
      indices = array_ops.placeholder(np.int32, inputs["shape"], name="a")

    # Pick the device the model is placed on.
    if run_on_cpu:
      device = "cpu:0"
    else:
      device = "/device:IPU:0"

    with ops.device(device):
      out = model(indices)

    tu.ReportJSON(self, sess)

    in_data = np.array(inputs["in_values"])
    result = sess.run(out, {indices: in_data})

    if run_on_cpu:
      return result
    self.assertAllClose(result, expected)
def testReportEveryNthExecution_FirstOnly(self):
  """Execute the same add five times and expect one execution report."""
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      lhs = array_ops.placeholder(np.float32, [2, 2], name="a")
      rhs = array_ops.placeholder(np.float32, [2, 2], name="b")
      summed = math_ops.add(lhs, rhs)

    report = tu.ReportJSON(self, sess)

    feed = {
        lhs: [[1., 1.], [2., 3.]],
        rhs: [[0., 1.], [4., 5.]],
    }
    report.reset()

    for _ in range(5):
      sess.run(summed, feed)

    types = report.parse_log()
    self.assertEqual(types[IpuTraceEvent.EXECUTE], 5)
    self.assertEqual(
        len(report.get_execution_reports()), 1,
        "Only the first execution should have generated a report")
def testReluNotInPlace(self):
  """Evaluate ``relu(pa) + pa``; the expected list includes an input copy."""
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      acts = array_ops.placeholder(np.float32, [3], name="a")
      combined = nn_ops.relu(acts) + acts

    report = tu.ReportJSON(self, sess)
    report.reset()

    actual = sess.run(combined, {acts: [1, -2, 1]})
    self.assertAllClose(actual, [2, -2, 2])

    report.parse_log(assert_len=4)

    # pylint: disable=line-too-long
    expected_compute_sets = [
        '__seed*',
        'Copy_XLA_Args*/arg0.*_to_Relu/relu/Nonlinearity/out/OnTileCopy-0',
        'Relu/relu/Nonlinearity',
        'add/add.*/Add',
    ]
    # pylint: enable=line-too-long
    report.assert_all_compute_sets_and_list(expected_compute_sets)
def testConvolutionsWithBroadcast(self):
  """Compare two chained convolutions of broadcast inputs on IPU and CPU.

  The IPU result must match the CPU result, and the report is checked for
  tile memory usage and for the expected compute-set list.
  """
  with self.session() as sess:

    def model(device):
      with ops.device(device):
        x = array_ops.placeholder(np.float32, shape=[2])
        acts = gen_array_ops.broadcast_to(x, shape=[2, 256, 256, 2])
        kernel = gen_array_ops.broadcast_to(x, shape=[2, 2, 2, 2])
        y = nn.conv2d(acts, kernel, strides=1, padding="SAME", name="a")
        y = nn.conv2d(y, kernel, strides=1, padding="SAME", name="b")
        return sess.run(y, {x: np.ones(x.shape)})

    report = tu.ReportJSON(self, sess)
    report.reset()

    ipu_result = model("/device:IPU:0")
    cpu_result = model("cpu")
    self.assertAllClose(cpu_result, ipu_result)

    report.parse_log()
    report.assert_total_tile_memory(11336260)
    report.assert_max_tile_memory(9675)

    # Would fail if there were two convolutions in the graph
    report.assert_all_compute_sets_and_list(
        ['__seed*', 'a/convolution', 'Copy_'])
def testMatch(self):
  """Sum three bias-apply expressions and expect one ReduceOnTile match.

  Each expression is ``bias - reduce_sum(grad, axis=1) * lr``; the first uses
  a placeholder learning rate and the other two use Python constants.
  """
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      # Creation order is kept: three biases, three grads, then the rate.
      biases = [
          array_ops.placeholder(np.float32, shape=[2]) for _ in range(3)
      ]
      grads = [
          array_ops.placeholder(np.float32, shape=[2, 10]) for _ in range(3)
      ]
      vlr = array_ops.placeholder(np.float32, shape=[])

      def bias_apply(bias, grad, lr):
        reduced = math_ops.reduce_sum(grad, axis=1)
        return bias - reduced * lr

      out = (bias_apply(biases[0], grads[0], vlr) +
             bias_apply(biases[1], grads[1], 0.1) +
             bias_apply(biases[2], grads[2], 0.2))

    report = tu.ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())
    report.reset()

    feed = {}
    for bias_ph in biases:
      feed[bias_ph] = np.ones([2])
    for grad_ph in grads:
      feed[grad_ph] = np.ones([2, 10])
    feed[vlr] = 0.1

    actual = sess.run(out, feed)
    self.assertAllClose(actual, [-1., -1.])

    report.parse_log()
    report.assert_compute_sets_matches("*ReduceOnTile*", 1)
def tesInplaceAddCopyWithInplacePeer2(self):
  """Evaluate ``(pa + pb * pc) / (transpose(pa) * pb + pc)`` on the IPU.

  The result is compared with the same expression computed by NumPy, and the
  expected compute-set patterns are then handed to the report.

  NOTE(review): the method name starts with "tes" rather than "test", so a
  runner that collects methods by the "test" prefix will not pick it up.
  The name is left unchanged here; confirm whether that is deliberate.
  """
  with self.session() as sess:
    data_a = np.array([[10, -10], [-5, 5]])
    data_b = np.array([[-15, 15], [25, -25]])
    data_c = 2

    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float32, [2, 2])
      pb = array_ops.placeholder(np.float32, [2, 2])
      pc = array_ops.placeholder(np.float32, [])
      a = array_ops.transpose(pa)
      b = pa + pb * pc
      c = a * pb + pc
      d = b / c

    report = tu.ReportJSON(self, sess)
    report.reset()

    fd = {
        pa: data_a,
        pb: data_b,
        pc: data_c,
    }
    np_result = (data_a + data_b * data_c) / (
        np.transpose(data_a) * data_b + data_c)
    result = sess.run(d, fd)
    self.assertAllClose(result, np_result)

    report.parse_log(
        assert_len=4,
        assert_msg="engine, compile_begin, compile_end, execute")

    # Each pattern is its own list element. The original had no comma after
    # the transpose-copy pattern, so Python joined it with the following
    # multiply pattern into one string.
    ok = [
        '__seed*',
        'Copy_XLA_Args/arg0.*_to_transpose/transpose',
        'mul/multiply.*/Op/Multiply',
        'add/add.*/AddTo',
        'mul_1/multiply.*/Op/Multiply',
        'add_1/add.*/AddTo',
        'truediv/divide.*/Op/Divide',
    ]
    report.assert_all_compute_sets_and_list(ok)
def testMaxPool(self):
  """Run a 5x5, stride-2 SAME max pool (NCHW) over an all-ones input."""
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      acts = array_ops.placeholder(np.float32, [1, 1, 10, 10], name="a")
      pooled = nn.max_pool(
          acts,
          ksize=[1, 1, 5, 5],
          strides=[1, 1, 2, 2],
          data_format='NCHW',
          padding='SAME',
          name="max")

    report = tu.ReportJSON(self, sess)
    report.reset()

    actual = sess.run(pooled, {acts: np.ones([1, 1, 10, 10])})
    self.assertAllClose(actual, np.ones([1, 1, 5, 5]))

    report.parse_log(assert_len=4)
    report.assert_all_compute_sets_and_list(
        ['__seed*', 'max/max-pool*/maxPool5x5'])
def testConvolutionsMatch(self):
  """Two identical 1x1 convolutions are expected to share one compute set."""
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

      with variable_scope.variable_scope("vs", use_resource=True):
        first = layers.Conv2D(
            2,
            1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer())
        y = first(x)
        second = layers.Conv2D(
            2,
            1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer())
        y = second(y)

    report = tu.ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())
    report.reset()

    sess.run(y, {x: np.zeros([1, 4, 4, 2])})

    report.parse_log()

    # Would fail if there were two convolutions in the graph as they would be
    # called conv2d and conv2d_1
    expected_compute_sets = [
        '__seed*',
        'Copy_',
        'vs/conv2d/Conv2D/convolution.*/Conv_1x1',
        'Copy_',
    ]
    report.assert_all_compute_sets_and_list(expected_compute_sets)

    self.assertAllEqual(report.get_ml_type_counts(), [2, 0, 0, 0])
def testIpuModelDeviceWithReport(self):
  """Fetch the IPU event trace next to an add and count the event types."""
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      lhs = array_ops.placeholder(np.float32, [2, 2], name="a")
      rhs = array_ops.placeholder(np.float32, [2, 2], name="b")
      output = lhs + rhs

    with ops.device('cpu'):
      # The trace op is created with a control dependency on the add.
      with ops.control_dependencies([output]):
        trace = gen_ipu_ops.ipu_event_trace()

    report = tu.ReportJSON(self, sess)

    feed = {
        lhs: [[1., 1.], [2., 3.]],
        rhs: [[0., 1.], [4., 5.]],
    }
    sess.run(trace, feed)

    result, events = sess.run([output, trace], feed)
    self.assertAllClose(result, [[1., 2.], [6., 8.]])

    types = report.parse_events(events, assert_len=4)
    for event_type in (IpuTraceEvent.COMPILE_BEGIN,
                       IpuTraceEvent.COMPILE_END,
                       IpuTraceEvent.EXECUTE,
                       IpuTraceEvent.LOAD_ENGINE):
      self.assertEqual(1, types[event_type])
def testSigmoidNotInplace(self):
  """Evaluate ``sigmoid(pa) + pa``; the expected list includes an input copy."""
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      acts = array_ops.placeholder(np.float32, [3], name="a")
      combined = math_ops.sigmoid(acts) + acts

    report = tu.ReportJSON(self, sess)
    report.reset()

    actual = sess.run(combined, {acts: [-6.0, 0.0, 6.0]})
    self.assertAllClose(actual, [-5.997527, 0.5, 6.997527])

    report.parse_log(assert_len=4)

    # pylint: disable=line-too-long
    expected_compute_sets = [
        '__seed*',
        'Copy_XLA_Args*/arg0.*_to_Sigmoid/sigmoid/Nonlinearity/out/OnTileCopy-0',
        'Sigmoid/sigmoid/Nonlinearity',
        'add/add.*/Add',
    ]
    # pylint: enable=line-too-long
    report.assert_all_compute_sets_and_list(expected_compute_sets)
def testDepthwiseConvBackpropFilter1x1(self):
  """Run the depthwise conv filter gradient for a 1x1 filter on zero inputs."""
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      acts = array_ops.placeholder(np.float32, [1, 6, 6, 3], name="a")
      # filter sizes
      filter_sizes = constant_op.constant([1, 1, 3, 2], dtype=np.int32)
      out_grad = array_ops.placeholder(np.float32, [1, 6, 6, 6], name="c")
      filter_grad = nn.depthwise_conv2d_native_backprop_filter(
          acts, filter_sizes, out_grad, strides=[1, 1, 1, 1], padding="SAME")

    report = tu.ReportJSON(self, sess)
    report.reset()

    feed = {}
    feed[acts] = np.zeros([1, 6, 6, 3])
    feed[out_grad] = np.zeros([1, 6, 6, 6])

    actual = sess.run(filter_grad, feed)
    self.assertAllClose(actual, np.zeros([1, 1, 3, 2]))

    report.parse_log()

    expected_compute_sets = [
        '__seed*',
        'Copy_',
        'DepthwiseConv2dNativeBackpropFilter/fusion*/Conv_6x6',
    ]
    report.assert_all_compute_sets_and_list(expected_compute_sets)
def testBatchNormsMatchFwdBwdSomeOnShard0SomeOnShard1(self):
  """Train three conv + batch-norm pairs split across two IPU shards.

  The first two pairs are built under shard 0 and the third under shard 1.
  After one training step the expected compute-set patterns are handed to
  the report.
  """
  with self.session() as sess:

    def conv_then_bn(inp, name):
      # One 1x1 convolution followed by a fused, training-mode batch norm.
      conv = convolutional.conv2d(
          inp,
          2,
          1,
          use_bias=False,
          kernel_initializer=init_ops.ones_initializer(),
          name=name)
      return layers_norm.batch_normalization(conv, fused=True, training=True)

    with ops.device("/device:IPU:0"):
      x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

      with variable_scope.variable_scope("vs", use_resource=True):
        with ipu.scopes.ipu_shard(0):
          y = conv_then_bn(x, 'conv1')
          y = conv_then_bn(y, 'conv2')

        with ipu.scopes.ipu_shard(1):
          y = conv_then_bn(y, 'conv3')

      loss = math_ops.reduce_sum(y)
      optimizer = gradient_descent.GradientDescentOptimizer(0.1)
      train = optimizer.minimize(loss)

    report = tu.ReportJSON(self, sess, sharded=True)
    tu.move_variable_initialization_to_cpu()
    sess.run(variables.global_variables_initializer())
    report.reset()

    sess.run([train, loss], {x: np.zeros([1, 4, 4, 2])})

    report.parse_log()

    # Two BN for forwards (on shards 0 and 1) and two BN for grad
    # (note that we don't cache gradient application)
    # pylint: disable=line-too-long
    expected_compute_sets = [
        '__seed*',
        '*OnTileCopy*',
        'Copy_',
        'vs/conv1/Conv2D/convolution.*/Conv_1x1',
        'vs/conv3/Conv2D/convolution.*/Conv_1x1',
        'vs/batch_normalization/FusedBatchNorm*/batch-norm-training.*/',
        'vs/batch_normalization_2/FusedBatchNorm*/batch-norm-training.*/',
        'Sum/reduce.*/ReduceOnTile/InToIntermediateNoExchange/Reduce',
        'Sum/reduce.*/ReduceFinalStage/IntermediateToOutput/Reduce',
        'gradients/vs/batch_normalization_2/FusedBatchNorm*_grad/FusedBatchNormGrad*/batch-norm-grad.*/',
        'gradients/vs/batch_normalization_1/FusedBatchNorm*_grad/FusedBatchNormGrad*/batch-norm-grad.*/',
        'GradientDescent/update_vs/batch_normalization/',
        'GradientDescent/update_vs/batch_normalization_1/',
        'GradientDescent/update_vs/batch_normalization_2/',
        'gradients/vs/conv3/Conv2D_grad/Conv2DBackpropFilter/fusion.*/AddTo',
        'gradients/vs/conv3/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Conv_4x4',
        'gradients/vs/conv3/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Transpose',
        'gradients/vs/conv3/Conv2D_grad/Conv2DBackpropInput/fusion/*Transpose',
        'gradients/vs/conv2/Conv2D_grad/Conv2DBackpropInput/fusion.*/*Transpose',
        'gradients/vs/conv1/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Conv_4x4',
        'gradients/vs/conv1/Conv2D_grad/Conv2DBackpropFilter/fusion.*/AddTo',
        'gradients/vs/conv1/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Transpose',
    ]
    # pylint: enable=line-too-long
    report.assert_all_compute_sets_and_list(expected_compute_sets)
def testConvolutionApply(self):
  """Sum three scaled filter-gradient updates and expect one Convolve match.

  Each term is ``weights - lr * conv2d_backprop_filter(...)``; the first uses
  a placeholder learning rate and the other two use Python constants.
  """
  with self.session() as sess:
    with ops.device("/device:IPU:0"):
      filter_sizes = constant_op.constant([2, 2, 3, 5], np.int32)
      # Creation order is kept: inputs, grads, weights, then the rate.
      inputs = [
          array_ops.placeholder(np.float32, [2, 8, 8, 3]) for _ in range(3)
      ]
      grads = [
          array_ops.placeholder(np.float32, [2, 8, 8, 5]) for _ in range(3)
      ]
      weights = [
          array_ops.placeholder(np.float32, [2, 2, 3, 5]) for _ in range(3)
      ]
      vlr = array_ops.placeholder(np.float32, [])

      def conv_scaled_inplace(input_values, grads, weights, lr):
        filter_grad = nn_ops.conv2d_backprop_filter(
            input_values,
            filter_sizes,
            grads,
            strides=[1, 1, 1, 1],
            padding="SAME")
        return weights - lr * filter_grad

      result = (conv_scaled_inplace(inputs[0], grads[0], weights[0], vlr) +
                conv_scaled_inplace(inputs[1], grads[1], weights[1], 0.1) +
                conv_scaled_inplace(inputs[2], grads[2], weights[2], 0.2))

    report = tu.ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())
    report.reset()

    feed = {}
    for input_ph in inputs:
      feed[input_ph] = np.ones([2, 8, 8, 3])
    for grad_ph in grads:
      feed[grad_ph] = np.ones([2, 8, 8, 5])
    for weight_ph in weights:
      feed[weight_ph] = np.ones([2, 2, 3, 5])
    feed[vlr] = 0.1

    actual = sess.run(result, feed)

    # Expected shape is [2, 2, 3, 5]; each [3, 5] slab holds one value.
    expected = [[[[value] * 5
                  for _ in range(3)]
                 for value in pair]
                for pair in ((-48.2, -41.8), (-41.8, -36.2))]
    self.assertAllClose(actual, expected)

    report.parse_log()
    report.assert_compute_sets_matches('*Convolve', 1)
def testResourceCountsAreCorrect(self):
  """Check host/device transfer event names around five training steps.

  A two-layer matmul model is trained for five steps, after which the
  host-to-device and device-to-host event names are asserted. ``w1`` and
  ``b1`` are then fetched explicitly and the event names asserted again.
  """
  with self.session() as sess:

    def const_init(values):
      # Constant initializer built from a float32 array of ``values``.
      return init_ops.constant_initializer(
          np.array(values, dtype=np.float32))

    with ops.device("/device:IPU:0"):
      with variable_scope.variable_scope("vs", use_resource=True):
        w1 = variable_scope.get_variable(
            "w1",
            shape=[4, 2],
            dtype=np.float32,
            initializer=const_init([[1, 2], [3, 4], [5, 6], [7, 8]]))
        b1 = variable_scope.get_variable(
            "b1",
            shape=[2],
            dtype=np.float32,
            trainable=False,
            initializer=const_init([2, 3]))
        w2 = variable_scope.get_variable(
            "w2",
            shape=[2, 2],
            dtype=np.float32,
            initializer=const_init([[1, 2], [3, 4]]))
        b2 = variable_scope.get_variable(
            "b2",
            shape=[2],
            dtype=np.float32,
            trainable=False,
            initializer=const_init([2, 3]))

      x = array_ops.placeholder(np.float32, shape=[1, 4])
      y = math_ops.matmul(x, w1) + b1
      y = math_ops.matmul(y, w2) + b2

      loss = math_ops.reduce_sum(y)
      optimizer = gradient_descent.GradientDescentOptimizer(0.1)
      train = optimizer.minimize(loss)

    report = tu.ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())
    report.reset()

    # Five training steps alternating between two input rows.
    batches = (
        [[7, 3, 5, 9]],
        [[1, 2, 3, 4]],
        [[7, 3, 5, 9]],
        [[1, 2, 3, 4]],
        [[7, 3, 5, 9]],
    )
    for batch in batches:
      sess.run([train, loss], {x: np.array(batch, dtype=np.float32)})

    w1_dl = "1.0"
    b1_dl = "2.0"
    w2_dl = "3.0"
    b2_dl = "4.0"

    # biases are not outputs of the graph
    w1_ul = "out_1.0"
    w2_ul = "out_2.0"

    report.parse_log()

    # The initialization is constant, so there are no events generated on the
    # IPU.
    report.assert_host_to_device_event_names(
        [w1_dl, b1_dl, w2_dl, b2_dl],
        "Weights/biases should be downloaded once, and the input no times "
        "because it is streamed")

    # Weights should not be uploaded, and the loss is streamed
    report.assert_device_to_host_event_names(
        [], "Weights/biases should not be uploaded, and the loss is streamed")

    # Explicitly fetch the first set of weights and biases
    vw, vb = sess.run([w1, b1])

    expected_w1 = np.array(
        [[100.00576782, 86.60944366], [57.62784195, 51.23856354],
         [93.45920563, 82.40240479], [155.36032104, 135.74447632]],
        dtype=np.float32)
    self.assertAllClose(expected_w1, vw, rtol=1e-4)
    self.assertAllClose(np.array([2, 3], dtype=np.float32), vb, rtol=1e-4)

    report.parse_log()

    report.assert_host_to_device_event_names(
        [], "Weights/biases/inputs should not be downloaded at all")

    # Note all weights are fetched as a group
    report.assert_device_to_host_event_names(
        [w1_ul, w2_ul],
        "Weights/biases should be uploaded once (explicitly fetched)")