def testNotEnoughIpus(self):
    """Run a graph sharded over shards 0-2 with 2 auto-selected IPUs.

    The run is expected to raise ResourceExhaustedError whose message
    matches 'Trying to compile a graph for'.
    """

    def my_graph(a, b, c):
        # Each partial sum is pinned to its own shard index.
        with ipu.ops.ipu_shard(0):
            left = a + b
        with ipu.ops.ipu_shard(1):
            right = a + c
        with ipu.ops.ipu_shard(2):
            total = left + right
        return total

    with ops.device('cpu'):
        pa = array_ops.placeholder(np.float32, [2], name="a")
        pb = array_ops.placeholder(np.float32, [2], name="b")
        pc = array_ops.placeholder(np.float32, [2], name="c")
        report = gen_ipu_ops.ipu_event_trace()

    with ops.device("/device:IPU:0"):
        compiled = ipu_compiler.compile(my_graph, [pa, pb, pc])

    # Only two IPUs are requested although the graph uses shard indices 0-2.
    config = ipu.utils.create_ipu_config(profiling=True)
    config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
    config = ipu.utils.auto_select_ipus(config, 2)
    ipu.utils.configure_ipu_system(config)

    feed = {pa: [1., 1.], pb: [0., 1.], pc: [1., 5.]}
    with sl.Session() as sess:
        with self.assertRaisesRegexp(errors.ResourceExhaustedError,
                                     'Trying to compile a graph for'):
            sess.run(compiled, feed)
def testConvBackpropFilter(self):
    """Run conv2d_backprop_filter on zero inputs; check value and compute sets."""
    with ops.device("/device:IPU:0"):
        activations = array_ops.placeholder(np.float32, [2, 8, 8, 3])
        filter_sizes = constant_op.constant([2, 2, 3, 5], np.int32)
        out_backprop = array_ops.placeholder(
            np.float32, [2, 8, 8, 5], name="wei")
        filter_grad = nn_ops.conv2d_backprop_filter(
            activations,
            filter_sizes,
            out_backprop,
            strides=[1, 1, 1, 1],
            padding="SAME")

    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op)

        feed = {
            activations: np.zeros([2, 8, 8, 3]),
            out_backprop: np.zeros([2, 8, 8, 5]),
        }
        grad_value = sess.run(filter_grad, feed)
        self.assertAllClose(grad_value, np.zeros([2, 2, 3, 5]))

        trace = sess.run(trace_op)
        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        # Compute-set name patterns handed to the checker.
        expected = [
            '__seed*',
            'Copy_',
            'Conv2DBackpropFilter/convolution.*/Conv_8x8',
        ]
        self.assertTrue(
            tu.check_all_compute_sets_and_list(compute_sets, expected))
def testDepthwiseConvBackpropFilter1x1WithRelu(self):
    """Depthwise conv filter backprop followed by relu, fed with zeros."""
    with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float32, [1, 6, 6, 3], name="a")
        # Constant holding the filter sizes argument.
        filter_sizes = constant_op.constant([1, 1, 3, 2], dtype=np.int32)
        pc = array_ops.placeholder(np.float32, [1, 6, 6, 6], name="c")
        filter_grad = nn.depthwise_conv2d_native_backprop_filter(
            pa, filter_sizes, pc, strides=[1, 1, 1, 1], padding="SAME")
        activated = nn.relu(filter_grad)

    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op)

        feed = {
            pa: np.zeros([1, 6, 6, 3]),
            pc: np.zeros([1, 6, 6, 6]),
        }
        value = sess.run(activated, feed)
        self.assertAllClose(value, np.zeros([1, 1, 3, 2]))

        trace = sess.run(trace_op)
        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        # Compute-set name patterns handed to the checker.
        expected = [
            '__seed*',
            'Copy_',
            'DepthwiseConv2dNativeBackpropFilter/fusion*/Conv_6x6',
            'Relu/custom-call*/Nonlinearity',
        ]
        self.assertTrue(
            tu.check_all_compute_sets_and_list(compute_sets, expected))
def testIpuEventsWithoutPoplarReporting(self):
    """With profiling off but IPU events on, events carry empty reports."""
    with self.session() as sess:
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [2, 2], name="a")
            pb = array_ops.placeholder(np.float32, [2, 2], name="b")
            total = math_ops.add(pa, pb)

        with ops.device('cpu'):
            trace_op = gen_ipu_ops.ipu_event_trace()

        config = utils.create_ipu_config(
            profiling=False, enable_ipu_events=True)
        utils.configure_ipu_system(config)

        feed = {
            pa: [[1., 1.], [2., 3.]],
            pb: [[0., 1.], [4., 5.]],
        }

        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op, feed)

        sess.run(total, feed)

        trace = sess.run(trace_op, feed)
        events = utils.extract_all_events(trace)

        # compile begin, compile end, execute
        self.assertEqual(len(events), 3)

        for event in events:
            if event.type == IpuTraceEvent.COMPILE_END:
                self.assertFalse(event.compile_end.compilation_report)
            if event.type == IpuTraceEvent.EXECUTE:
                self.assertFalse(event.execute.execution_report)

        sess.close()
def testDropoutImpl():
    """Check dropout output and its gradient have equal non-zero counts.

    Relies on `self` from the enclosing scope for the assertion.
    """

    def ipu_dropout_back(w):
        dropped = poprand.dropout(w, rate=0.4)
        cost = tf.square(dropped)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
        grads = optimizer.compute_gradients(cost, w)
        return [dropped, grads]

    with ops.device('cpu'):
        input_data = array_ops.placeholder(np.float32, [32])
        report = gen_ipu_ops.ipu_event_trace()

    with ipu.ops.ipu_scope("/device:IPU:0"):
        compiled = ipu_compiler.compile(ipu_dropout_back, inputs=[input_data])

    config = ipu.utils.create_ipu_config()
    config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
    ipu.utils.configure_ipu_system(config)

    with sl.Session() as sess:
        in_data = np.random.rand(32)
        fetched = sess.run(compiled, {input_data: in_data})

        dropout_out = fetched[0]
        # First element of the first (gradient, variable) pair.
        gradients = fetched[1][0][0]

        # The output and the gradient must have the same non-zero count.
        self.assertAllEqual(
            np.count_nonzero(dropout_out), np.count_nonzero(gradients))
def testNamedOperations(self):
    """An add op named inside a name scope shows up under that name."""
    with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float32, [2, 2], name="a")
        pb = array_ops.placeholder(np.float32, [2, 2], name="b")
        with ops.name_scope('my_ops'):
            total = math_ops.add(pa, pb, 'my_add_op')

    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        feed = {
            pa: [[1., 1.], [2., 3.]],
            pb: [[0., 1.], [4., 5.]],
        }

        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op, feed)

        value = sess.run(total, feed)
        self.assertAllClose(value, [[1., 2.], [6., 8.]])

        trace = sess.run(trace_op, feed)
        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        # Compute-set name patterns handed to the checker.
        expected = ['__seed*', 'my_ops/my_add_op/add']
        self.assertTrue(
            tu.check_all_compute_sets_and_list(compute_sets, expected))
def testReportEveryNthExecution_Every1(self):
    """With report_every_nth_execution=1, five runs give five reports."""
    with self.session() as sess:
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [2, 2], name="a")
            pb = array_ops.placeholder(np.float32, [2, 2], name="b")
            total = math_ops.add(pa, pb)

        with ops.device('cpu'):
            trace_op = gen_ipu_ops.ipu_event_trace()

        config = utils.create_ipu_config(
            profiling=True,
            profile_execution=True,
            report_every_nth_execution=1,
            use_poplar_text_report=False)
        utils.configure_ipu_system(config)

        feed = {
            pa: [[1., 1.], [2., 3.]],
            pb: [[0., 1.], [4., 5.]],
        }

        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op, feed)

        # Execute the same op five times in a row.
        for _ in range(5):
            sess.run(total, feed)

        trace = sess.run(trace_op, feed)

        parsed = tu.ReportJSON(self)
        type_counts = parsed.parse_events(trace)
        self.assertEqual(type_counts[IpuTraceEvent.EXECUTE], 5)
        self.assertEqual(
            len(parsed.get_execution_reports()), 5,
            "Every execution should have generated a report")
def testBatchNormalizeLayerFusedFp16(self):
    """Fused float16 batch norm on zeros; blacklisted compute sets absent."""
    with ops.device("/device:IPU:0"):
        with variable_scope.variable_scope("", use_resource=True):
            x = array_ops.placeholder(np.float16, [4, 64, 64, 4], name="a")
            normalised = layers_norm.batch_normalization(x, fused=True)

    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op)

        sess.run(variables.global_variables_initializer())
        value = sess.run(normalised, {x: np.zeros([4, 64, 64, 4])})
        self.assertAllClose(value, np.zeros([4, 64, 64, 4]))

        trace = sess.run(trace_op)
        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        # Patterns handed to the blacklist checker.
        blacklist = ['*convert*/Cast*']
        self.assertTrue(
            tu.check_compute_sets_not_in_blacklist(compute_sets, blacklist))
def testMaxPool(self):
    """NCHW max pool (5x5 window, stride 2, SAME) over a tensor of ones."""
    with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float32, [1, 1, 10, 10], name="a")
        pooled = nn.max_pool(
            pa,
            ksize=[1, 1, 5, 5],
            strides=[1, 1, 2, 2],
            data_format='NCHW',
            padding='SAME',
            name="max")

    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op)

        feed = {pa: np.ones([1, 1, 10, 10])}
        value = sess.run(pooled, feed)
        self.assertAllClose(value, np.ones([1, 1, 5, 5]))

        trace = sess.run(trace_op)
        self.assertTrue(len(trace) == 3)

        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        # Compute-set name patterns handed to the checker.
        expected = ['__seed*', 'max/custom-call*/maxPool5x5']
        self.assertTrue(
            tu.check_all_compute_sets_and_list(compute_sets, expected))
def testArgMax(self):
    """Compare argmax along axis 1 on the IPU device with np.argmax.

    Also checks that the event trace fetched after the run holds three
    entries.
    """
    batchsize = 4
    n_categories = 1200

    def model(a):
        return math_ops.argmax(a, axis=1, output_type=dtypes.int32)

    with ops.device('cpu'):
        pa = array_ops.placeholder(np.float32, [batchsize, n_categories])
        report = gen_ipu_ops.ipu_event_trace()

    with ops.device("/device:IPU:0"):
        out = model(pa)

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # Initial fetch of the trace; its result is discarded.
        sess.run(report)

        # Named `scores` rather than `input` to avoid shadowing the builtin.
        scores = np.random.rand(batchsize, n_categories)
        fd = {pa: scores}

        result = sess.run(out, fd)
        self.assertAllClose(result, np.argmax(scores, axis=1))

        trace = sess.run(report)
        # assertEqual reports the actual length when the check fails.
        self.assertEqual(len(trace), 3)
def testCheckMaxTileSize(self):
    """Bound the maximum tile size reported after init and after a run."""
    dtype = np.float32
    shape = (1024, 2048)

    with ops.device("/device:IPU:0"):
        with variable_scope.variable_scope("", use_resource=True):
            a = variable_scope.get_variable(
                "a",
                shape=shape,
                initializer=init_ops.constant_initializer(2),
                dtype=dtype)
        pb = array_ops.placeholder(shape=shape, dtype=dtype, name="b")
        c = constant_op.constant(4, shape=shape, dtype=dtype, name="c")
        total = a + pb + c

    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system(execution_trace=False)

    with tu.ipu_session() as sess:
        sess.run(variables.global_variables_initializer())

        # Tile size seen in the events produced by variable initialisation.
        init_trace = sess.run(trace_op)
        init_strings = tu.extract_all_strings_from_event_trace(init_trace)
        init_max = tu.get_maximum_tile_size_from_events(init_strings)
        self.assertTrue(init_max < 17000)

        # 2 (variable) + 1 (fed ones) + 4 (constant) == 7 everywhere.
        value = sess.run(total, {pb: np.ones(shape=shape, dtype=dtype)})
        self.assertAllClose(np.full(shape, 7, dtype=dtype), value)

        # Tile size seen in the events produced by the computation itself.
        run_trace = sess.run(trace_op)
        run_strings = tu.extract_all_strings_from_event_trace(run_trace)
        run_max = tu.get_maximum_tile_size_from_events(run_strings)
        self.assertTrue(run_max < 40000)
def testTopK(self):
    """Compare top_k indices on the IPU device with a NumPy argsort.

    Also checks that the event trace fetched after the run holds three
    entries.
    """
    n_categories = 1200
    topn = 24

    def model(a):
        # Only the indices are needed; the values are discarded.
        _, indices = nn.top_k(a, topn)
        return indices

    with ops.device('cpu'):
        pa = array_ops.placeholder(np.float32, [n_categories], name="a")
        report = gen_ipu_ops.ipu_event_trace()

    with ops.device("/device:IPU:0"):
        out = model(pa)

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # Initial fetch of the trace; its result is discarded.
        sess.run(report)

        # Named `scores` rather than `input` to avoid shadowing the builtin.
        scores = np.random.random(n_categories)
        # Indices of the `topn` largest entries, in descending order of value.
        expected = (-scores).argsort()[:topn]

        fd = {pa: scores}
        result = sess.run(out, fd)
        self.assertAllClose(result, expected)

        trace = sess.run(report)
        # assertEqual reports the actual length when the check fails.
        self.assertEqual(len(trace), 3)
def testTraining(self):
    """Run one training step and bound the memory size from the events.

    The model comes from `inference` and the element type from `datatype`,
    both defined outside this method.
    """
    x = array_ops.placeholder(datatype, shape=[1, 224, 224, 4])
    y_ = array_ops.placeholder(datatype, shape=[1, 1000])

    with ipu_ops.ipu_scope("/device:IPU:0"):
        logits = inference(x)

        loss = math_ops.reduce_mean(
            nn_ops.softmax_cross_entropy_with_logits_v2(
                logits=logits, labels=array_ops.stop_gradient(y_)))

        train = gradient_descent.GradientDescentOptimizer(0.01).minimize(loss)

    with ops.device('cpu'):
        report = gen_ipu_ops.ipu_event_trace()

    opts = utils.create_ipu_config(profiling=True)
    utils.configure_ipu_system(opts)

    # The context manager closes the session even if a run call raises.
    with sl.Session() as sess:
        sess.run(variables.global_variables_initializer())
        # Fetch of the trace whose result is discarded.
        sess.run(report)

        data = np.zeros([1, 224, 224, 4])
        labels = np.zeros([1, 1000])

        sess.run(train, feed_dict={x: data, y_: labels})
        out = sess.run(report)

    evts = utils.extract_all_events(out)
    size = utils.get_memory_size_from_events(evts)
    # assertLess reports the measured size when the bound is exceeded.
    self.assertLess(size, 174000000)
def testMultipleConfigureIpuShouldFail(self):
    """A second call to configure_ipu_system is expected to raise."""

    def my_graph(pa, pb, pc):
        with ops.device("/device:IPU:0"):
            o1 = pa + pb
            o2 = pa + pc
            out = o1 + o2
        return [out]

    with ops.device('cpu'):
        pa = array_ops.placeholder(np.float32, [2], name="a")
        pb = array_ops.placeholder(np.float32, [2], name="b")
        pc = array_ops.placeholder(np.float32, [2], name="c")
        report = gen_ipu_ops.ipu_event_trace()

    out = ipu_compiler.compile(my_graph, [pa, pb, pc])

    # First configuration.
    cfg = ipu.utils.create_ipu_config(profiling=True)
    cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
    cfg = ipu.utils.auto_select_ipus(cfg, 2)
    ipu.utils.configure_ipu_system(cfg)

    # Build the second config outside the assertRaises block so that only
    # the repeated configure call itself can satisfy the expectation.
    cfg = ipu.utils.create_ipu_config(profiling=True)
    cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=True)
    with self.assertRaises(Exception):
        ipu.utils.configure_ipu_system(cfg)
def testUniformRandomNonScalarInitalizer(self):
    """Initialise a shape-[2] variable from random_uniform in [-2, 2]."""
    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    with ops.device("/device:IPU:0"):
        with variable_scope.variable_scope("vs", use_resource=True):
            initializer = init_ops.random_uniform_initializer(
                minval=-2.0, maxval=2.0)
            z = variable_scope.get_variable(
                "z1", shape=[2], dtype=np.float32, initializer=initializer)

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # Clean existing reports
        sess.run(trace_op)

        sess.run(variables.global_variables_initializer())
        init_trace = sess.run(trace_op)

        value = sess.run(z)
        # Tolerances of 2.0 around zero accept any value in the init range.
        self.assertAllClose(value, [0.0, 0.0], 2.0, 2.0)

        trace_strings = tu.extract_all_strings_from_event_trace(init_trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        # Compute-set name patterns handed to the checker.
        expected = [
            '__seed*',
            'vs/z1/Initializer/random_uniform/RandomUniform/fusion/uniform',
        ]
        self.assertTrue(
            tu.check_all_compute_sets_and_list(compute_sets, expected))
def testScaledSubtractFrom(self):
    """Evaluate pa - 2.0 * pb in float16 and check the compute sets."""
    with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float16, [3])
        pb = array_ops.placeholder(np.float16, [3])
        scale = array_ops.constant(2.0, np.float16)
        # note how const operand index varies compared to testScaledAddTo
        # still should match as it will be reordered
        diff = pa - scale * pb

    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op)

        feed = {
            pa: [2.0, 0.5, 1.0],
            pb: [1.0, 2.0, 3.0],
        }
        value = sess.run(diff, feed)
        self.assertAllClose(value, [0.0, -3.5, -5.0])

        trace = sess.run(trace_op)
        self.assertTrue(len(trace) == 3)

        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        # Compute-set name patterns handed to the checker.
        expected = [
            '__seed*',
            'host-exchange-local-copy-',
            'sub/fusion/AddTo',
        ]
        self.assertTrue(
            tu.check_all_compute_sets_and_list(compute_sets, expected))
def testTuplesOfTuplesAreStreamed(self):
    """A two-element tuple output should produce no IO events."""
    with ops.device("/device:IPU:0"):
        with variable_scope.variable_scope("vs", use_resource=True):
            pa = array_ops.placeholder(np.int64, [2, 2], name="a")
            pb = array_ops.placeholder(np.int64, [2, 2], name="b")
            pc = array_ops.placeholder(np.int64, [2, 2], name="c")
            pair = control_flow_ops.tuple((pa + pc, pb + pc))

    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system(True, True, True)

    with tu.ipu_session() as sess:
        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op)

        sevens = np.full((2, 2), 7)
        sixes = np.full((2, 2), 6)
        fives = np.full((2, 2), 5)
        feed = {pa: sevens, pb: sixes, pc: fives}

        values = sess.run(pair, feed)
        self.assertEqual(len(values), 2)
        # 7 + 5 and 6 + 5 respectively.
        self.assertAllClose(
            values, (np.full((2, 2), 12), np.full((2, 2), 11)))

        trace = sess.run(trace_op)
        io_events = tu.extract_all_io_events(trace)
        # No io_events implies the data was streamed
        self.assertEqual(len(list(io_events)), 0)
def testScaledSubtractFromVariable(self):
    """Evaluate pa - pc * pb in float16 with the scale fed at run time."""
    with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float16, [3])
        pb = array_ops.placeholder(np.float16, [3])
        pc = array_ops.placeholder(np.float16, [1])
        diff = pa - pc * pb

    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op)

        feed = {
            pa: [2.0, 0.5, 1.0],
            pb: [1.0, 2.0, 3.0],
            pc: [2.0],
        }
        value = sess.run(diff, feed)
        self.assertAllClose(value, [0.0, -3.5, -5.0])

        trace = sess.run(trace_op)
        self.assertTrue(len(trace) == 3)

        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        # Compute-set name patterns handed to the checker.
        expected = [
            '__seed*',
            'host-exchange-local-copy-',
            'sub/fusion/AddTo',
        ]
        self.assertTrue(
            tu.check_all_compute_sets_and_list(compute_sets, expected))
def testCborReport(self):
    """With cbor_report=True both reports start with byte value 217."""
    with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float32, [2, 2], name="a")
        pb = array_ops.placeholder(np.float32, [2, 2], name="b")
        total = math_ops.add(pa, pb)

    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system(text_report=False, cbor_report=True)

    with tu.ipu_session() as sess:
        feed = {
            pa: [[1., 1.], [2., 3.]],
            pb: [[0., 1.], [4., 5.]],
        }

        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op, feed)

        sess.run(total, feed)

        trace = sess.run(trace_op, feed)
        events = tu.extract_all_events(trace)

        # begin, end, execute
        self.assertEqual(len(events), 3)

        # Compare the leading element of each report with that of a
        # one-byte bytes object holding 217.
        self.assertEqual(events[1].compile_end.compilation_report[0],
                         bytes(bytearray([217]))[0])
        self.assertEqual(events[2].execute.execution_report[0],
                         bytes(bytearray([217]))[0])
def testSigmoidNotInplace(self):
    """Evaluate sigmoid(pa) + pa and check values and compute sets."""
    with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float32, [3], name="a")
        total = math_ops.sigmoid(pa) + pa

    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op)

        feed = {pa: [-6.0, 0.0, 6.0]}
        value = sess.run(total, feed)
        self.assertAllClose(value, [-5.997527, 0.5, 6.997527])

        trace = sess.run(trace_op)
        self.assertTrue(len(trace) == 3)

        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        # Compute-set name patterns handed to the checker.
        expected = [
            '__seed*',
            'Sigmoid/custom-call/Nonlinearity',
            'Copy_XLA_Args/arg0.*_to_Sigmoid/custom-call.clone/OnTileCopy-0',
            'add/add.*/AddTo',
        ]
        self.assertTrue(
            tu.check_all_compute_sets_and_list(compute_sets, expected))
def testCborReport(self):
    """With use_poplar_cbor_report=True both reports start with byte 217."""
    with self.session() as sess:
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [2, 2], name="a")
            pb = array_ops.placeholder(np.float32, [2, 2], name="b")
            total = math_ops.add(pa, pb)

        with ops.device('cpu'):
            trace_op = gen_ipu_ops.ipu_event_trace()

        config = utils.create_ipu_config(
            profiling=True,
            profile_execution=True,
            use_poplar_text_report=False,
            use_poplar_cbor_report=True)
        utils.configure_ipu_system(config)

        feed = {
            pa: [[1., 1.], [2., 3.]],
            pb: [[0., 1.], [4., 5.]],
        }

        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op, feed)

        sess.run(total, feed)

        trace = sess.run(trace_op, feed)
        events = utils.extract_all_events(trace)

        # engine, begin, end, execute
        self.assertEqual(len(events), 4)

        # Compare the leading element of each report with that of a
        # one-byte bytes object holding 217.
        self.assertEqual(events[1].compile_end.compilation_report[0],
                         bytes(bytearray([217]))[0])
        self.assertEqual(events[3].execute.execution_report[0],
                         bytes(bytearray([217]))[0])
def testSigmoidGrad(self):
    """Evaluate sigmoid_grad on fixed inputs and check the compute sets."""
    with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float32, [3], name="grad")
        pb = array_ops.placeholder(np.float32, [3], name="in")
        grad = gen_math_ops.sigmoid_grad(pa, pb)

    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op)

        feed = {
            pa: [2.0, 0.5, 1.0],
            pb: [-1.0, 1.0, 6.0],
        }
        value = sess.run(grad, feed)
        self.assertAllClose(value, [2.0, 0.25, 0.0])

        trace = sess.run(trace_op)
        self.assertTrue(len(trace) == 3)

        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        # Compute-set name patterns handed to the checker.
        expected = ['__seed*', 'SigmoidGrad/custom-call/NonLinearityGrad']
        self.assertTrue(
            tu.check_all_compute_sets_and_list(compute_sets, expected))
def testIpuModelDeviceWithMultipleReport(self):
    """Run two separate ops and count the trace entries they produce."""
    with self.session() as sess:
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [2, 2], name="a")
            pb = array_ops.placeholder(np.float32, [2, 2], name="b")
            added = pa + pb
            subtracted = pa - pb

        with ops.device('cpu'):
            # The trace op is created under control dependencies on both
            # results.
            with ops.control_dependencies([added, subtracted]):
                trace_op = gen_ipu_ops.ipu_event_trace()

        config = utils.create_ipu_config(
            profiling=True, profile_execution=True)
        utils.configure_ipu_system(config)

        feed = {
            pa: [[1., 1.], [2., 3.]],
            pb: [[0., 1.], [4., 5.]],
        }

        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op, feed)

        value = sess.run(added, feed)
        self.assertAllClose(value, [[1., 2.], [6., 8.]])

        # Fetch the second result together with the trace.
        value, trace = sess.run([subtracted, trace_op], feed)
        self.assertAllClose(value, [[1., 0.], [-2., -2.]])

        # 2x engine, 2x compile_begin, 2x compile_end, 2x load engine
        self.assertEqual(len(trace), 8)
def testDontOutlineInplaceExpression(self):
    """Evaluate pa + pb - pc + pd on scalars and check the compute sets."""
    with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float32, [])
        pb = array_ops.placeholder(np.float32, [])
        pc = array_ops.placeholder(np.float32, [])
        pd = array_ops.placeholder(np.float32, [])
        expr = pa + pb - pc + pd

    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op)

        feed = {pa: 1, pb: 2, pc: 3, pd: 4}
        value = sess.run(expr, feed)
        # 1 + 2 - 3 + 4
        self.assertAllClose(value, 4)

        trace = sess.run(trace_op)
        self.assertTrue(len(trace) == 3)

        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        # Compute-set name patterns handed to the checker.
        expected = [
            '__seed*',
            'add/add.*/AddTo',
            'sub/subtract.*/AddTo',
            'add_1/add.*/AddTo',
        ]
        self.assertTrue(
            tu.check_all_compute_sets_and_list(compute_sets, expected))
def testDropoutImpl(rate):
    """Check that dropout keeps roughly a (1 - rate) share of the elements.

    Relies on `self` from the enclosing scope for the assertion.

    Args:
      rate: The rate passed to poprand.dropout.
    """

    def ipu_dropout(w):
        dropped = poprand.dropout(w, rate=rate)
        return [dropped]

    with ops.device('cpu'):
        input_data = array_ops.placeholder(np.float32, [1024, 1024, 4])
        report = gen_ipu_ops.ipu_event_trace()

    with ipu.ops.ipu_scope("/device:IPU:0"):
        compiled = ipu_compiler.compile(ipu_dropout, inputs=[input_data])

    config = ipu.utils.create_ipu_config()
    config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
    ipu.utils.configure_ipu_system(config)

    with sl.Session() as sess:
        in_data = np.random.rand(1024, 1024, 4)
        result = sess.run(compiled, {input_data: in_data})

        # Ratio of non-zero outputs to non-zero inputs.
        percent_kept = np.count_nonzero(result) / np.count_nonzero(in_data)

        # The outcome is random, so a reasonably large input is used to
        # keep the deviation from the expected keep fraction small.
        deviation = abs(percent_kept - (1.0 - rate))

        # A 3% tolerance is used so that outlier random draws do not cause
        # spurious failures.
        self.assertTrue(deviation < 0.03)
def testRandomNormalInitalizer(self):
    """Initialise a scalar variable from random_normal(mean=2, stddev=0.01)."""
    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    with ops.device("/device:IPU:0"):
        with variable_scope.variable_scope("vs", use_resource=True):
            initializer = init_ops.random_normal_initializer(
                mean=2.0, stddev=0.01)
            z = variable_scope.get_variable(
                "z1", shape=[], dtype=np.float32, initializer=initializer)

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # Clean existing reports
        sess.run(trace_op)

        sess.run(variables.global_variables_initializer())
        init_trace = sess.run(trace_op)

        value = sess.run(z)
        self.assertAllClose(value, 2.0, 0.2, 0.2)

        trace_strings = tu.extract_all_strings_from_event_trace(init_trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        # Compute-set name patterns handed to the checker.
        expected = [
            '__seed*',
            'vs/z1/Initializer/random_normal/RandomStandardNormal/fusion/normal',
        ]
        self.assertTrue(
            tu.check_all_compute_sets_and_list(compute_sets, expected))
def testDepthwiseConvBackpropInput1x1(self):
    """Depthwise conv input backprop with a 1x1 filter, fed with zeros."""
    with ops.device("/device:IPU:0"):
        # Constant holding the input sizes argument.
        input_sizes = constant_op.constant([1, 8, 8, 3], dtype=np.int32)
        pb = array_ops.placeholder(np.float32, [1, 1, 3, 2], name="b")
        pc = array_ops.placeholder(np.float32, [1, 8, 8, 6], name="c")
        input_grad = nn.depthwise_conv2d_native_backprop_input(
            input_sizes, pb, pc, strides=[1, 1, 1, 1], padding="SAME")

    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op)

        feed = {
            pb: np.zeros([1, 1, 3, 2]),
            pc: np.zeros([1, 8, 8, 6]),
        }
        value = sess.run(input_grad, feed)
        self.assertAllClose(value, np.zeros([1, 8, 8, 3]))

        trace = sess.run(trace_op)
        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        # Compute-set name patterns handed to the checker.
        expected = [
            '__seed*',
            'DepthwiseConv2dNativeBackpropInput/fusion*/WeightTranspose',
            'DepthwiseConv2dNativeBackpropInput/fusion*/Conv_1x1',
            'Copy_',
        ]
        self.assertTrue(
            tu.check_all_compute_sets_and_list(compute_sets, expected))
def testDefaultTruncatedNormalInitalizer(self):
    """Initialise a [2, 4] variable with the default truncated normal."""
    with ops.device('cpu'):
        trace_op = gen_ipu_ops.ipu_event_trace()

    with ops.device("/device:IPU:0"):
        with variable_scope.variable_scope("", use_resource=True):
            initializer = init_ops.truncated_normal_initializer()
            z = variable_scope.get_variable(
                "z1", shape=[2, 4], dtype=np.float32, initializer=initializer)

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
        sess.run(variables.global_variables_initializer())

        value = sess.run(z)
        self.assertAllClose(value, np.ones((2, 4)), 2.0, 2.0)

        # Collect the names of the compute sets from the trace.
        trace = sess.run(trace_op)
        trace_strings = tu.extract_all_strings_from_event_trace(trace)
        compute_sets = tu.get_compute_sets_from_report(trace_strings)

        # Compute-set name patterns handed to the checker.
        expected = [
            '__seed*',
            'z1/Initializer/truncated_normal/TruncatedNormal/custom-call*/truncatedNormal',
        ]
        self.assertTrue(
            tu.check_all_compute_sets_and_list(compute_sets, expected))
def ipu_compile_summary(name, op_list, collections=None):
    """Create an IPU compiler summary operation.

    The summary wraps an IPU event trace op that is created on the CPU
    device under control dependencies on `op_list`.

    Args:
      name: A name for the summary; passed on as the display name.
      op_list: An operation, or a list or tuple of operations, to make this
        summary dependent upon.
      collections: Optional collections to add the summary into.

    Returns:
      The new summary operation
    """
    # Normalise to a list: a tuple is treated as a sequence of dependencies
    # rather than being wrapped as a single element.
    if isinstance(op_list, tuple):
        op_list = list(op_list)
    elif not isinstance(op_list, list):
        op_list = [op_list]

    with ops.device("cpu"):
        with ops.control_dependencies(op_list):
            reports = gen_ipu_ops.ipu_event_trace()

            # Mark the summary as belonging to the "ipu" plugin.
            summary_metadata = summary_pb2.SummaryMetadata(
                plugin_data=summary_pb2.SummaryMetadata.PluginData(
                    plugin_name="ipu"))

            t_summary = tensor_summary(
                name='ipu_trace',
                tensor=reports,
                summary_metadata=summary_metadata,
                collections=collections,
                display_name=name)

    return t_summary
def testMultiScopeTest(self):
    """Two matmuls in separate IPU scopes yield one compile and one execute."""
    with ops.device('cpu'):
        x = array_ops.placeholder(np.float32, [2, 2])
        y = array_ops.placeholder(np.float32, [2, 2])
        trace_op = gen_ipu_ops.ipu_event_trace()

    with ipu.scopes.ipu_scope('/device:IPU:0'):
        first = math_ops.matmul(x, y)
    with ipu.scopes.ipu_scope('/device:IPU:0'):
        second = math_ops.matmul(x, first)

    config = ipu.utils.create_ipu_config(profiling=True)
    config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
    ipu.utils.configure_ipu_system(config)

    with sl.Session() as sess:
        # Initial fetch of the trace; its result is discarded.
        sess.run(trace_op)

        feed = {x: np.ones([2, 2]), y: np.ones([2, 2])}
        value = sess.run(second, feed)
        self.assertAllEqual(value, [[4, 4], [4, 4]])

        trace = sess.run(trace_op)
        event_types = ipu.utils.extract_all_types_from_event_trace(trace)

        # Tally compile-end and execute events in a single pass.
        compile_count = 0
        execute_count = 0
        for event_type in event_types:
            if event_type == IpuTraceEvent.COMPILE_END:
                compile_count += 1
            if event_type == IpuTraceEvent.EXECUTE:
                execute_count += 1

        self.assertEqual(compile_count, 1)
        self.assertEqual(execute_count, 1)