def testTwoOutfeedsDifferentPrograms(self):
  """Two separately compiled programs each feed their own outfeed queue."""
  first_queue = ipu_outfeed_queue.IPUOutfeedQueue(feed_name=next_feed_id())
  second_queue = ipu_outfeed_queue.IPUOutfeedQueue(feed_name=next_feed_id())

  def body1(v):
    # Enqueue the value as it is on entry, then increment it.
    enqueue_op = first_queue.enqueue(v)
    return (v + 1, enqueue_op)

  def my_net1(v):
    return loops.repeat(5, body1, v)

  def body2(v):
    enqueue_op = second_queue.enqueue(v)
    return (v + 1, enqueue_op)

  def my_net2(v):
    return loops.repeat(7, body2, v)

  with ops.device('cpu'):
    v1 = array_ops.placeholder(np.float32, [4, 4])
    v2 = array_ops.placeholder(np.float32, [5, 5])

  with ipu.ops.ipu_scope("/device:IPU:0"):
    res1 = ipu_compiler.compile(my_net1, inputs=[v1])
    res2 = ipu_compiler.compile(my_net2, inputs=[v2])

  config = ipu.utils.create_ipu_config()
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  dequeue1 = first_queue.dequeue()
  dequeue2 = second_queue.dequeue()

  with session_lib.Session() as sess:
    # First program: 5 iterations starting from ones.
    result1 = sess.run(res1, {v1: np.ones([4, 4], np.float32)})
    self.assertAllClose(result1[0], np.broadcast_to(6, [4, 4]))
    outfed1 = sess.run(dequeue1)
    for step in range(5):
      self.assertAllClose(outfed1[step], np.broadcast_to(step + 1, [4, 4]))

    # Second program: 7 iterations starting from fours.
    result2 = sess.run(res2, {v2: np.full([5, 5], 4, np.float32)})
    self.assertAllClose(result2[0], np.broadcast_to(11, [5, 5]))
    outfed2 = sess.run(dequeue2)
    for step in range(7):
      self.assertAllClose(outfed2[step], np.broadcast_to(step + 4, [5, 5]))
def testSingleInfeedRepeatTupleMerge(self):
  """Repeat loop over a two-element tuple infeed with merge_infeed_io_copies."""
  dataset = tu.create_single_increasing_dataset(3, shape=[4, 4])

  def dataset_parser(value):
    # Produce the raw value plus a derived second element.
    return (value, (value + 10.) / 2.0)

  dataset = dataset.map(dataset_parser)
  infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())

  def body(v, im1, im2):
    return v + im1 + im2

  def my_net():
    start = constant_op.constant(0.0, shape=[4, 4], dtype=np.float32)
    return loops.repeat(5, body, [start], infeed_queue)

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(my_net, inputs=[])

  config = ipu.utils.create_ipu_config(merge_infeed_io_copies=True)
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  with session_lib.Session() as sess:
    sess.run(infeed_queue.initializer)
    result = sess.run(compiled)
    self.assertAllClose(result[0], np.broadcast_to(31, [4, 4]))
def testSingleInfeedMultipleRepeats(self):
  """Two consecutive repeat loops consume from the same infeed queue."""
  dataset = tu.create_single_increasing_dataset(2, shape=[4, 4])
  infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())

  def body(v, x):
    return v + x

  def my_net():
    start = constant_op.constant(0.0, shape=[4, 4], dtype=np.float32)
    first_pass = loops.repeat(5, body, [start], infeed_queue)
    # The second loop continues from the first loop's output.
    return loops.repeat(5, body, [first_pass], infeed_queue)

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(my_net, inputs=[])

  config = ipu.utils.create_ipu_config()
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  with session_lib.Session() as sess:
    sess.run(infeed_queue.initializer)
    result = sess.run(compiled)
    self.assertAllClose(result[0], np.broadcast_to(5, [4, 4]))
def testCreateSimpleReplicatedGraphVariable(self):
  """Cross-replica sum of a doubled resource variable with two IPUs selected."""
  def my_graph():
    with ops.device("/device:IPU:0"):
      with variable_scope.variable_scope("", use_resource=True):
        var = variable_scope.get_variable(
            "x",
            dtype=np.float32,
            shape=[4],
            initializer=init_ops.constant_initializer(10.0))
      doubled = var + var
      return [popops_cross_replica_sum.cross_replica_sum(doubled)]

  out = ipu_compiler.compile(my_graph, [])

  config = ipu.utils.create_ipu_config(
      profiling=False, max_cross_replica_sum_buffer_size=10000)
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  config = ipu.utils.auto_select_ipus(config, 2)
  ipu.utils.configure_ipu_system(config)

  with sl.Session() as sess:
    sess.run(variables.global_variables_initializer())
    result = sess.run(out, {})

    # The expected output is four times the variable's initial value of 10.
    self.assertAllClose(result[0], 4 * np.full([4], 10.0))
def testCreateSimpleReplicatedGraph(self):
  """Cross-replica sum of a doubled placeholder with two IPUs selected."""
  def my_graph(inp):
    with ops.device("/device:IPU:0"):
      doubled = inp + inp
      return [popops_cross_replica_sum.cross_replica_sum(doubled)]

  with ops.device('cpu'):
    inp = array_ops.placeholder(np.float32, [4], name="data")

  out = ipu_compiler.compile(my_graph, [inp])

  config = ipu.utils.create_ipu_config(
      profiling=False, max_cross_replica_sum_buffer_size=10000)
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  config = ipu.utils.auto_select_ipus(config, 2)
  ipu.utils.configure_ipu_system(config)

  with sl.Session() as sess:
    sess.run(variables.global_variables_initializer())

    data = np.ones([4])
    result = sess.run(out, {inp: data})

    # The expected output is four times the fed input.
    self.assertAllClose(result[0], 4 * data)
def testCreateSimpleReplicatedOutfeedWrongReplicationFactor(self):
  """An outfeed whose replication_factor mismatches the program must fail.

  The outfeed queue is created with replication_factor=4 while two IPUs are
  selected; running the program is expected to raise
  FailedPreconditionError.
  """
  shape = [2]
  outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
      feed_name=next_feed_id(), replication_factor=4)

  def body(v):
    v = popops_cross_replica_sum.cross_replica_sum(v)
    outfeed = outfeed_queue.enqueue(v)
    return (v, outfeed)

  def my_net():
    v = constant_op.constant(0.0, shape=shape, dtype=np.float32)
    return loops.repeat(5, body, [v])

  with ipu.ops.ipu_scope("/device:IPU:0"):
    res = ipu_compiler.compile(my_net, inputs=[])

  cfg = ipu.utils.create_ipu_config(
      profiling=False, max_cross_replica_sum_buffer_size=10000)
  cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
  cfg = ipu.utils.auto_select_ipus(cfg, 2)
  ipu.utils.configure_ipu_system(cfg)

  with sl.Session() as sess:
    with self.assertRaisesRegexp(
        errors.FailedPreconditionError,
        'Current program has been created with replication_factor 2'):
      # Only the raised error matters; the run result is not inspected.
      sess.run(res)
def testErrorWhenNoAllReduce(self):
  """A replicated infeed/outfeed graph without a cross-replica sum must fail."""
  shape = [2]
  dataset = tu.create_single_increasing_dataset(3, shape)

  infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
      dataset, feed_name=next_feed_id(), replication_factor=2)
  outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
      feed_name=next_feed_id(), replication_factor=2)

  def body(v, x):
    enqueue_op = outfeed_queue.enqueue(v)
    return (v + x, enqueue_op)

  def my_net():
    start = constant_op.constant(0.0, shape=shape, dtype=np.float32)
    return loops.repeat(5, body, [start], infeed_queue)

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(my_net, inputs=[])

  # The dequeue op is still created, as before, although it is never run.
  outfeed_queue.dequeue()

  config = ipu.utils.create_ipu_config(
      profiling=False, max_cross_replica_sum_buffer_size=10000)
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  config = ipu.utils.auto_select_ipus(config, 2)
  ipu.utils.configure_ipu_system(config)

  with sl.Session() as sess:
    sess.run(infeed_queue.initializer)
    with self.assertRaisesRegexp(
        errors.FailedPreconditionError,
        'This is not a valid replicated graph because'):
      sess.run(compiled)
def testSingleOutfeedRepeatNonTuple(self):
  """A 20-iteration repeat loop enqueues its running value every step."""
  outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())

  def body(v):
    # Enqueue the value as it is on entry, then increment it.
    enqueue_op = outfeed_queue.enqueue(v)
    return (v + 1, enqueue_op)

  def my_net(v):
    return loops.repeat(20, body, v)

  with ops.device('cpu'):
    v = array_ops.placeholder(np.float32, [4, 4])

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(my_net, inputs=[v])

  config = ipu.utils.create_ipu_config()
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  dequeue_op = outfeed_queue.dequeue()
  with session_lib.Session() as sess:
    result = sess.run(compiled, {v: np.ones([4, 4], np.float32)})
    self.assertAllClose(result[0], np.broadcast_to(21, [4, 4]))

    outfed = sess.run(dequeue_op)
    for step in range(20):
      self.assertAllClose(outfed[step], np.broadcast_to(step + 1, [4, 4]))
def testMultipleOutfeedsRepeatNonTuple(self):
  """Enqueuing to two outfeed queues in one graph raises InvalidArgumentError."""
  first_queue = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())
  second_queue = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())

  def body(v):
    enqueue_first = first_queue.enqueue(v)
    enqueue_second = second_queue.enqueue(v * 2)
    return (v + 1, enqueue_first, enqueue_second)

  def my_net(v):
    return loops.repeat(20, body, v)

  with ops.device('cpu'):
    v = array_ops.placeholder(np.float32, [4, 4])

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(my_net, inputs=[v])

  config = ipu.utils.create_ipu_config()
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  # Dequeue ops are created in the same order as before, but never run.
  first_queue.dequeue()
  second_queue.dequeue()

  with session_lib.Session() as sess:
    with self.assertRaisesRegexp(
        errors.InvalidArgumentError,
        'Only one IPUOutfeedQueue supported per graph'):
      sess.run(compiled, {v: np.ones([4, 4], np.float32)})
def testMultipleConfigureIpuShouldFail(self):
  """Configuring the IPU system a second time is expected to raise."""
  def my_graph(pa, pb, pc):
    with ops.device("/device:IPU:0"):
      o1 = pa + pb
      o2 = pa + pc
      out = o1 + o2
      return [out]

  with ops.device('cpu'):
    pa = array_ops.placeholder(np.float32, [2], name="a")
    pb = array_ops.placeholder(np.float32, [2], name="b")
    pc = array_ops.placeholder(np.float32, [2], name="c")
    # The trace op and compiled graph are built as before but never run.
    gen_ipu_ops.ipu_event_trace()

  ipu_compiler.compile(my_graph, [pa, pb, pc])

  cfg = ipu.utils.create_ipu_config(profiling=True)
  cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
  cfg = ipu.utils.auto_select_ipus(cfg, 2)
  ipu.utils.configure_ipu_system(cfg)

  # Build the second configuration outside the assertRaises block, so that
  # only the repeated configure call can satisfy the expectation. An error
  # while constructing the config would otherwise make the test pass.
  second_cfg = ipu.utils.create_ipu_config(profiling=True)
  second_cfg = ipu.utils.set_ipu_model_options(
      second_cfg, compile_ipu_code=True)

  with self.assertRaises(Exception):
    ipu.utils.configure_ipu_system(second_cfg)
def testSingleInfeedRepeatNonTupleFiniteDataset(self):
  """Repeat loop of 10 steps over a 10-element, non-repeating dataset."""
  dataset = tu.create_single_increasing_dataset(
      10, shape=[4, 4], repeat=False)
  infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())

  def body(v, x):
    return v + x

  def my_net(v):
    return loops.repeat(10, body, v, infeed_queue)

  with ops.device('cpu'):
    v = array_ops.placeholder(np.float32, [4, 4])

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(my_net, inputs=[v])

  config = ipu.utils.create_ipu_config()
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  with session_lib.Session() as sess:
    sess.run(infeed_queue.initializer)
    result = sess.run(compiled, {v: np.ones([4, 4], np.float32)})
    self.assertAllClose(result[0], np.broadcast_to(46, [4, 4]))
def testNotEnoughIpus(self):
  """A graph using three shards fails when only two IPUs are selected."""
  def my_graph(pa, pb, pc):
    with ipu.ops.ipu_shard(0):
      left = pa + pb
    with ipu.ops.ipu_shard(1):
      right = pa + pc
    with ipu.ops.ipu_shard(2):
      total = left + right
    return total

  with ops.device('cpu'):
    pa = array_ops.placeholder(np.float32, [2], name="a")
    pb = array_ops.placeholder(np.float32, [2], name="b")
    pc = array_ops.placeholder(np.float32, [2], name="c")
    # The trace op is created, as before, but never run.
    gen_ipu_ops.ipu_event_trace()

  with ops.device("/device:IPU:0"):
    out = ipu_compiler.compile(my_graph, [pa, pb, pc])

  config = ipu.utils.create_ipu_config(profiling=True)
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  config = ipu.utils.auto_select_ipus(config, 2)
  ipu.utils.configure_ipu_system(config)

  with sl.Session() as sess:
    feed = {pa: [1., 1.], pb: [0., 1.], pc: [1., 5.]}
    with self.assertRaisesRegexp(errors.ResourceExhaustedError,
                                 'Trying to compile a graph for'):
      sess.run(out, feed)
def testDropoutImpl(rate):
  """Runs dropout at `rate` and checks the kept fraction is close to 1 - rate.

  `self` is not a parameter; it is taken from the enclosing scope.
  """
  def ipu_dropout(w):
    return [poprand.dropout(w, rate=rate)]

  with ops.device('cpu'):
    input_data = array_ops.placeholder(np.float32, [1024, 1024, 4])
    # The trace op is created, as before, but never run.
    gen_ipu_ops.ipu_event_trace()

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(ipu_dropout, inputs=[input_data])

  config = ipu.utils.create_ipu_config()
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  with sl.Session() as sess:
    in_data = np.random.rand(1024, 1024, 4)
    result = sess.run(compiled, {input_data: in_data})

    percent_kept = np.count_nonzero(result) / np.count_nonzero(in_data)

    # The input is deliberately large so that random variation in the kept
    # fraction stays small.
    error = abs(percent_kept - (1.0 - rate))

    # The 3% tolerance is intentionally loose so that an outlier random draw
    # does not cause a spurious failure.
    self.assertTrue(error < 0.03)
def testDropoutImpl():
  """Checks dropout output and its gradient have equally many non-zeros.

  `self` is not a parameter; it is taken from the enclosing scope.
  """
  def ipu_dropout_back(w):
    output = poprand.dropout(w, rate=0.4)
    cost = tf.square(output)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
    grads = optimizer.compute_gradients(cost, w)
    return [output, grads]

  with ops.device('cpu'):
    input_data = array_ops.placeholder(np.float32, [32])
    # The trace op is created, as before, but never run.
    gen_ipu_ops.ipu_event_trace()

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(ipu_dropout_back, inputs=[input_data])

  config = ipu.utils.create_ipu_config()
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  with sl.Session() as sess:
    in_data = np.random.rand(32)
    fetched = sess.run(compiled, {input_data: in_data})

    dropout_out = fetched[0]
    gradients = fetched[1][0][0]

    # Equal non-zero counts imply equal zero counts for same-sized arrays.
    self.assertAllEqual(
        np.count_nonzero(dropout_out), np.count_nonzero(gradients))
def testTwoOutfeedsDifferentProgramsSameFeedName(self):
  """Running a second program whose outfeed reuses feed name "a" must fail."""
  first_queue = ipu_outfeed_queue.IPUOutfeedQueue(feed_name="a")
  second_queue = ipu_outfeed_queue.IPUOutfeedQueue(feed_name="a")

  def body1(v):
    enqueue_op = first_queue.enqueue(v)
    return (v + 1, enqueue_op)

  def my_net1(v):
    return loops.repeat(5, body1, v)

  def body2(v):
    enqueue_op = second_queue.enqueue(v)
    return (v + 1, enqueue_op)

  def my_net2(v):
    return loops.repeat(7, body2, v)

  with ops.device('cpu'):
    v1 = array_ops.placeholder(np.float32, [4, 4])
    v2 = array_ops.placeholder(np.float32, [5, 5])

  with ipu.ops.ipu_scope("/device:IPU:0"):
    res1 = ipu_compiler.compile(my_net1, inputs=[v1])
    res2 = ipu_compiler.compile(my_net2, inputs=[v2])

  config = ipu.utils.create_ipu_config()
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  # Dequeue ops are created in the same order as before, but never run.
  first_queue.dequeue()
  second_queue.dequeue()

  with session_lib.Session() as sess:
    # The first program runs normally; only the second one should fail.
    sess.run(res1, {v1: np.ones([4, 4], np.float32)})
    with self.assertRaisesRegexp(
        errors.FailedPreconditionError,
        'Outfeed with id=\'a\' already exists'):
      sess.run(res2, {v2: np.full([5, 5], 4, np.float32)})
def testConvAndBiasAddDifferentIPUs(self):
  """Conv on shard 0 and bias-add on shard 1 compile once with global exchange."""
  def my_graph(inp, bias):
    with ops.device("/device:IPU:0"):
      with ipu.ops.ipu_shard(0):
        conv = layers.Conv2D(
            8, 3, padding='same', name="conv", use_bias=False)(inp)
      with ipu.ops.ipu_shard(1):
        biased = nn_ops.bias_add(conv, bias, name='biasAdd')
    return biased

  with ops.device('cpu'):
    inp = array_ops.placeholder(np.float32, [1, 32, 32, 4], name="data")
    bias = array_ops.placeholder(np.float32, [8], name="bias")
    report = gen_ipu_ops.ipu_event_trace()

  out = ipu_compiler.compile(my_graph, [inp, bias])

  config = ipu.utils.create_ipu_config(profiling=True)
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  config = ipu.utils.auto_select_ipus(config, 2)
  ipu.utils.configure_ipu_system(config)

  with sl.Session() as sess:
    sess.run(report)
    sess.run(variables.global_variables_initializer())
    sess.run(report)

    feed = {inp: np.ones([1, 32, 32, 4]), bias: np.ones([8])}
    sess.run(out, feed)

    trace = sess.run(report)

    compile_count = 0
    exchange_list = []
    for event in ipu.utils.extract_all_events(trace):
      if event.type == IpuTraceEvent.COMPILE_END:
        compile_count += 1
        exchange_list = tu.get_all_global_exchange_from_json_report(event)

    self.assertEqual(compile_count, 1)

    # There is 1 piece of global exchange (apart from progId)
    whitelist = [
        'switchControlBroadcast*/GlobalPreAll',
        '*_to_/custom-call/GlobalPreAll',
    ]
    self.assertTrue(
        tu.check_all_compute_sets_and_list(exchange_list, whitelist))
def testCreateSimpleReplicatedInfeedOutfeed(self):
  """Replicated infeed and outfeed with a cross-replica sum in the loop body.

  Runs five iterations with replication_factor=2 queues and two selected
  IPUs, then checks the final value and that, for every iteration, both
  replicas enqueued the same value.
  """
  shape = [2]
  dataset = tu.create_single_increasing_dataset(3, shape)

  infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
      dataset, feed_name=next_feed_id(), replication_factor=2)
  outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
      feed_name=next_feed_id(), replication_factor=2)

  def body(v, x):
    v = popops_cross_replica_sum.cross_replica_sum(v + x)
    outfeed = outfeed_queue.enqueue(v)
    return (v, outfeed)

  def my_net():
    v = constant_op.constant(0.0, shape=shape, dtype=np.float32)
    return loops.repeat(5, body, [v], infeed_queue)

  with ipu.ops.ipu_scope("/device:IPU:0"):
    res = ipu_compiler.compile(my_net, inputs=[])

  outfed = outfeed_queue.dequeue()

  cfg = ipu.utils.create_ipu_config(
      profiling=False, max_cross_replica_sum_buffer_size=10000)
  cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
  cfg = ipu.utils.auto_select_ipus(cfg, 2)
  ipu.utils.configure_ipu_system(cfg)

  with sl.Session() as sess:
    sess.run(infeed_queue.initializer)
    result = sess.run(res)
    self.assertAllClose(result[0], np.broadcast_to(48, shape))

    outfed_result = sess.run(outfed)

    # assertTrue(x, 2) would treat 2 as the failure message and never fail,
    # so use assertEqual. Axis 1 is the one indexed per replica in the
    # [step][0] / [step][1] comparisons below, hence it must have size 2.
    self.assertEqual(outfed_result.shape[1], 2)

    # Expected value enqueued by each replica at every iteration.
    for step, expected in enumerate([1, 4, 11, 23, 48]):
      self.assertAllClose(outfed_result[step][0], outfed_result[step][1])
      self.assertAllClose(outfed_result[step][0],
                          np.broadcast_to(expected, shape))
def program(iters, infeed_queue):
  """Compiles a repeat loop that sums `iters` infeed elements into a 4x4 zero.

  Args:
    iters: Iteration count passed to `loops.repeat`.
    infeed_queue: Infeed queue supplying one element per iteration.

  Returns:
    The result of `ipu_compiler.compile` for the loop.
  """
  def body(v, x):
    return v + x

  def my_net():
    start = constant_op.constant(0.0, shape=[4, 4], dtype=np.float32)
    return loops.repeat(iters, body, start, infeed_queue)

  with ipu.ops.ipu_scope("/device:IPU:0"):
    return ipu_compiler.compile(my_net)
def testIpuWhileScope(self):
  """Trains a dynamic_rnn LSTM inside an IPU scope and checks the loss drops.

  Checks that:
    1: design is targeted at the device
    2: variables are resource variables
    3: training a while_loop is possible
  """
  def my_net(a, b):
    c = variable_scope.get_variable('c', initializer=[1.0])
    self.assertTrue("ResourceVariable" in str(type(c)))

    lstm_cell = rnn_cell.LSTMCell(1, forget_bias=1.0)
    outputs, _ = rnn.dynamic_rnn(lstm_cell, a, dtype=np.float32)

    logits = outputs[-1] * c
    self.assertEqual(logits.device, "/device:IPU:0")

    reshaped = array_ops.reshape(logits, [1, 8, 1])
    loss = losses.mean_squared_error(reshaped, b)

    optimizer = gradient_descent.GradientDescentOptimizer(0.1)
    train = optimizer.minimize(loss)
    return [loss, train]

  with ops.device('cpu'):
    a = array_ops.placeholder(np.float32, [1, 8, 1], name="a")
    b = array_ops.placeholder(np.float32, [1, 8, 1], name="b")

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(my_net, inputs=[a, b])

  config = ipu.utils.create_ipu_config()
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  with sl.Session() as sess:
    sess.run(variables.global_variables_initializer())

    ones = [[[1.], [1.], [1.], [1.], [1.], [1.], [1.], [1.]]]
    feed = {
        a: ones,
        b: ones,
    }

    l_initial = sess.run([compiled], feed)

    # Each run also executes the training step, so these runs train the net.
    for _ in range(100):
      sess.run([compiled], feed)

    l_final = sess.run([compiled], feed)

    self.assertTrue(l_initial > l_final)
def testMultiIpu(self):
  """Two-shard graph: checks the result and the tiles in the tensor map."""
  def my_graph(pa, pb, pc):
    with ops.device("/device:IPU:0"):
      with ipu.ops.ipu_shard(0):
        left = pa + pb
      with ipu.ops.ipu_shard(1):
        right = pa + pc
        total = left + right
    return [total]

  with ops.device('cpu'):
    pa = array_ops.placeholder(np.float32, [2], name="a")
    pb = array_ops.placeholder(np.float32, [2], name="b")
    pc = array_ops.placeholder(np.float32, [2], name="c")
    report = gen_ipu_ops.ipu_event_trace()

  out = ipu_compiler.compile(my_graph, [pa, pb, pc])

  config = ipu.utils.create_ipu_config(profiling=True)
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  config = ipu.utils.auto_select_ipus(config, 2)
  ipu.utils.configure_ipu_system(config)

  with sl.Session() as sess:
    sess.run(report)

    feed = {pa: [1., 1.], pb: [0., 1.], pc: [1., 5.]}
    result = sess.run(out, feed)
    self.assertAllClose(result[0], [3., 8.])

    trace = sess.run(report)

    for event in ipu.utils.extract_all_events(trace):
      if event.type != IpuTraceEvent.COMPILE_END:
        continue

      tensor_map = json.loads(event.compile_end.tensor_map.decode('utf-8'))

      modules = list(tensor_map['mappings'].keys())
      self.assertEqual(len(modules), 1)

      # Collect the first field of every tile entry of every tensor.
      tiles = {
          tile[0]
          for tensor in tensor_map['mappings'][modules[0]]
          for tile in tensor[7]
      }

      self.assertEqual(len(tiles), 3)
      self.assertEqual(tiles, {0, 1, 1216})
def testIpuSimpleScopeAndExecutionReport(self):
  """Checks the form of the compile and execute reports for a simple add."""
  def my_net(a, b):
    return [a + b]

  def check_single_report(reports):
    # Exactly one (name, json_text) tuple, named after a cluster and with a
    # sizeable JSON-object payload.
    self.assertEqual(len(reports), 1)
    self.assertEqual(type(reports), list)
    self.assertEqual(type(reports[0]), tuple)
    self.assertTrue(reports[0][0].startswith("cluster"))
    self.assertTrue(len(reports[0][1]) > 1000)
    self.assertTrue(reports[0][1].startswith('{'))

  with ops.device('cpu'):
    a = array_ops.placeholder(np.float32, [1], name="a")
    b = array_ops.placeholder(np.float32, [1], name="b")
    events = gen_ipu_ops.ipu_event_trace()

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(my_net, inputs=[a, b])

  config = ipu.utils.create_ipu_config(profiling=True, profile_execution=True)
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  with sl.Session() as sess:
    feed = {
        a: [1],
        b: [2],
    }

    sess.run(events)

    total = sess.run(compiled[0], feed)
    self.assertEqual(total, [3])

    trace = sess.run(events)
    all_events = ipu.utils.extract_all_events(trace)
    self.assertEqual(count_compile_end_events(all_events), 1)

    check_single_report(ipu.utils.extract_compile_reports(trace))
    check_single_report(ipu.utils.extract_execute_reports(trace))
def testSingleInfeedOutfeedRepeatNamedLast(self):
  """A LAST-mode outfeed of a dict yields only the final iteration's values."""
  dataset = tu.create_single_increasing_dataset(3, shape=[4, 4])
  shape = [4, 4]

  def dataset_parser(value):
    return (value, (value + 10.) / 2.0)

  dataset = dataset.map(dataset_parser)

  infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())
  outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
      next_feed_id(), outfeed_mode=ipu_outfeed_queue.IPUOutfeedMode.LAST)

  def body(v, im1, im2):
    v = v + im1 + im2
    enqueue_op = outfeed_queue.enqueue({
        "v": v,
        "image1": im1,
        "image2": im2
    })
    return (v, enqueue_op)

  def my_net():
    start = constant_op.constant(0.0, shape=shape, dtype=np.float32)
    return loops.repeat(5, body, [start], infeed_queue)

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(my_net, inputs=[])

  dequeue_op = outfeed_queue.dequeue()

  config = ipu.utils.create_ipu_config()
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  with session_lib.Session() as sess:
    sess.run(infeed_queue.initializer)
    result = sess.run(compiled)
    self.assertAllClose(result[0], np.broadcast_to(31, shape))

    outfed_result = sess.run(dequeue_op)
    self.assertTrue(len(outfed_result) == 3)

    expected_by_key = (("v", 31), ("image1", 1), ("image2", 5.5))
    for key, expected in expected_by_key:
      self.assertAllClose(outfed_result[key],
                          np.broadcast_to(expected, shape))
def testTrainingLoopWithInfeedAndOutfeedGetLast(self):
  """Training loop whose LAST-mode outfeed returns the final loss as a scalar."""
  dataset = tu.create_single_increasing_dataset(10, shape=[4, 4, 2])
  dataset = dataset.batch(batch_size=2, drop_remainder=True)

  infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())
  outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
      next_feed_id(), outfeed_mode=ipu_outfeed_queue.IPUOutfeedMode.LAST)

  def my_net(iters):
    def body(loss, x):
      with variable_scope.variable_scope("vs", use_resource=True):
        y = layers.Conv2D(
            2,
            1,
            use_bias=True,
            kernel_initializer=init_ops.ones_initializer(),
            name='conv1')(x)
      loss = math_ops.reduce_sum(y)
      optimizer = gradient_descent.GradientDescentOptimizer(0.1)
      train = optimizer.minimize(loss)
      enqueue_op = outfeed_queue.enqueue(loss)
      # Make the returned loss depend on the training step.
      with ops.control_dependencies([train]):
        return (array_ops.identity(loss), enqueue_op)

    initial = 0.0
    return loops.repeat(iters, body, initial, infeed_queue)

  with ops.device('cpu'):
    iters = array_ops.placeholder(np.int32, shape=[])

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(my_net, inputs=[iters])

  dequeue_op = outfeed_queue.dequeue()

  with session_lib.Session() as sess:
    sess.run(infeed_queue.initializer)
    sess.run(variables.global_variables_initializer())

    initial_loss = sess.run(compiled, {iters: 1})
    final_loss = sess.run(compiled, {iters: 1000})
    outfed = sess.run(dequeue_op)

    self.assertTrue(initial_loss > final_loss)
    self.assertTrue(outfed == final_loss)

    # Check that a scalar is returned instead of a numpy array
    self.assertTrue(type(outfed) == np.float32)
def testGather(self):
  """Gathers 256 elements and checks no large tensor sits on a single tile.

  After checking the gather result, every non-empty COMPILE_END tensor map
  is scanned. Any non-constant tensor with more than 16 elements that is
  mapped to exactly one tile is recorded as a bad mapping, and none are
  expected.
  """
  def my_net(w, i):
    out = array_ops.gather(w, i)
    return [out]

  with ops.device('cpu'):
    i = array_ops.placeholder(np.int32, [256])
    w = array_ops.placeholder(np.float32, [8192])
    report = gen_ipu_ops.ipu_event_trace()

  with ipu.ops.ipu_scope("/device:IPU:0"):
    r = ipu_compiler.compile(my_net, inputs=[w, i])

  cfg = ipu.utils.create_ipu_config(profiling=True)
  cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(cfg)

  with sl.Session() as sess:
    result = sess.run(r, {
        i: np.arange(0, 3 * 256, 3),
        w: np.arange(8192)
    })
    self.assertAllClose(result[0], np.arange(0, 3 * 256, 3))

    rep = sess.run(report)
    events = ipu.utils.extract_all_events(rep)

    # Initialised up front so the final assertion is always well defined,
    # even when no COMPILE_END event carries a tensor map.
    bad_maps = []
    for e in events:
      if e.type != IpuTraceEvent.COMPILE_END:
        continue

      # Decode once and reuse the text for both the emptiness check and
      # the JSON parse.
      tensor_map_json = e.compile_end.tensor_map.decode('utf-8')
      if not tensor_map_json:
        continue

      tm = json.loads(tensor_map_json)
      for tensors in tm['mappings'].values():
        for tensor in tensors:
          # Total elements > 16, tiles used == 1 and is_constant == 0
          if tensor[6] > 16 and len(tensor[7]) == 1 and tensor[4] == 0:
            bad_maps.append(tensor[0])

    self.assertEqual(len(bad_maps), 0)
def testDropoutImpl(rate, seed, in_data):
  """Runs seeded dropout over `in_data` and returns the session result.

  Args:
    rate: Dropout rate passed to `poprand.dropout`.
    seed: Seed passed to `poprand.dropout`.
    in_data: Value fed to the [32, 4] float32 input placeholder.

  Returns:
    The value of `sess.run` on the compiled dropout computation.
  """
  def ipu_dropout(w):
    return [poprand.dropout(w, rate=rate, seed=seed)]

  with ops.device('cpu'):
    input_data = array_ops.placeholder(np.float32, [32, 4])
    # The trace op is created, as before, but never run.
    gen_ipu_ops.ipu_event_trace()

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(ipu_dropout, inputs=[input_data])

  config = ipu.utils.create_ipu_config()
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  with sl.Session() as sess:
    return sess.run(compiled, {input_data: in_data})
def testIpuWhilePerfTest(self):
  """A while_loop capped at 10 iterations compiles and executes exactly once."""
  def cond(i, v):
    return math_ops.less(i, 15)

  def body(i, v):
    return (i + 1, v + i)

  def my_net(v):
    counter = constant_op.constant(0)
    loop_out = control_flow_ops.while_loop(
        cond, body, (counter, v), maximum_iterations=10)
    return [loop_out[1]]

  with ops.device('cpu'):
    v = array_ops.placeholder(np.int32, [500])
    report = gen_ipu_ops.ipu_event_trace()

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(my_net, inputs=[v])

  config = ipu.utils.create_ipu_config(profiling=True, profile_execution=True)
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  with sl.Session() as sess:
    result = sess.run(compiled, {v: np.zeros([500], np.int32)})
    self.assertAllClose(result[0], np.broadcast_to(45, [500]))

    trace = sess.run(report)

    # Check that there is only one real compile
    compile_reports = ipu.utils.extract_compile_reports(trace)
    self.assertEqual(len(compile_reports), 1)

    # Check that there is only one execute
    execute_reports = ipu.utils.extract_execute_reports(trace)
    self.assertEqual(len(execute_reports), 1)
def _RunLayer(layer_func, x, y):
  """Compiles `layer_func` for the IPU and runs it for several training steps.

  Relies on `dataType`, `batch_size`, `num_hidden` and `num_training_steps`
  from the enclosing scope.

  Args:
    layer_func: Callable compiled with inputs `[px, ph, pc, py]`.
    x: Array fed to the first placeholder; its `.shape` sizes it.
    y: Array fed to the last placeholder; its `.shape` sizes it.

  Returns:
    A list holding the `sess.run` result of each training step.
  """
  state_shape = [batch_size, num_hidden]
  with ops.device('cpu'):
    px = array_ops.placeholder(dataType, shape=x.shape)
    ph = array_ops.placeholder(dataType, shape=state_shape)
    pc = array_ops.placeholder(dataType, shape=state_shape)
    py = array_ops.placeholder(dataType, shape=y.shape)

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(layer_func, inputs=[px, ph, pc, py])

  opts = utils.create_ipu_config(profiling=True, use_poplar_text_report=True)
  opts = ipu.utils.set_ipu_model_options(opts, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(opts)

  with sl.Session() as sess:
    sess.run(variables.global_variables_initializer())
    # Both state placeholders are fed with ones.
    feed = {px: x, ph: np.ones(ph.shape), pc: np.ones(pc.shape), py: y}
    return [sess.run(compiled, feed) for _ in range(0, num_training_steps)]
def testCreateCombinedReplicatedSumGraph(self):
  """Several float32 and int32 cross-replica sums feeding two final sums."""
  crs = popops_cross_replica_sum.cross_replica_sum

  def my_graph():
    with ops.device("/device:IPU:0"):
      with variable_scope.variable_scope("", use_resource=True):
        x1 = variable_scope.get_variable(
            "x1",
            dtype=np.float32,
            shape=[100],
            initializer=init_ops.constant_initializer(10.0))
        x2 = variable_scope.get_variable(
            "x2",
            dtype=np.int32,
            shape=[100],
            initializer=init_ops.constant_initializer(10))
      y1 = crs(x1 + x1)
      z1 = crs(x1 * x1)
      y2 = crs(x2 + x2)
      z2 = crs(x2 * x2)
      return [crs(z1 + y1), crs(z2 + y2)]

  out = ipu_compiler.compile(my_graph, [])

  config = ipu.utils.create_ipu_config(
      profiling=False, max_cross_replica_sum_buffer_size=10000)
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  config = ipu.utils.auto_select_ipus(config, 2)
  ipu.utils.configure_ipu_system(config)

  with sl.Session() as sess:
    sess.run(variables.global_variables_initializer())

    result = sess.run(out, {})
    expected = np.full([2, 100], 480.0)

    # Check output equals the expected value
    self.assertAllClose(result, expected)
def testSingleInfeedWhileLoopTuple(self):
  """A 20-iteration while loop accumulating a two-element tuple infeed."""
  dataset = tu.create_single_increasing_dataset(3, shape=[4, 4])

  def dataset_parser(value):
    return (value, (value + 10.) / 2.0)

  dataset = dataset.map(dataset_parser)
  infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())

  def cond(i, v):
    return i < 20

  def body(i, v, im1, im2):
    return (i + 1, v + im1 + im2)

  def my_net(v):
    counter = 0
    loop_out = loops.while_loop(cond, body, (counter, v), infeed_queue)
    return loop_out[1]

  with ops.device('cpu'):
    v = array_ops.placeholder(np.float32, [4, 4])

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(my_net, inputs=[v])

  config = ipu.utils.create_ipu_config()
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  with session_lib.Session() as sess:
    sess.run(infeed_queue.initializer)
    result = sess.run(compiled, {v: np.ones([4, 4], np.float32)})
    self.assertAllClose(result[0], np.broadcast_to(129.5, [4, 4]))
def testSingleInfeedRepeatNamed(self):
  """Repeat loop over a dict infeed with elements keyed "a" and "b"."""
  dataset = tu.create_single_increasing_dataset(3, shape=[4, 4])

  def dataset_parser(value):
    return {"a": value, "b": (value + 10.) / 2.0}

  dataset = dataset.map(dataset_parser)
  infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())

  # Note how the parameters are swapped around: `b` is declared before `a`.
  def body(v1, v2, b, a):
    return (v1 + a, v2 + b)

  def my_net():
    acc_a = constant_op.constant(0.0, shape=[4, 4], dtype=np.float32)
    acc_b = constant_op.constant(0.0, shape=[4, 4], dtype=np.float32)
    return loops.repeat(5, body, [acc_a, acc_b], infeed_queue)

  with ipu.ops.ipu_scope("/device:IPU:0"):
    compiled = ipu_compiler.compile(my_net, inputs=[])

  config = ipu.utils.create_ipu_config()
  config = ipu.utils.set_ipu_model_options(config, compile_ipu_code=False)
  ipu.utils.configure_ipu_system(config)

  with session_lib.Session() as sess:
    sess.run(infeed_queue.initializer)
    result = sess.run(compiled)
    self.assertAllClose(result[0], np.broadcast_to(4, [4, 4]))
    self.assertAllClose(result[1], np.broadcast_to(27, [4, 4]))