Ejemplo n.º 1
0
    def testTwoOutfeedsDifferentPrograms(self):
        """Run two compiled programs, each feeding its own outfeed queue.

        The first program repeats its body 5 times on a [4, 4] input and the
        second repeats 7 times on a [5, 5] input.  Every iteration enqueues
        the value it received and passes on that value plus one.  Each queue
        is dequeued immediately after its own program has been run.
        """
        first_queue = ipu_outfeed_queue.IPUOutfeedQueue(
            feed_name=next_feed_id())
        second_queue = ipu_outfeed_queue.IPUOutfeedQueue(
            feed_name=next_feed_id())

        def body1(v):
            # Enqueue the incoming value, then hand back the incremented one.
            enqueue_op = first_queue.enqueue(v)
            return (v + 1, enqueue_op)

        def my_net1(v):
            return loops.repeat(5, body1, (v))

        def body2(v):
            enqueue_op = second_queue.enqueue(v)
            return (v + 1, enqueue_op)

        def my_net2(v):
            return loops.repeat(7, body2, (v))

        with ops.device('cpu'):
            v1 = array_ops.placeholder(np.float32, [4, 4])
            v2 = array_ops.placeholder(np.float32, [5, 5])

        with ipu.ops.ipu_scope("/device:IPU:0"):
            res1 = ipu_compiler.compile(my_net1, inputs=[v1])
            res2 = ipu_compiler.compile(my_net2, inputs=[v2])

        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu.utils.create_ipu_config(), compile_ipu_code=False)
        ipu.utils.configure_ipu_system(ipu_cfg)

        dequeue_first = first_queue.dequeue()
        dequeue_second = second_queue.dequeue()
        with session_lib.Session() as sess:
            # Program 1: starts from ones, so 5 increments give 6.
            first_result = sess.run(res1, {v1: np.ones([4, 4], np.float32)})
            self.assertAllClose(first_result[0], np.broadcast_to(6, [4, 4]))
            first_outfed = sess.run(dequeue_first)
            for step in range(5):
                self.assertAllClose(first_outfed[step],
                                    np.broadcast_to(step + 1, [4, 4]))

            # Program 2: starts from fours, so 7 increments give 11.
            second_result = sess.run(
                res2, {v2: np.full([5, 5], 4, np.float32)})
            self.assertAllClose(second_result[0], np.broadcast_to(11, [5, 5]))
            second_outfed = sess.run(dequeue_second)
            for step in range(7):
                self.assertAllClose(second_outfed[step],
                                    np.broadcast_to(step + 4, [5, 5]))
Ejemplo n.º 2
0
    def testSingleInfeedRepeatTupleMerge(self):
        """Accumulate a two-element infeed tuple with merged infeed IO copies.

        Each dataset element is mapped to a pair (value, (value + 10) / 2).
        The loop body adds both members to the accumulator for 5 iterations,
        and the IPU config is created with merge_infeed_io_copies=True.
        The expected accumulated value is 31 in every element.
        """
        def dataset_parser(value):
            return (value, (value + 10.) / 2.0)

        dataset = tu.create_single_increasing_dataset(
            3, shape=[4, 4]).map(dataset_parser)

        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())

        def body(v, im1, im2):
            return v + im1 + im2

        def my_net():
            start = constant_op.constant(0.0, shape=[4, 4], dtype=np.float32)
            return loops.repeat(5, body, [start], infeed_queue)

        with ipu.ops.ipu_scope("/device:IPU:0"):
            compiled = ipu_compiler.compile(my_net, inputs=[])

        ipu_cfg = ipu.utils.create_ipu_config(merge_infeed_io_copies=True)
        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu_cfg, compile_ipu_code=False)
        ipu.utils.configure_ipu_system(ipu_cfg)

        with session_lib.Session() as sess:
            # The infeed must be initialised before the program consumes it.
            sess.run(infeed_queue.initializer)
            output = sess.run(compiled)
            self.assertAllClose(output[0], np.broadcast_to(31, [4, 4]))
Ejemplo n.º 3
0
    def testSingleInfeedMultipleRepeats(self):
        """Consume one infeed queue from two consecutive repeat loops.

        Both loops run the same accumulating body for 5 iterations; the
        second loop starts from the result of the first.  The expected
        final value is 5 in every element.
        """
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
            tu.create_single_increasing_dataset(2, shape=[4, 4]),
            next_feed_id())

        def body(v, x):
            return v + x

        def my_net():
            start = constant_op.constant(0.0, shape=[4, 4], dtype=np.float32)
            first_pass = loops.repeat(5, body, [start], infeed_queue)
            # Second loop continues from the first loop's output.
            return loops.repeat(5, body, [first_pass], infeed_queue)

        with ipu.ops.ipu_scope("/device:IPU:0"):
            compiled = ipu_compiler.compile(my_net, inputs=[])

        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu.utils.create_ipu_config(), compile_ipu_code=False)
        ipu.utils.configure_ipu_system(ipu_cfg)

        with session_lib.Session() as sess:
            sess.run(infeed_queue.initializer)
            output = sess.run(compiled)
            self.assertAllClose(output[0], np.broadcast_to(5, [4, 4]))
Ejemplo n.º 4
0
    def testCreateSimpleReplicatedGraphVariable(self):
        """Cross-replica sum of a doubled resource variable.

        A [4] resource variable initialised to 10.0 is added to itself and
        passed through cross_replica_sum.  Two IPUs are auto-selected, and
        the expected output is 4 * 10.0 in every element.
        """
        def my_graph():
            with ops.device("/device:IPU:0"):
                with variable_scope.variable_scope("", use_resource=True):
                    var = variable_scope.get_variable(
                        "x",
                        dtype=np.float32,
                        shape=[4],
                        initializer=init_ops.constant_initializer(10.0))
                doubled = var + var
                return [popops_cross_replica_sum.cross_replica_sum(doubled)]

        compiled = ipu_compiler.compile(my_graph, [])

        ipu_cfg = ipu.utils.create_ipu_config(
            profiling=False, max_cross_replica_sum_buffer_size=10000)
        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu_cfg, compile_ipu_code=False)
        ipu_cfg = ipu.utils.auto_select_ipus(ipu_cfg, 2)
        ipu.utils.configure_ipu_system(ipu_cfg)

        with sl.Session() as sess:
            sess.run(variables.global_variables_initializer())

            output = sess.run(compiled, {})

            # The result must be four times the variable's initial value.
            self.assertAllClose(output[0], 4 * np.full([4], 10.0))
Ejemplo n.º 5
0
    def testCreateSimpleReplicatedGraph(self):
        """Cross-replica sum of a doubled placeholder input.

        The [4] input is added to itself and passed through
        cross_replica_sum.  Two IPUs are auto-selected, and the expected
        output is four times the fed data.
        """
        def my_graph(inp):
            with ops.device("/device:IPU:0"):
                doubled = inp + inp

                return [popops_cross_replica_sum.cross_replica_sum(doubled)]

        with ops.device('cpu'):
            inp = array_ops.placeholder(np.float32, [4], name="data")

        compiled = ipu_compiler.compile(my_graph, [inp])

        ipu_cfg = ipu.utils.create_ipu_config(
            profiling=False, max_cross_replica_sum_buffer_size=10000)
        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu_cfg, compile_ipu_code=False)
        ipu_cfg = ipu.utils.auto_select_ipus(ipu_cfg, 2)
        ipu.utils.configure_ipu_system(ipu_cfg)

        with sl.Session() as sess:
            sess.run(variables.global_variables_initializer())

            data = np.ones([4])
            output = sess.run(compiled, {inp: data})

            # The result must be four times the input data.
            self.assertAllClose(output[0], 4 * data)
Ejemplo n.º 6
0
    def testCreateSimpleReplicatedOutfeedWrongReplicationFactor(self):
        """An outfeed whose replication factor mismatches the program fails.

        The outfeed queue is created with replication_factor=4 while only
        two IPUs are auto-selected; running the program is expected to
        raise FailedPreconditionError mentioning replication_factor 2.
        """
        shape = [2]
        # The dataset is created but never attached to an infeed here.
        dataset = tu.create_single_increasing_dataset(3, shape)

        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
            feed_name=next_feed_id(), replication_factor=4)

        def body(v):
            summed = popops_cross_replica_sum.cross_replica_sum(v)
            enqueue_op = outfeed_queue.enqueue(summed)
            return (summed, enqueue_op)

        def my_net():
            start = constant_op.constant(0.0, shape=shape, dtype=np.float32)
            return loops.repeat(5, body, [start])

        with ipu.ops.ipu_scope("/device:IPU:0"):
            compiled = ipu_compiler.compile(my_net, inputs=[])

        ipu_cfg = ipu.utils.create_ipu_config(
            profiling=False, max_cross_replica_sum_buffer_size=10000)
        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu_cfg, compile_ipu_code=False)
        ipu_cfg = ipu.utils.auto_select_ipus(ipu_cfg, 2)
        ipu.utils.configure_ipu_system(ipu_cfg)

        with sl.Session() as sess:
            with self.assertRaisesRegexp(
                    errors.FailedPreconditionError,
                    'Current program has been created with replication_factor 2'
            ):
                sess.run(compiled)
Ejemplo n.º 7
0
    def testErrorWhenNoAllReduce(self):
        """A replicated program without a cross-replica sum is rejected.

        Infeed and outfeed are both created with replication_factor=2 and
        two IPUs are auto-selected, but the loop body contains no
        cross_replica_sum.  Running it is expected to raise
        FailedPreconditionError ('This is not a valid replicated graph
        because').
        """
        shape = [2]
        dataset = tu.create_single_increasing_dataset(3, shape)

        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
            dataset, feed_name=next_feed_id(), replication_factor=2)
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
            feed_name=next_feed_id(), replication_factor=2)

        def body(v, x):
            enqueue_op = outfeed_queue.enqueue(v)
            return (v + x, enqueue_op)

        def my_net():
            start = constant_op.constant(0.0, shape=shape, dtype=np.float32)
            return loops.repeat(5, body, [start], infeed_queue)

        with ipu.ops.ipu_scope("/device:IPU:0"):
            compiled = ipu_compiler.compile(my_net, inputs=[])

        # The dequeue op is built but never run by this test.
        outfeed_queue.dequeue()

        ipu_cfg = ipu.utils.create_ipu_config(
            profiling=False, max_cross_replica_sum_buffer_size=10000)
        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu_cfg, compile_ipu_code=False)
        ipu_cfg = ipu.utils.auto_select_ipus(ipu_cfg, 2)
        ipu.utils.configure_ipu_system(ipu_cfg)

        with sl.Session() as sess:
            sess.run(infeed_queue.initializer)
            with self.assertRaisesRegexp(
                    errors.FailedPreconditionError,
                    'This is not a valid replicated graph because'):
                sess.run(compiled)
Ejemplo n.º 8
0
    def testSingleOutfeedRepeatNonTuple(self):
        """Enqueue a single (non-tuple) tensor on every loop iteration.

        The body runs 20 times on a [4, 4] input of ones, enqueueing the
        incoming value and passing on that value plus one.  The expected
        final value is 21, and the i-th dequeued element is i + 1.
        """
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())

        def body(v):
            enqueue_op = outfeed_queue.enqueue(v)
            return (v + 1, enqueue_op)

        def my_net(v):
            return loops.repeat(20, body, (v))

        with ops.device('cpu'):
            v = array_ops.placeholder(np.float32, [4, 4])

        with ipu.ops.ipu_scope("/device:IPU:0"):
            compiled = ipu_compiler.compile(my_net, inputs=[v])

        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu.utils.create_ipu_config(), compile_ipu_code=False)
        ipu.utils.configure_ipu_system(ipu_cfg)

        dequeue_op = outfeed_queue.dequeue()
        with session_lib.Session() as sess:
            output = sess.run(compiled, {v: np.ones([4, 4], np.float32)})

            self.assertAllClose(output[0], np.broadcast_to(21, [4, 4]))
            dequeued = sess.run(dequeue_op)
            for step in range(20):
                self.assertAllClose(dequeued[step],
                                    np.broadcast_to(step + 1, [4, 4]))
Ejemplo n.º 9
0
    def testMultipleOutfeedsRepeatNonTuple(self):
        """Two outfeed queues inside one compiled graph must be rejected.

        The loop body enqueues to two different queues; running the program
        is expected to raise InvalidArgumentError ('Only one
        IPUOutfeedQueue supported per graph').
        """
        first_queue = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())
        second_queue = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())

        def body(v):
            first_enqueue = first_queue.enqueue(v)
            second_enqueue = second_queue.enqueue(v * 2)
            return (v + 1, first_enqueue, second_enqueue)

        def my_net(v):
            return loops.repeat(20, body, (v))

        with ops.device('cpu'):
            v = array_ops.placeholder(np.float32, [4, 4])

        with ipu.ops.ipu_scope("/device:IPU:0"):
            compiled = ipu_compiler.compile(my_net, inputs=[v])

        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu.utils.create_ipu_config(), compile_ipu_code=False)
        ipu.utils.configure_ipu_system(ipu_cfg)

        # Dequeue ops are built for both queues but never run.
        first_queue.dequeue()
        second_queue.dequeue()
        with session_lib.Session() as sess:
            with self.assertRaisesRegexp(
                    errors.InvalidArgumentError,
                    'Only one IPUOutfeedQueue supported per graph'):
                sess.run(compiled, {v: np.ones([4, 4], np.float32)})
Ejemplo n.º 10
0
    def testMultipleConfigureIpuShouldFail(self):
        """Configuring the IPU system a second time must raise.

        A first configuration (profiling on, two auto-selected IPUs) is
        applied successfully.  A second, different configuration is then
        built and passed to configure_ipu_system, which is expected to
        raise.
        """
        def my_graph(pa, pb, pc):
            with ops.device("/device:IPU:0"):
                o1 = pa + pb
                o2 = pa + pc
                out = o1 + o2

            return [out]

        with ops.device('cpu'):
            pa = array_ops.placeholder(np.float32, [2], name="a")
            pb = array_ops.placeholder(np.float32, [2], name="b")
            pc = array_ops.placeholder(np.float32, [2], name="c")
            # Op is created as in the original test; it is never run here.
            gen_ipu_ops.ipu_event_trace()

        # The compiled graph is built but never executed by this test.
        ipu_compiler.compile(my_graph, [pa, pb, pc])

        cfg = ipu.utils.create_ipu_config(profiling=True)
        cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
        cfg = ipu.utils.auto_select_ipus(cfg, 2)
        ipu.utils.configure_ipu_system(cfg)

        # Build the second config outside the assertRaises block so that an
        # unexpected failure while constructing it cannot be mistaken for
        # the reconfiguration error this test is about.
        second_cfg = ipu.utils.create_ipu_config(profiling=True)
        second_cfg = ipu.utils.set_ipu_model_options(
            second_cfg, compile_ipu_code=True)

        with self.assertRaises(Exception):
            ipu.utils.configure_ipu_system(second_cfg)
Ejemplo n.º 11
0
    def testSingleInfeedRepeatNonTupleFiniteDataset(self):
        """Accumulate an infeed built from a non-repeating dataset.

        The dataset is created with repeat=False and the loop runs 10
        iterations, adding each infeed element to a [4, 4] input of ones.
        The expected final value is 46 in every element.
        """
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
            tu.create_single_increasing_dataset(10,
                                                shape=[4, 4],
                                                repeat=False),
            next_feed_id())

        def body(v, x):
            return v + x

        def my_net(v):
            return loops.repeat(10, body, (v), infeed_queue)

        with ops.device('cpu'):
            v = array_ops.placeholder(np.float32, [4, 4])

        with ipu.ops.ipu_scope("/device:IPU:0"):
            compiled = ipu_compiler.compile(my_net, inputs=[v])

        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu.utils.create_ipu_config(), compile_ipu_code=False)
        ipu.utils.configure_ipu_system(ipu_cfg)

        with session_lib.Session() as sess:
            sess.run(infeed_queue.initializer)
            output = sess.run(compiled, {v: np.ones([4, 4], np.float32)})
            self.assertAllClose(output[0], np.broadcast_to(46, [4, 4]))
Ejemplo n.º 12
0
    def testNotEnoughIpus(self):
        """A graph sharded over three shards fails when two IPUs are selected.

        The graph places ops on shards 0, 1 and 2 while the configuration
        auto-selects 2 IPUs; running it is expected to raise
        ResourceExhaustedError ('Trying to compile a graph for').
        """
        def my_graph(pa, pb, pc):
            with ipu.ops.ipu_shard(0):
                left_sum = pa + pb
            with ipu.ops.ipu_shard(1):
                right_sum = pa + pc
            with ipu.ops.ipu_shard(2):
                return left_sum + right_sum

        with ops.device('cpu'):
            pa = array_ops.placeholder(np.float32, [2], name="a")
            pb = array_ops.placeholder(np.float32, [2], name="b")
            pc = array_ops.placeholder(np.float32, [2], name="c")
            # Event-trace op is created but never run in this test.
            gen_ipu_ops.ipu_event_trace()

        with ops.device("/device:IPU:0"):
            compiled = ipu_compiler.compile(my_graph, [pa, pb, pc])

        ipu_cfg = ipu.utils.create_ipu_config(profiling=True)
        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu_cfg, compile_ipu_code=False)
        ipu_cfg = ipu.utils.auto_select_ipus(ipu_cfg, 2)
        ipu.utils.configure_ipu_system(ipu_cfg)

        feed = {pa: [1., 1.], pb: [0., 1.], pc: [1., 5.]}
        with sl.Session() as sess:
            with self.assertRaisesRegexp(errors.ResourceExhaustedError,
                                         'Trying to compile a graph for'):
                sess.run(compiled, feed)
Ejemplo n.º 13
0
    def testDropoutImpl(rate):
      """Check that dropout keeps roughly a (1 - rate) fraction of elements.

      NOTE(review): `self` is not a parameter of this function; it is
      presumably captured from an enclosing test method - confirm.

      Args:
        rate: dropout rate forwarded to poprand.dropout.
      """
      def ipu_dropout(w):
        return [poprand.dropout(w, rate=rate)]

      with ops.device('cpu'):
        input_data = array_ops.placeholder(np.float32, [1024, 1024, 4])
        # Event-trace op is created but never run here.
        gen_ipu_ops.ipu_event_trace()

      with ipu.ops.ipu_scope("/device:IPU:0"):
        compiled = ipu_compiler.compile(ipu_dropout, inputs=[input_data])

      ipu_cfg = ipu.utils.set_ipu_model_options(
          ipu.utils.create_ipu_config(), compile_ipu_code=False)
      ipu.utils.configure_ipu_system(ipu_cfg)
      with sl.Session() as sess:
        in_data = np.random.rand(1024, 1024, 4)

        dropped = sess.run(compiled, {input_data: in_data})

        # Fraction of non-zero inputs that are still non-zero afterwards.
        percent_kept = np.count_nonzero(dropped) / np.count_nonzero(in_data)

        # The test data is deliberately large so that random fluctuation
        # in the kept fraction stays small.
        deviation = abs(percent_kept - (1.0 - rate))

        # The original author reports the observed error is far below this
        # bound; 3% is kept as slack so outlier draws do not cause random
        # failures.
        self.assertTrue(deviation < 0.03)
Ejemplo n.º 14
0
    def testDropoutImpl():
      """Check dropout output and its gradient have equally many non-zeros.

      NOTE(review): `self` is not a parameter of this function; it is
      presumably captured from an enclosing test method - confirm.
      """
      def ipu_dropout_back(w):
        output = poprand.dropout(w, rate=0.4)

        # Squared dropout output is the cost differentiated w.r.t. w.
        cost = tf.square(output)

        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)

        return [output, optimizer.compute_gradients(cost, w)]

      with ops.device('cpu'):
        input_data = array_ops.placeholder(np.float32, [32])
        # Event-trace op is created but never run here.
        gen_ipu_ops.ipu_event_trace()

      with ipu.ops.ipu_scope("/device:IPU:0"):
        compiled = ipu_compiler.compile(ipu_dropout_back, inputs=[input_data])

      ipu_cfg = ipu.utils.set_ipu_model_options(
          ipu.utils.create_ipu_config(), compile_ipu_code=False)
      ipu.utils.configure_ipu_system(ipu_cfg)

      with sl.Session() as sess:
        fetched = sess.run(compiled, {input_data: np.random.rand(32)})

        dropout_out = fetched[0]
        # First entry of the first (gradient, variable) pair.
        gradients = fetched[1][0][0]

        # Both arrays must have the same number of non-zero entries.
        self.assertAllEqual(
            np.count_nonzero(dropout_out), np.count_nonzero(gradients))
Ejemplo n.º 15
0
    def testTwoOutfeedsDifferentProgramsSameFeedName(self):
        """Two programs whose outfeeds share the feed name 'a' must clash.

        The first program runs normally; running the second is expected to
        raise FailedPreconditionError stating that the outfeed id already
        exists.
        """
        first_queue = ipu_outfeed_queue.IPUOutfeedQueue(feed_name="a")
        second_queue = ipu_outfeed_queue.IPUOutfeedQueue(feed_name="a")

        def body1(v):
            enqueue_op = first_queue.enqueue(v)
            return (v + 1, enqueue_op)

        def my_net1(v):
            return loops.repeat(5, body1, (v))

        def body2(v):
            enqueue_op = second_queue.enqueue(v)
            return (v + 1, enqueue_op)

        def my_net2(v):
            return loops.repeat(7, body2, (v))

        with ops.device('cpu'):
            v1 = array_ops.placeholder(np.float32, [4, 4])
            v2 = array_ops.placeholder(np.float32, [5, 5])

        with ipu.ops.ipu_scope("/device:IPU:0"):
            res1 = ipu_compiler.compile(my_net1, inputs=[v1])
            res2 = ipu_compiler.compile(my_net2, inputs=[v2])

        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu.utils.create_ipu_config(), compile_ipu_code=False)
        ipu.utils.configure_ipu_system(ipu_cfg)

        # Dequeue ops are built for both queues but never run.
        first_queue.dequeue()
        second_queue.dequeue()
        with session_lib.Session() as sess:
            sess.run(res1, {v1: np.ones([4, 4], np.float32)})
            with self.assertRaisesRegexp(
                    errors.FailedPreconditionError,
                    'Outfeed with id=\'a\' already exists'):
                sess.run(res2, {v2: np.full([5, 5], 4, np.float32)})
Ejemplo n.º 16
0
    def testConvAndBiasAddDifferentIPUs(self):
        """Place a convolution and its bias-add on different IPU shards.

        The convolution is built on shard 0 and the bias add on shard 1.
        After one run the test expects exactly one COMPILE_END event and
        checks the global exchange entries of the compile report against
        a whitelist.
        """
        def my_graph(inp, bias):
            with ops.device("/device:IPU:0"):
                with ipu.ops.ipu_shard(0):
                    conv_out = layers.Conv2D(8,
                                             3,
                                             padding='same',
                                             name="conv",
                                             use_bias=False)(inp)

                with ipu.ops.ipu_shard(1):
                    biased = nn_ops.bias_add(conv_out, bias, name='biasAdd')

            return biased

        with ops.device('cpu'):
            inp = array_ops.placeholder(np.float32, [1, 32, 32, 4],
                                        name="data")
            bias = array_ops.placeholder(np.float32, [8], name="bias")
            report = gen_ipu_ops.ipu_event_trace()

        compiled = ipu_compiler.compile(my_graph, [inp, bias])

        ipu_cfg = ipu.utils.create_ipu_config(profiling=True)
        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu_cfg, compile_ipu_code=False)
        ipu_cfg = ipu.utils.auto_select_ipus(ipu_cfg, 2)
        ipu.utils.configure_ipu_system(ipu_cfg)

        with sl.Session() as sess:
            # The trace is fetched before and after variable initialisation
            # and both results are discarded.
            sess.run(report)
            sess.run(variables.global_variables_initializer())
            sess.run(report)

            sess.run(compiled,
                     {inp: np.ones([1, 32, 32, 4]), bias: np.ones([8])})

            trace = sess.run(report)

            num_compiles = 0
            ge_list = []
            for evt in ipu.utils.extract_all_events(trace):
                if evt.type != IpuTraceEvent.COMPILE_END:
                    continue
                num_compiles += 1
                ge_list = tu.get_all_global_exchange_from_json_report(evt)

            self.assertEqual(num_compiles, 1)

            # There is 1 piece of global exchange (apart from progId)
            whitelist = [
                'switchControlBroadcast*/GlobalPreAll',
                '*_to_/custom-call/GlobalPreAll',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(ge_list, whitelist))
Ejemplo n.º 17
0
    def testCreateSimpleReplicatedInfeedOutfeed(self):
        """Replicated infeed/outfeed with a cross-replica sum in the loop.

        Infeed and outfeed are created with replication_factor=2 and two
        IPUs are auto-selected.  Each of the 5 iterations adds the infeed
        element to the accumulator, cross-replica-sums it and enqueues the
        result.  The dequeued data is indexed as [step][replica]; both
        replicas must agree at every step and match the expected sequence
        1, 4, 11, 23, 48.
        """
        shape = [2]
        dataset = tu.create_single_increasing_dataset(3, shape)

        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
            dataset, feed_name=next_feed_id(), replication_factor=2)
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
            feed_name=next_feed_id(), replication_factor=2)

        def body(v, x):
            v = popops_cross_replica_sum.cross_replica_sum(v + x)
            outfeed = outfeed_queue.enqueue(v)
            return (v, outfeed)

        def my_net():
            v = constant_op.constant(0.0, shape=shape, dtype=np.float32)
            r = loops.repeat(5, body, [v], infeed_queue)
            return r

        with ipu.ops.ipu_scope("/device:IPU:0"):
            res = ipu_compiler.compile(my_net, inputs=[])

        outfed = outfeed_queue.dequeue()

        cfg = ipu.utils.create_ipu_config(
            profiling=False, max_cross_replica_sum_buffer_size=10000)
        cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
        cfg = ipu.utils.auto_select_ipus(cfg, 2)
        ipu.utils.configure_ipu_system(cfg)

        with sl.Session() as sess:
            sess.run(infeed_queue.initializer)
            result = sess.run(res)
            self.assertAllClose(result[0], np.broadcast_to(48, shape))
            outfed_result = sess.run(outfed)

            # The original `assertTrue(shape[0], 2)` used 2 as the failure
            # message and therefore checked nothing.  The replica axis is
            # axis 1 (see the [step][replica] indexing below), so that is
            # the axis that must equal the replication factor.
            self.assertEqual(outfed_result.shape[1], 2)

            # Per-step expected value; both replicas must hold the same data.
            for step, expected in enumerate([1, 4, 11, 23, 48]):
                self.assertAllClose(outfed_result[step][0],
                                    outfed_result[step][1])
                self.assertAllClose(outfed_result[step][0],
                                    np.broadcast_to(expected, shape))
Ejemplo n.º 18
0
        def program(iters, infeed_queue):
            """Compile a loop that sums `iters` infeed elements on IPU 0.

            Args:
              iters: iteration count forwarded to loops.repeat.
              infeed_queue: infeed queue forwarded to loops.repeat.

            Returns:
              Whatever ipu_compiler.compile returns for the built network.
            """
            def body(v, x):
                return v + x

            def my_net():
                # The accumulator starts as a [4, 4] tensor of zeros.
                start = constant_op.constant(
                    0.0, shape=[4, 4], dtype=np.float32)
                return loops.repeat(iters, body, (start), infeed_queue)

            with ipu.ops.ipu_scope("/device:IPU:0"):
                return ipu_compiler.compile(my_net)
Ejemplo n.º 19
0
    def testIpuWhileScope(self):
        """Train a dynamic_rnn LSTM inside an IPU scope and check loss drops.

        Checks made along the way:
          1. ops built inside the scope are placed on "/device:IPU:0";
          2. variables created inside the scope are resource variables;
          3. the loss after 100 extra training runs is lower than the
             loss of the first run.
        """
        def my_net(a, b):
            scale = variable_scope.get_variable('c', initializer=[1.0])
            self.assertTrue("ResourceVariable" in str(type(scale)))

            lstm_cell = rnn_cell.LSTMCell(1, forget_bias=1.0)
            outputs, _ = rnn.dynamic_rnn(lstm_cell, a, dtype=np.float32)

            logits = outputs[-1] * scale
            self.assertEqual(logits.device, "/device:IPU:0")

            reshaped = array_ops.reshape(logits, [1, 8, 1])

            loss = losses.mean_squared_error(reshaped, b)

            optimizer = gradient_descent.GradientDescentOptimizer(0.1)
            train_op = optimizer.minimize(loss)

            return [loss, train_op]

        with ops.device('cpu'):
            a = array_ops.placeholder(np.float32, [1, 8, 1], name="a")
            b = array_ops.placeholder(np.float32, [1, 8, 1], name="b")

        with ipu.ops.ipu_scope("/device:IPU:0"):

            compiled = ipu_compiler.compile(my_net, inputs=[a, b])

        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu.utils.create_ipu_config(), compile_ipu_code=False)
        ipu.utils.configure_ipu_system(ipu_cfg)

        with sl.Session() as sess:
            sess.run(variables.global_variables_initializer())

            # Input and target are the same sequence of eight ones.
            feed = {
                a: [[[1.], [1.], [1.], [1.], [1.], [1.], [1.], [1.]]],
                b: [[[1.], [1.], [1.], [1.], [1.], [1.], [1.], [1.]]],
            }

            loss_initial = sess.run([compiled], feed)

            for _ in range(100):
                sess.run([compiled], feed)

            loss_final = sess.run([compiled], feed)

            self.assertTrue(loss_initial > loss_final)
Ejemplo n.º 20
0
    def testMultiIpu(self):
        """Run a graph sharded over two IPUs and inspect its tensor map.

        The first addition is built on shard 0 and the remaining two on
        shard 1.  After checking the numeric result, every COMPILE_END
        event's tensor map must describe a single module whose collected
        tile set is exactly {0, 1, 1216}.
        """
        def my_graph(pa, pb, pc):
            with ops.device("/device:IPU:0"):
                with ipu.ops.ipu_shard(0):
                    left_sum = pa + pb

                with ipu.ops.ipu_shard(1):
                    right_sum = pa + pc
                    total = left_sum + right_sum

            return [total]

        with ops.device('cpu'):
            pa = array_ops.placeholder(np.float32, [2], name="a")
            pb = array_ops.placeholder(np.float32, [2], name="b")
            pc = array_ops.placeholder(np.float32, [2], name="c")
            report = gen_ipu_ops.ipu_event_trace()

        compiled = ipu_compiler.compile(my_graph, [pa, pb, pc])

        ipu_cfg = ipu.utils.create_ipu_config(profiling=True)
        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu_cfg, compile_ipu_code=False)
        ipu_cfg = ipu.utils.auto_select_ipus(ipu_cfg, 2)
        ipu.utils.configure_ipu_system(ipu_cfg)

        with sl.Session() as sess:

            # Fetch and discard the trace before running the graph.
            sess.run(report)

            output = sess.run(
                compiled, {pa: [1., 1.], pb: [0., 1.], pc: [1., 5.]})
            self.assertAllClose(output[0], [3., 8.])

            trace = sess.run(report)

            for evt in ipu.utils.extract_all_events(trace):
                if evt.type != IpuTraceEvent.COMPILE_END:
                    continue

                tensor_map = json.loads(
                    evt.compile_end.tensor_map.decode('utf-8'))

                module_names = list(tensor_map['mappings'].keys())
                self.assertEqual(len(module_names), 1)

                # Index 7 of each tensor entry is iterated as a list of
                # records whose first element is collected - presumably the
                # tile id; confirm against the report format.
                used_tiles = {
                    tile[0]
                    for tensor in tensor_map['mappings'][module_names[0]]
                    for tile in tensor[7]
                }

                self.assertEqual(len(used_tiles), 3)
                self.assertEqual(used_tiles, {0, 1, 1216})
Ejemplo n.º 21
0
    def testIpuSimpleScopeAndExecutionReport(self):
        """Check the compile and execute reports produced by one simple run.

        With profiling and execution profiling enabled, a single run of
        a + b must yield one COMPILE_END event, one compile report and one
        execute report.  Each report list must hold one (name, payload)
        tuple whose name starts with "cluster" and whose payload starts
        with '{' and is longer than 1000 characters.
        """
        def my_net(a, b):
            return [a + b]

        with ops.device('cpu'):
            a = array_ops.placeholder(np.float32, [1], name="a")
            b = array_ops.placeholder(np.float32, [1], name="b")
            events = gen_ipu_ops.ipu_event_trace()

        with ipu.ops.ipu_scope("/device:IPU:0"):
            compiled = ipu_compiler.compile(my_net, inputs=[a, b])

        ipu_cfg = ipu.utils.create_ipu_config(profiling=True,
                                              profile_execution=True)
        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu_cfg, compile_ipu_code=False)
        ipu.utils.configure_ipu_system(ipu_cfg)

        def check_reports(reports):
            # Same checks, in the same order, for either kind of report.
            self.assertEqual(len(reports), 1)
            self.assertEqual(type(reports), list)
            self.assertEqual(type(reports[0]), tuple)
            self.assertTrue(reports[0][0].startswith("cluster"))
            self.assertTrue(len(reports[0][1]) > 1000)
            self.assertTrue(reports[0][1].startswith('{'))

        with sl.Session() as sess:

            feed = {
                a: [1],
                b: [2],
            }

            # Fetch and discard the trace before the run under test.
            sess.run(events)

            total = sess.run(compiled[0], feed)
            self.assertEqual(total, [3])

            trace = sess.run(events)
            all_events = ipu.utils.extract_all_events(trace)
            self.assertEqual(count_compile_end_events(all_events), 1)

            check_reports(ipu.utils.extract_compile_reports(trace))
            check_reports(ipu.utils.extract_execute_reports(trace))
Ejemplo n.º 22
0
    def testSingleInfeedOutfeedRepeatNamedLast(self):
        """Outfeed a dict of tensors in LAST mode from a 5-iteration loop.

        Each dataset element is mapped to the pair
        (value, (value + 10) / 2).  The body accumulates both and enqueues
        a dict with keys "v", "image1" and "image2".  The queue uses
        IPUOutfeedMode.LAST, and the dequeued dict is expected to hold
        31, 1 and 5.5 respectively.
        """
        shape = [4, 4]

        def dataset_parser(value):
            return (value, (value + 10.) / 2.0)

        dataset = tu.create_single_increasing_dataset(
            3, shape=[4, 4]).map(dataset_parser)

        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
            next_feed_id(), outfeed_mode=ipu_outfeed_queue.IPUOutfeedMode.LAST)

        def body(v, im1, im2):
            total = v + im1 + im2
            enqueue_op = outfeed_queue.enqueue({
                "v": total,
                "image1": im1,
                "image2": im2
            })
            return (total, enqueue_op)

        def my_net():
            start = constant_op.constant(0.0, shape=shape, dtype=np.float32)
            return loops.repeat(5, body, [start], infeed_queue)

        with ipu.ops.ipu_scope("/device:IPU:0"):
            compiled = ipu_compiler.compile(my_net, inputs=[])

        dequeue_op = outfeed_queue.dequeue()
        ipu_cfg = ipu.utils.set_ipu_model_options(
            ipu.utils.create_ipu_config(), compile_ipu_code=False)
        ipu.utils.configure_ipu_system(ipu_cfg)

        with session_lib.Session() as sess:
            sess.run(infeed_queue.initializer)
            output = sess.run(compiled)
            self.assertAllClose(output[0], np.broadcast_to(31, shape))

            dequeued = sess.run(dequeue_op)
            # One entry per enqueued dict key.
            self.assertTrue(len(dequeued) == 3)
            self.assertAllClose(dequeued["v"], np.broadcast_to(31, shape))
            self.assertAllClose(dequeued["image1"],
                                np.broadcast_to(1, shape))
            self.assertAllClose(dequeued["image2"],
                                np.broadcast_to(5.5, shape))
Ejemplo n.º 23
0
    def testTrainingLoopWithInfeedAndOutfeedGetLast(self):
        """Train a 1x1 convolution in a loop and outfeed the loss in LAST mode.

        The program is run once with 1 iteration and once with 1000.  The
        loss must decrease between the two runs, the dequeued outfeed value
        must equal the final loss, and it must be a np.float32 scalar.

        NOTE(review): unlike the neighbouring tests, this one does not call
        ipu.utils.configure_ipu_system - confirm that is intentional.
        """
        batched = tu.create_single_increasing_dataset(
            10, shape=[4, 4, 2]).batch(batch_size=2, drop_remainder=True)

        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(batched, next_feed_id())
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
            next_feed_id(), outfeed_mode=ipu_outfeed_queue.IPUOutfeedMode.LAST)

        def my_net(iters):
            def body(loss, x):
                with variable_scope.variable_scope("vs", use_resource=True):
                    conv_out = layers.Conv2D(
                        2,
                        1,
                        use_bias=True,
                        kernel_initializer=init_ops.ones_initializer(),
                        name='conv1')(x)
                # The incoming loss is ignored; it is recomputed each step.
                loss = math_ops.reduce_sum(conv_out)
                optimizer = gradient_descent.GradientDescentOptimizer(0.1)
                train_op = optimizer.minimize(loss)
                enqueue_op = outfeed_queue.enqueue(loss)
                # Make the returned loss depend on the training step.
                with ops.control_dependencies([train_op]):
                    return (array_ops.identity(loss), enqueue_op)

            initial = 0.0
            return loops.repeat(iters, body, (initial), infeed_queue)

        with ops.device('cpu'):
            iters = array_ops.placeholder(np.int32, shape=[])

        with ipu.ops.ipu_scope("/device:IPU:0"):
            compiled = ipu_compiler.compile(my_net, inputs=[iters])

        dequeue_op = outfeed_queue.dequeue()
        with session_lib.Session() as sess:
            sess.run(infeed_queue.initializer)
            sess.run(variables.global_variables_initializer())
            initial_loss = sess.run(compiled, {iters: 1})
            final_loss = sess.run(compiled, {iters: 1000})

            last_outfed = sess.run(dequeue_op)

            self.assertTrue(initial_loss > final_loss)
            self.assertTrue(last_outfed == final_loss)

            # Check that a scalar is returned instead of a numpy array
            self.assertTrue(type(last_outfed) == np.float32)
Ejemplo n.º 24
0
    def testGather(self):
        """Run a gather on the IPU and inspect the reported tensor map.

        Indices `0, 3, ..., 765` are gathered from `np.arange(8192)`, so the
        output must equal the indices themselves. The event trace is then
        scanned for COMPILE_END events with a non-empty tensor map. For the
        last such map, no tensor entry may have more than 16 elements
        (field 6) while using a single tile (field 7 has length 1) and not
        being a constant (field 4 is 0).

        The test fails with an explicit message if no tensor map was
        reported, rather than hitting a `NameError` on an unbound
        `bad_maps`.
        """
        def my_net(w, i):
            out = array_ops.gather(w, i)
            return [out]

        with ops.device('cpu'):
            i = array_ops.placeholder(np.int32, [256])
            w = array_ops.placeholder(np.float32, [8192])
            report = gen_ipu_ops.ipu_event_trace()

        with ipu.ops.ipu_scope("/device:IPU:0"):
            r = ipu_compiler.compile(my_net, inputs=[w, i])

        cfg = ipu.utils.create_ipu_config(profiling=True)
        cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
        ipu.utils.configure_ipu_system(cfg)
        with sl.Session() as sess:

            result = sess.run(r, {
                i: np.arange(0, 3 * 256, 3),
                w: np.arange(8192)
            })
            self.assertAllClose(result[0], np.arange(0, 3 * 256, 3))

            rep = sess.run(report)

            events = ipu.utils.extract_all_events(rep)

            # None means "no tensor map seen yet". The list is rebuilt for
            # every non-empty tensor map, so only the last one is asserted on.
            # NOTE(review): confirm that checking only the last map is intended.
            bad_maps = None
            for e in events:
                if e.type != IpuTraceEvent.COMPILE_END:
                    continue
                j = e.compile_end.tensor_map.decode('utf-8')
                if not j:
                    continue
                mappings = json.loads(j)['mappings']

                bad_maps = []
                for g in mappings:
                    for tensor in mappings[g]:
                        # Total elements > 16, tiles used == 1 and
                        # is_constant == 0
                        if (tensor[6] > 16 and len(tensor[7]) == 1
                                and tensor[4] == 0):
                            bad_maps.append(tensor[0])

            self.assertIsNotNone(
                bad_maps, "no COMPILE_END event carried a tensor map")
            self.assertEqual(len(bad_maps), 0)
Ejemplo n.º 25
0
    def testDropoutImpl(rate, seed, in_data):
      """Run `poprand.dropout` on the IPU once and return the fetched result.

      Args:
        rate: Forwarded to `poprand.dropout` as `rate`.
        seed: Forwarded to `poprand.dropout` as `seed`.
        in_data: Value fed to the float32 `[32, 4]` input placeholder.

      Returns:
        Whatever `sess.run` yields for the compiled dropout computation.
      """
      def ipu_dropout(w):
        return [poprand.dropout(w, rate=rate, seed=seed)]

      with ops.device('cpu'):
        dropout_input = array_ops.placeholder(np.float32, [32, 4])
        # The event trace op is created here but never fetched.
        gen_ipu_ops.ipu_event_trace()

      with ipu.ops.ipu_scope("/device:IPU:0"):
        compiled = ipu_compiler.compile(ipu_dropout, inputs=[dropout_input])

      ipu_cfg = ipu.utils.set_ipu_model_options(
          ipu.utils.create_ipu_config(), compile_ipu_code=False)
      ipu.utils.configure_ipu_system(ipu_cfg)

      with sl.Session() as sess:
        feed = {dropout_input: in_data}
        return sess.run(compiled, feed)
    def testIpuWhilePerfTest(self):
        """Run a bounded while loop on the IPU and count the reports.

        The loop adds the counter `i` to `v` while `i < 15`, but is capped
        with `maximum_iterations=10`, so a zero input must come back as
        0 + 1 + ... + 9 = 45. With profiling enabled, the event trace must
        contain exactly one compile report and one execute report.
        """
        def cond(i, v):
            return math_ops.less(i, 15)

        def body(i, v):
            accumulated = v + i
            return (i + 1, accumulated)

        def my_net(v):
            counter = constant_op.constant(0)
            loop_out = control_flow_ops.while_loop(cond,
                                                   body, (counter, v),
                                                   maximum_iterations=10)
            return [loop_out[1]]

        with ops.device('cpu'):
            v = array_ops.placeholder(np.int32, [500])
            report = gen_ipu_ops.ipu_event_trace()

        with ipu.ops.ipu_scope("/device:IPU:0"):
            compiled = ipu_compiler.compile(my_net, inputs=[v])

        cfg = ipu.utils.set_ipu_model_options(
            ipu.utils.create_ipu_config(profiling=True,
                                        profile_execution=True),
            compile_ipu_code=False)
        ipu.utils.configure_ipu_system(cfg)

        with sl.Session() as sess:
            zeros = np.zeros([500], np.int32)
            result = sess.run(compiled, {v: zeros})
            self.assertAllClose(result[0], np.broadcast_to(45, [500]))

            trace = sess.run(report)

            # Check that there is only one real compile
            compile_reports = ipu.utils.extract_compile_reports(trace)
            self.assertEqual(len(compile_reports), 1)

            # Check that there is only one execute
            execute_reports = ipu.utils.extract_execute_reports(trace)
            self.assertEqual(len(execute_reports), 1)
Ejemplo n.º 27
0
def _RunLayer(layer_func, x, y):
    """Compile `layer_func` for the IPU and run it for several steps.

    Four placeholders are created on the CPU: one shaped like `x`, two of
    shape `[batch_size, num_hidden]`, and one shaped like `y`. They are
    passed to `layer_func` in that order. The two middle placeholders are
    always fed with arrays of ones.

    Args:
        layer_func: Callable handed to `ipu_compiler.compile`.
        x: Array fed to the first placeholder; its `shape` sizes it.
        y: Array fed to the last placeholder; its `shape` sizes it.

    Returns:
        A list with one `sess.run` result per step, `num_training_steps`
        entries long.
    """
    with ops.device('cpu'):
        x_ph = array_ops.placeholder(dataType, shape=x.shape)
        h_ph = array_ops.placeholder(dataType, shape=[batch_size, num_hidden])
        c_ph = array_ops.placeholder(dataType, shape=[batch_size, num_hidden])
        y_ph = array_ops.placeholder(dataType, shape=y.shape)
    with ipu.ops.ipu_scope("/device:IPU:0"):
        compiled = ipu_compiler.compile(layer_func,
                                        inputs=[x_ph, h_ph, c_ph, y_ph])

    opts = utils.create_ipu_config(profiling=True, use_poplar_text_report=True)
    opts = ipu.utils.set_ipu_model_options(opts, compile_ipu_code=False)
    ipu.utils.configure_ipu_system(opts)

    with sl.Session() as sess:
        sess.run(variables.global_variables_initializer())
        feed = {
            x_ph: x,
            h_ph: np.ones(h_ph.shape),
            c_ph: np.ones(c_ph.shape),
            y_ph: y,
        }
        # The same feed is reused on every step.
        losses = [sess.run(compiled, feed) for _ in range(num_training_steps)]
    return losses
Ejemplo n.º 28
0
    def testCreateCombinedReplicatedSumGraph(self):
        """Chain cross-replica sums over a float and an int variable.

        Two 100-element resource variables (float32 filled with 10.0 and
        int32 filled with 10) each go through `x + x` and `x * x`, a
        cross-replica sum of each, and a final cross-replica sum of the two
        added together. The system is configured with two auto-selected IPUs,
        and both outputs are expected to be 480 everywhere.
        """
        def my_graph():
            with ops.device("/device:IPU:0"):
                with variable_scope.variable_scope("", use_resource=True):
                    x1 = variable_scope.get_variable(
                        "x1",
                        dtype=np.float32,
                        shape=[100],
                        initializer=init_ops.constant_initializer(10.0))
                    x2 = variable_scope.get_variable(
                        "x2",
                        dtype=np.int32,
                        shape=[100],
                        initializer=init_ops.constant_initializer(10))
                crs = popops_cross_replica_sum.cross_replica_sum
                y1 = crs(x1 + x1)
                z1 = crs(x1 * x1)
                y2 = crs(x2 + x2)
                z2 = crs(x2 * x2)
                float_out = crs(z1 + y1)
                int_out = crs(z2 + y2)
                return [float_out, int_out]

        out = ipu_compiler.compile(my_graph, [])

        cfg = ipu.utils.create_ipu_config(
            profiling=False, max_cross_replica_sum_buffer_size=10000)
        cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
        cfg = ipu.utils.auto_select_ipus(cfg, 2)
        ipu.utils.configure_ipu_system(cfg)

        with sl.Session() as sess:
            sess.run(variables.global_variables_initializer())

            result = sess.run(out, {})

            # Check output equals the expected value
            expected = np.full([2, 100], 480.0)
            self.assertAllClose(result, expected)
Ejemplo n.º 29
0
    def testSingleInfeedWhileLoopTuple(self):
        """Sum a two-element tuple infeed inside an IPU while loop.

        Each dataset element is mapped to the pair
        `(value, (value + 10) / 2)`. The loop runs while `i < 20` and adds
        both infeed tensors to `v` on every iteration. Starting from a `v` of
        ones, the result is expected to be 129.5 everywhere.
        """
        shape = [4, 4]
        dataset = tu.create_single_increasing_dataset(3, shape=[4, 4])

        def dataset_parser(value):
            halved = (value + 10.) / 2.0
            return (value, halved)

        dataset = dataset.map(dataset_parser)

        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())

        def cond(i, v):
            return i < 20

        def body(i, v, im1, im2):
            # im1 and im2 are the two tensors dequeued from the infeed.
            total = v + im1 + im2
            return (i + 1, total)

        def my_net(v):
            loop_out = loops.while_loop(cond, body, (0, v), infeed_queue)
            return loop_out[1]

        with ops.device('cpu'):
            v = array_ops.placeholder(np.float32, shape)

        with ipu.ops.ipu_scope("/device:IPU:0"):
            res = ipu_compiler.compile(my_net, inputs=[v])

        cfg = ipu.utils.set_ipu_model_options(ipu.utils.create_ipu_config(),
                                              compile_ipu_code=False)
        ipu.utils.configure_ipu_system(cfg)

        with session_lib.Session() as sess:
            sess.run(infeed_queue.initializer)
            feed = {v: np.ones(shape, np.float32)}
            result = sess.run(res, feed)
            self.assertAllClose(result[0], np.broadcast_to(129.5, shape))
Ejemplo n.º 30
0
    def testSingleInfeedRepeatNamed(self):
        """Accumulate a dict-valued infeed over five repeat iterations.

        Each dataset element is mapped to
        `{"a": value, "b": (value + 10) / 2}`. The loop body declares its
        infeed parameters as `b, a`, the reverse of the dict order. The test
        expects the "a" stream to accumulate to 4 and the "b" stream to 27.
        """
        shape = [4, 4]
        dataset = tu.create_single_increasing_dataset(3, shape=[4, 4])

        def dataset_parser(value):
            halved = (value + 10.) / 2.0
            return {"a": value, "b": halved}

        dataset = dataset.map(dataset_parser)

        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())

        # Note how the parameters are swapped around.
        def body(v1, v2, b, a):
            sum_a = v1 + a
            sum_b = v2 + b
            return (sum_a, sum_b)

        def my_net():
            v1 = constant_op.constant(0.0, shape=[4, 4], dtype=np.float32)
            v2 = constant_op.constant(0.0, shape=[4, 4], dtype=np.float32)
            return loops.repeat(5, body, [v1, v2], infeed_queue)

        with ipu.ops.ipu_scope("/device:IPU:0"):
            res = ipu_compiler.compile(my_net, inputs=[])

        cfg = ipu.utils.set_ipu_model_options(ipu.utils.create_ipu_config(),
                                              compile_ipu_code=False)
        ipu.utils.configure_ipu_system(cfg)

        with session_lib.Session() as sess:
            sess.run(infeed_queue.initializer)
            result = sess.run(res)
            self.assertAllClose(result[0], np.broadcast_to(4, shape))
            self.assertAllClose(result[1], np.broadcast_to(27, shape))