예제 #1
0
    def testNotEnoughIpus(self):
        """Expect a ResourceExhaustedError when running the sharded graph.

        The graph places work on shards 0, 1 and 2, while the configuration
        below auto-selects 2 IPUs.
        """
        def my_graph(pa, pb, pc):
            with ipu.ops.ipu_shard(0):
                lhs = pa + pb
            with ipu.ops.ipu_shard(1):
                rhs = pa + pc
            with ipu.ops.ipu_shard(2):
                return lhs + rhs

        with ops.device('cpu'):
            pa = array_ops.placeholder(np.float32, [2], name="a")
            pb = array_ops.placeholder(np.float32, [2], name="b")
            pc = array_ops.placeholder(np.float32, [2], name="c")
            # The trace op is added to the graph but never fetched here.
            report = gen_ipu_ops.ipu_event_trace()

        with ops.device("/device:IPU:0"):
            compiled = ipu_compiler.compile(my_graph, [pa, pb, pc])

        ipu_cfg = ipu.utils.create_ipu_config(profiling=True)
        ipu_cfg = ipu.utils.set_ipu_model_options(ipu_cfg,
                                                  compile_ipu_code=False)
        ipu_cfg = ipu.utils.auto_select_ipus(ipu_cfg, 2)
        ipu.utils.configure_ipu_system(ipu_cfg)

        feed = {pa: [1., 1.], pb: [0., 1.], pc: [1., 5.]}
        with sl.Session() as sess:
            with self.assertRaisesRegexp(errors.ResourceExhaustedError,
                                         'Trying to compile a graph for'):
                sess.run(compiled, feed)
예제 #2
0
  def testConvBackpropFilter(self):
    """Run conv2d_backprop_filter on zeros; check output and compute sets."""
    with ops.device("/device:IPU:0"):
      inp = array_ops.placeholder(np.float32, [2, 8, 8, 3])
      filter_sizes = constant_op.constant([2, 2, 3, 5], np.int32)
      grad = array_ops.placeholder(np.float32, [2, 8, 8, 5], name="wei")

      filter_grad = nn_ops.conv2d_backprop_filter(
          inp,
          filter_sizes,
          grad,
          strides=[1, 1, 1, 1],
          padding="SAME")

    with ops.device('cpu'):
      trace = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # First fetch is discarded; presumably it clears events recorded so far.
      sess.run(trace)

      feed = {inp: np.zeros([2, 8, 8, 3]), grad: np.zeros([2, 8, 8, 5])}
      self.assertAllClose(sess.run(filter_grad, feed),
                          np.zeros([2, 2, 3, 5]))

      events = sess.run(trace)
      strings = tu.extract_all_strings_from_event_trace(events)
      compute_sets = tu.get_compute_sets_from_report(strings)

      # Name patterns passed to the compute-set check helper.
      expected = [
          '__seed*',
          'Copy_',
          'Conv2DBackpropFilter/convolution.*/Conv_8x8',
      ]
      self.assertTrue(
          tu.check_all_compute_sets_and_list(compute_sets, expected))
예제 #3
0
  def testDepthwiseConvBackpropFilter1x1WithRelu(self):
    """Depthwise filter backprop followed by ReLU, evaluated on zero inputs."""
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float32, [1, 6, 6, 3], name="a")
      # Filter sizes for the backprop op.
      filter_sizes = constant_op.constant([1, 1, 3, 2], dtype=np.int32)
      pc = array_ops.placeholder(np.float32, [1, 6, 6, 6], name="c")
      filter_grad = nn.depthwise_conv2d_native_backprop_filter(
          pa, filter_sizes, pc, strides=[1, 1, 1, 1], padding="SAME")
      activated = nn.relu(filter_grad)

    with ops.device('cpu'):
      trace = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # First fetch is discarded; presumably it clears events recorded so far.
      sess.run(trace)

      feed = {
          pa: np.zeros([1, 6, 6, 3]),
          pc: np.zeros([1, 6, 6, 6]),
      }
      self.assertAllClose(sess.run(activated, feed), np.zeros([1, 1, 3, 2]))

      events = sess.run(trace)
      strings = tu.extract_all_strings_from_event_trace(events)
      compute_sets = tu.get_compute_sets_from_report(strings)

      expected = [
          '__seed*',
          'Copy_',
          'DepthwiseConv2dNativeBackpropFilter/fusion*/Conv_6x6',
          'Relu/custom-call*/Nonlinearity',
      ]
      self.assertTrue(
          tu.check_all_compute_sets_and_list(compute_sets, expected))
예제 #4
0
    def testIpuEventsWithoutPoplarReporting(self):
        """With profiling off and IPU events on, events carry empty reports."""
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                pa = array_ops.placeholder(np.float32, [2, 2], name="a")
                pb = array_ops.placeholder(np.float32, [2, 2], name="b")
                total = math_ops.add(pa, pb)

            with ops.device('cpu'):
                trace = gen_ipu_ops.ipu_event_trace()

            utils.configure_ipu_system(
                utils.create_ipu_config(profiling=False,
                                        enable_ipu_events=True))

            feed = {pa: [[1., 1.], [2., 3.]], pb: [[0., 1.], [4., 5.]]}
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(trace, feed)

            sess.run(total, feed)

            events = utils.extract_all_events(sess.run(trace, feed))
            # Expected events: compile begin, compile end, execute.
            self.assertEqual(len(events), 3)

            for evt in events:
                if evt.type == IpuTraceEvent.COMPILE_END:
                    self.assertFalse(evt.compile_end.compilation_report)
                if evt.type == IpuTraceEvent.EXECUTE:
                    self.assertFalse(evt.execute.execution_report)

            sess.close()
예제 #5
0
    def testDropoutImpl():
      """Check dropout output and its gradient have equal non-zero counts.

      NOTE(review): `self` is not a parameter here; it is expected to come
      from an enclosing scope that is not visible in this snippet.
      """
      def ipu_dropout_back(w):
        dropped = poprand.dropout(w, rate=0.4)
        cost = tf.square(dropped)

        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
        grads_and_vars = optimizer.compute_gradients(cost, w)

        return [dropped, grads_and_vars]

      with ops.device('cpu'):
        input_data = array_ops.placeholder(np.float32, [32])
        # The trace op is added to the graph but never fetched here.
        report = gen_ipu_ops.ipu_event_trace()

      with ipu.ops.ipu_scope("/device:IPU:0"):
        compiled = ipu_compiler.compile(ipu_dropout_back, inputs=[input_data])

      ipu_cfg = ipu.utils.create_ipu_config()
      ipu_cfg = ipu.utils.set_ipu_model_options(ipu_cfg,
                                                compile_ipu_code=False)
      ipu.utils.configure_ipu_system(ipu_cfg)

      with sl.Session() as sess:
        fetched = sess.run(compiled, {input_data: np.random.rand(32)})

        dropout_out = fetched[0]
        # First element of the first entry returned by compute_gradients.
        gradient = fetched[1][0][0]

        # Both tensors must have the same number of non-zero entries.
        self.assertAllEqual(
            np.count_nonzero(dropout_out), np.count_nonzero(gradient))
    def testNamedOperations(self):
        """An add op under a name scope reports a compute set with that name."""
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [2, 2], name="a")
            pb = array_ops.placeholder(np.float32, [2, 2], name="b")
            with ops.name_scope('my_ops'):
                total = math_ops.add(pa, pb, 'my_add_op')

        with ops.device('cpu'):
            trace = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            feed = {pa: [[1., 1.], [2., 3.]], pb: [[0., 1.], [4., 5.]]}
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(trace, feed)

            self.assertAllClose(sess.run(total, feed), [[1., 2.], [6., 8.]])

            events = sess.run(trace, feed)
            strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(strings)

            expected = ['__seed*', 'my_ops/my_add_op/add']
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
예제 #7
0
    def testReportEveryNthExecution_Every1(self):
        """With report_every_nth_execution=1, five runs give five reports."""
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                pa = array_ops.placeholder(np.float32, [2, 2], name="a")
                pb = array_ops.placeholder(np.float32, [2, 2], name="b")
                total = math_ops.add(pa, pb)

            with ops.device('cpu'):
                trace = gen_ipu_ops.ipu_event_trace()

            utils.configure_ipu_system(
                utils.create_ipu_config(profiling=True,
                                        profile_execution=True,
                                        report_every_nth_execution=1,
                                        use_poplar_text_report=False))

            feed = {pa: [[1., 1.], [2., 3.]], pb: [[0., 1.], [4., 5.]]}
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(trace, feed)

            # Execute the add five times.
            for _ in range(5):
                sess.run(total, feed)

            events = sess.run(trace, feed)
            parsed = tu.ReportJSON(self)
            type_counts = parsed.parse_events(events)
            self.assertEqual(type_counts[IpuTraceEvent.EXECUTE], 5)
            self.assertEqual(len(parsed.get_execution_reports()), 5,
                             "Every execution should have generated a report")
예제 #8
0
    def testBatchNormalizeLayerFusedFp16(self):
        """Fused float16 batch norm must not report blacklisted compute sets."""
        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("", use_resource=True):
                x = array_ops.placeholder(np.float16, [4, 64, 64, 4], name="a")

                normed = layers_norm.batch_normalization(x, fused=True)

        with ops.device('cpu'):
            trace = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(trace)

            sess.run(variables.global_variables_initializer())
            zeros = np.zeros([4, 64, 64, 4])
            self.assertAllClose(sess.run(normed, {x: zeros}),
                                np.zeros([4, 64, 64, 4]))

            events = sess.run(trace)
            strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(strings)

            # Patterns that must not appear among the compute sets.
            blacklist = ['*convert*/Cast*']
            self.assertTrue(
                tu.check_compute_sets_not_in_blacklist(compute_sets,
                                                       blacklist))
예제 #9
0
    def testMaxPool(self):
        """Max-pool an NCHW tensor of ones; check output, events, compute sets.

        Uses a 5x5 window with stride 2 and SAME padding on a [1, 1, 10, 10]
        input, and expects a [1, 1, 5, 5] output of ones.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [1, 1, 10, 10], name="a")
            c = nn.max_pool(pa,
                            ksize=[1, 1, 5, 5],
                            strides=[1, 1, 2, 2],
                            data_format='NCHW',
                            padding='SAME',
                            name="max")

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(report)

            fd = {
                pa: np.ones([1, 1, 10, 10]),
            }
            result = sess.run(c, fd)
            self.assertAllClose(result, np.ones([1, 1, 5, 5]))

            events = sess.run(report)
            # assertEqual reports the actual count on failure.
            self.assertEqual(len(events), 3)

            s = tu.extract_all_strings_from_event_trace(events)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = ['__seed*', 'max/custom-call*/maxPool5x5']
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
예제 #10
0
    def testArgMax(self):
        """Compare argmax along axis 1 against numpy on random input."""
        batchsize = 4
        n_categories = 1200

        def model(a):
            return math_ops.argmax(a, axis=1, output_type=dtypes.int32)

        with ops.device('cpu'):
            pa = array_ops.placeholder(np.float32, [batchsize, n_categories])
            report = gen_ipu_ops.ipu_event_trace()

        with ops.device("/device:IPU:0"):
            out = model(pa)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(report)

            # Named `samples` rather than `input` to avoid shadowing the
            # builtin.
            samples = np.random.rand(batchsize, n_categories)

            result = sess.run(out, {pa: samples})
            self.assertAllClose(result, np.argmax(samples, axis=1))

            events = sess.run(report)
            # assertEqual reports the actual count on failure.
            self.assertEqual(len(events), 3)
    def testCheckMaxTileSize(self):
        """Check the reported maximum tile size stays under fixed bounds.

        The bound is checked after variable initialisation (< 17000) and
        again after evaluating `a + pb + c` (< 40000).
        """
        dtype = np.float32
        shape = (1024, 2048)
        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("", use_resource=True):
                a = variable_scope.get_variable(
                    "a",
                    shape=shape,
                    initializer=init_ops.constant_initializer(2),
                    dtype=dtype)
            pb = array_ops.placeholder(shape=shape, dtype=dtype, name="b")
            c = constant_op.constant(4, shape=shape, dtype=dtype, name="c")
            output = a + pb + c

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(execution_trace=False)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())
            result = sess.run(report)

            s = tu.extract_all_strings_from_event_trace(result)
            max_tile_size = tu.get_maximum_tile_size_from_events(s)
            # assertLess reports both operands on failure.
            self.assertLess(max_tile_size, 17000)

            out = sess.run(output, {pb: np.ones(shape=shape, dtype=dtype)})
            # 2 (variable) + 1 (fed ones) + 4 (constant) == 7.
            self.assertAllClose(np.full(shape, 7, dtype=dtype), out)
            result = sess.run(report)
            s = tu.extract_all_strings_from_event_trace(result)
            max_tile_size = tu.get_maximum_tile_size_from_events(s)
            self.assertLess(max_tile_size, 40000)
예제 #12
0
    def testTopK(self):
        """Compare top_k indices against a numpy argsort on random input."""
        n_categories = 1200
        topn = 24

        def model(a):
            # Only the indices are checked; the values output is ignored.
            _, indices = nn.top_k(a, topn)
            return indices

        with ops.device('cpu'):
            pa = array_ops.placeholder(np.float32, [n_categories], name="a")
            report = gen_ipu_ops.ipu_event_trace()

        with ops.device("/device:IPU:0"):
            out = model(pa)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(report)

            # Named `samples` rather than `input` to avoid shadowing the
            # builtin.
            samples = np.random.random(n_categories)
            # Indices of the `topn` largest entries, in descending order.
            expected = (-samples).argsort()[:topn]

            result = sess.run(out, {pa: samples})
            self.assertAllClose(result, expected)

            events = sess.run(report)
            # assertEqual reports the actual count on failure.
            self.assertEqual(len(events), 3)
예제 #13
0
  def testTraining(self):
    """Run one training step and bound the memory size reported by events.

    `datatype` and `inference` are module-level names defined outside this
    snippet.
    """
    x = array_ops.placeholder(datatype, shape=[1, 224, 224, 4])
    y_ = array_ops.placeholder(datatype, shape=[1, 1000])

    with ipu_ops.ipu_scope("/device:IPU:0"):
      logits = inference(x)

      loss = math_ops.reduce_mean(
          nn_ops.softmax_cross_entropy_with_logits_v2(
              logits=logits, labels=array_ops.stop_gradient(y_)))

      train = gradient_descent.GradientDescentOptimizer(0.01).minimize(loss)

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    opts = utils.create_ipu_config(profiling=True)
    utils.configure_ipu_system(opts)

    data = np.zeros([1, 224, 224, 4])
    labels = np.zeros([1, 1000])

    # The context manager closes the session even if one of the runs raises.
    with sl.Session() as sess:
      sess.run(variables.global_variables_initializer())
      # First fetch is discarded; presumably it clears earlier events.
      sess.run(report)

      sess.run(train, feed_dict={x: data, y_: labels})
      out = sess.run(report)

    evts = utils.extract_all_events(out)
    size = utils.get_memory_size_from_events(evts)
    # assertLess reports both operands on failure.
    self.assertLess(size, 174000000)
예제 #14
0
    def testMultipleConfigureIpuShouldFail(self):
        """A second call to configure_ipu_system must raise."""
        def my_graph(pa, pb, pc):
            with ops.device("/device:IPU:0"):
                o1 = pa + pb
                o2 = pa + pc
                out = o1 + o2

            return [out]

        with ops.device('cpu'):
            pa = array_ops.placeholder(np.float32, [2], name="a")
            pb = array_ops.placeholder(np.float32, [2], name="b")
            pc = array_ops.placeholder(np.float32, [2], name="c")
            report = gen_ipu_ops.ipu_event_trace()

        out = ipu_compiler.compile(my_graph, [pa, pb, pc])

        cfg = ipu.utils.create_ipu_config(profiling=True)
        cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
        cfg = ipu.utils.auto_select_ipus(cfg, 2)
        ipu.utils.configure_ipu_system(cfg)

        # Build the second configuration outside assertRaises so that only
        # the repeated configure call can satisfy the expectation; an
        # unexpected error while building the config now fails the test.
        cfg = ipu.utils.create_ipu_config(profiling=True)
        cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=True)
        with self.assertRaises(Exception):
            ipu.utils.configure_ipu_system(cfg)
예제 #15
0
    def testUniformRandomNonScalarInitalizer(self):
        """Initialise a shape-[2] variable with random_uniform on the IPU."""
        with ops.device('cpu'):
            trace = gen_ipu_ops.ipu_event_trace()
        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("vs", use_resource=True):
                uniform_init = init_ops.random_uniform_initializer(
                    minval=-2.0, maxval=2.0)
                z = variable_scope.get_variable("z1",
                                                shape=[2],
                                                dtype=np.float32,
                                                initializer=uniform_init)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Clean existing reports
            sess.run(trace)
            sess.run(variables.global_variables_initializer())
            # Events captured here cover variable initialisation only.
            init_events = sess.run(trace)

            value = sess.run(z)
            # Tolerances of 2.0 around zero match the [-2, 2] initialiser
            # range.
            self.assertAllClose(value, [0.0, 0.0], 2.0, 2.0)

            strings = tu.extract_all_strings_from_event_trace(init_events)
            compute_sets = tu.get_compute_sets_from_report(strings)
            expected = [
                '__seed*',
                'vs/z1/Initializer/random_uniform/RandomUniform/fusion/uniform'
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
예제 #16
0
    def testScaledSubtractFrom(self):
        """Evaluate `pa - const * pb` in float16; check events and compute sets."""
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float16, [3])
            pb = array_ops.placeholder(np.float16, [3])
            const = array_ops.constant(2.0, np.float16)
            # note how const operand index varies compared to testScaledAddTo
            # still should match as it will be reordered
            c = pa - const * pb

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(report)

            fd = {pa: [2.0, 0.5, 1.0], pb: [1.0, 2.0, 3.0]}
            result = sess.run(c, fd)
            self.assertAllClose(result, [0.0, -3.5, -5.0])

            events = sess.run(report)
            # assertEqual reports the actual count on failure.
            self.assertEqual(len(events), 3)

            s = tu.extract_all_strings_from_event_trace(events)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = ['__seed*', 'host-exchange-local-copy-', 'sub/fusion/AddTo']
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
예제 #17
0
    def testTuplesOfTuplesAreStreamed(self):
        """A tuple of two int64 sums must produce no IO events in the trace."""
        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("vs", use_resource=True):
                pa = array_ops.placeholder(np.int64, [2, 2], name="a")
                pb = array_ops.placeholder(np.int64, [2, 2], name="b")
                pc = array_ops.placeholder(np.int64, [2, 2], name="c")
                sums = control_flow_ops.tuple((pa + pc, pb + pc))

            with ops.device('cpu'):
                trace = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(trace)
            feed = {
                pa: np.full((2, 2), 7),
                pb: np.full((2, 2), 6),
                pc: np.full((2, 2), 5),
            }
            fetched = sess.run(sums, feed)
            self.assertEqual(len(fetched), 2)
            # 7 + 5 and 6 + 5 respectively.
            expected = (np.full((2, 2), 12), np.full((2, 2), 11))
            self.assertAllClose(fetched, expected)

            events = sess.run(trace)
            io_events = tu.extract_all_io_events(events)
            # No io_events implies the data was streamed
            self.assertEqual(len(list(io_events)), 0)
예제 #18
0
    def testScaledSubtractFromVariable(self):
        """Evaluate `pa - pc * pb` where the scale `pc` is a fed placeholder."""
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float16, [3])
            pb = array_ops.placeholder(np.float16, [3])
            pc = array_ops.placeholder(np.float16, [1])
            c = pa - pc * pb

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(report)

            fd = {pa: [2.0, 0.5, 1.0], pb: [1.0, 2.0, 3.0], pc: [2.0]}
            result = sess.run(c, fd)
            self.assertAllClose(result, [0.0, -3.5, -5.0])

            events = sess.run(report)
            # assertEqual reports the actual count on failure.
            self.assertEqual(len(events), 3)

            s = tu.extract_all_strings_from_event_trace(events)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = ['__seed*', 'host-exchange-local-copy-', 'sub/fusion/AddTo']
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
    def testCborReport(self):
        """With cbor_report=True, both reports must start with byte 217."""
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [2, 2], name="a")
            pb = array_ops.placeholder(np.float32, [2, 2], name="b")
            total = math_ops.add(pa, pb)

        with ops.device('cpu'):
            trace = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(text_report=False, cbor_report=True)

        with tu.ipu_session() as sess:
            feed = {pa: [[1., 1.], [2., 3.]], pb: [[0., 1.], [4., 5.]]}
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(trace, feed)

            sess.run(total, feed)

            events = tu.extract_all_events(sess.run(trace, feed))
            # begin, end, execute
            self.assertEqual(len(events), 3)

            # Expected first element of each report, built from byte value
            # 217 by indexing a bytes object.
            first_byte = bytes(bytearray([217]))[0]
            self.assertEqual(events[1].compile_end.compilation_report[0],
                             first_byte)
            self.assertEqual(events[2].execute.execution_report[0],
                             first_byte)
예제 #20
0
    def testSigmoidNotInplace(self):
        """Evaluate `sigmoid(pa) + pa`; check result, events and compute sets."""
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [3], name="a")
            c = math_ops.sigmoid(pa) + pa

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(report)

            fd = {pa: [-6.0, 0.0, 6.0]}
            result = sess.run(c, fd)
            self.assertAllClose(result, [-5.997527, 0.5, 6.997527])

            events = sess.run(report)
            # assertEqual reports the actual count on failure.
            self.assertEqual(len(events), 3)

            s = tu.extract_all_strings_from_event_trace(events)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = [
                '__seed*', 'Sigmoid/custom-call/Nonlinearity',
                'Copy_XLA_Args/arg0.*_to_Sigmoid/custom-call.clone/OnTileCopy-0',
                'add/add.*/AddTo'
            ]
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
예제 #21
0
    def testCborReport(self):
        """With use_poplar_cbor_report=True, reports must start with byte 217."""
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                pa = array_ops.placeholder(np.float32, [2, 2], name="a")
                pb = array_ops.placeholder(np.float32, [2, 2], name="b")
                total = math_ops.add(pa, pb)

            with ops.device('cpu'):
                trace = gen_ipu_ops.ipu_event_trace()

            utils.configure_ipu_system(
                utils.create_ipu_config(profiling=True,
                                        profile_execution=True,
                                        use_poplar_text_report=False,
                                        use_poplar_cbor_report=True))

            feed = {pa: [[1., 1.], [2., 3.]], pb: [[0., 1.], [4., 5.]]}
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(trace, feed)

            sess.run(total, feed)

            events = utils.extract_all_events(sess.run(trace, feed))
            # engine, begin, end, execute
            self.assertEqual(len(events), 4)

            # Expected first element of each report, built from byte value
            # 217 by indexing a bytes object.
            first_byte = bytes(bytearray([217]))[0]
            self.assertEqual(events[1].compile_end.compilation_report[0],
                             first_byte)
            self.assertEqual(events[3].execute.execution_report[0],
                             first_byte)
예제 #22
0
    def testSigmoidGrad(self):
        """Evaluate sigmoid_grad; check result, event count and compute sets."""
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [3], name="grad")
            pb = array_ops.placeholder(np.float32, [3], name="in")
            c = gen_math_ops.sigmoid_grad(pa, pb)

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(report)

            fd = {pa: [2.0, 0.5, 1.0], pb: [-1.0, 1.0, 6.0]}
            result = sess.run(c, fd)
            self.assertAllClose(result, [2.0, 0.25, 0.0])

            events = sess.run(report)
            # assertEqual reports the actual count on failure.
            self.assertEqual(len(events), 3)

            s = tu.extract_all_strings_from_event_trace(events)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = ['__seed*', 'SigmoidGrad/custom-call/NonLinearityGrad']
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
예제 #23
0
    def testIpuModelDeviceWithMultipleReport(self):
        """Run two IPU outputs separately and count the resulting trace events.

        The trace op carries control dependencies on both outputs.
        """
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                pa = array_ops.placeholder(np.float32, [2, 2], name="a")
                pb = array_ops.placeholder(np.float32, [2, 2], name="b")
                added = pa + pb
                subtracted = pa - pb

            with ops.device('cpu'):
                with ops.control_dependencies([added, subtracted]):
                    trace = gen_ipu_ops.ipu_event_trace()

            utils.configure_ipu_system(
                utils.create_ipu_config(profiling=True,
                                        profile_execution=True))

            feed = {pa: [[1., 1.], [2., 3.]], pb: [[0., 1.], [4., 5.]]}
            sess.run(trace, feed)

            self.assertAllClose(sess.run(added, feed), [[1., 2.], [6., 8.]])

            difference, events = sess.run([subtracted, trace], feed)
            self.assertAllClose(difference, [[1., 0.], [-2., -2.]])

            # 2x engine, 2x compile_begin, 2x compile_end, 2x load engine
            self.assertEqual(len(events), 8)
  def testDontOutlineInplaceExpression(self):
    """Evaluate `pa + pb - pc + pd` on scalars; check events and compute sets."""
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float32, [])
      pb = array_ops.placeholder(np.float32, [])
      pc = array_ops.placeholder(np.float32, [])
      pd = array_ops.placeholder(np.float32, [])
      e = pa + pb - pc + pd

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # First fetch is discarded; presumably it clears earlier events.
      sess.run(report)
      fd = {pa: 1, pb: 2, pc: 3, pd: 4}
      result = sess.run(e, fd)
      self.assertAllClose(result, 4)

      events = sess.run(report)
      # assertEqual reports the actual count on failure.
      self.assertEqual(len(events), 3)

      s = tu.extract_all_strings_from_event_trace(events)
      cs_list = tu.get_compute_sets_from_report(s)

      ok = [
          '__seed*', 'add/add.*/AddTo', 'sub/subtract.*/AddTo',
          'add_1/add.*/AddTo'
      ]
      self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
예제 #25
0
    def testDropoutImpl(rate):
      """Check the fraction of values kept by dropout is close to 1 - rate.

      Args:
        rate: Dropout rate passed to `poprand.dropout`.

      NOTE(review): `self` is not a parameter here; it is expected to come
      from an enclosing scope that is not visible in this snippet.
      """
      def ipu_dropout(w):
        output = poprand.dropout(w, rate=rate)
        return [output]

      with ops.device('cpu'):
        input_data = array_ops.placeholder(np.float32, [1024, 1024, 4])
        report = gen_ipu_ops.ipu_event_trace()

      with ipu.ops.ipu_scope("/device:IPU:0"):
        r = ipu_compiler.compile(ipu_dropout, inputs=[input_data])

      cfg = ipu.utils.create_ipu_config()
      cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
      ipu.utils.configure_ipu_system(cfg)
      with sl.Session() as sess:
        in_data = np.random.rand(1024, 1024, 4)

        result = sess.run(r, {input_data: in_data})

        # float() keeps this a true division even on Python 2 without
        # `from __future__ import division`.
        percent_kept = (float(np.count_nonzero(result)) /
                        np.count_nonzero(in_data))

        # There's a considerable amount of randomness so we have a reasonably
        # large dimensionality of test data to make sure the error is smaller.
        error = abs(percent_kept - (1.0 - rate))

        # The observed error is actually a lot less than this (<1%) but we
        # don't want to cause random regressions and 3% is probably still
        # acceptable for any outlier randoms.
        self.assertLess(error, 0.03)
예제 #26
0
    def testRandomNormalInitalizer(self):
        """Initialise a scalar variable with random_normal on the IPU."""
        with ops.device('cpu'):
            trace = gen_ipu_ops.ipu_event_trace()
        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("vs", use_resource=True):
                normal_init = init_ops.random_normal_initializer(mean=2.0,
                                                                 stddev=0.01)
                z = variable_scope.get_variable("z1",
                                                shape=[],
                                                dtype=np.float32,
                                                initializer=normal_init)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Clean existing reports
            sess.run(trace)
            sess.run(variables.global_variables_initializer())
            # Events captured here cover variable initialisation only.
            init_events = sess.run(trace)

            value = sess.run(z)
            # Loose tolerances around the configured mean of 2.0.
            self.assertAllClose(value, 2.0, 0.2, 0.2)

            strings = tu.extract_all_strings_from_event_trace(init_events)
            compute_sets = tu.get_compute_sets_from_report(strings)
            expected = [
                '__seed*',
                'vs/z1/Initializer/random_normal/RandomStandardNormal/fusion/normal'
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
예제 #27
0
  def testDepthwiseConvBackpropInput1x1(self):
    """Depthwise input backprop with a 1x1 filter, evaluated on zeros."""
    with ops.device("/device:IPU:0"):
      # Input sizes for the backprop op.
      input_sizes = constant_op.constant([1, 8, 8, 3], dtype=np.int32)
      pb = array_ops.placeholder(np.float32, [1, 1, 3, 2], name="b")
      pc = array_ops.placeholder(np.float32, [1, 8, 8, 6], name="c")
      input_grad = nn.depthwise_conv2d_native_backprop_input(
          input_sizes, pb, pc, strides=[1, 1, 1, 1], padding="SAME")

    with ops.device('cpu'):
      trace = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # First fetch is discarded; presumably it clears events recorded so far.
      sess.run(trace)

      feed = {
          pb: np.zeros([1, 1, 3, 2]),
          pc: np.zeros([1, 8, 8, 6]),
      }
      self.assertAllClose(sess.run(input_grad, feed), np.zeros([1, 8, 8, 3]))

      events = sess.run(trace)
      strings = tu.extract_all_strings_from_event_trace(events)
      compute_sets = tu.get_compute_sets_from_report(strings)

      expected = [
          '__seed*',
          'DepthwiseConv2dNativeBackpropInput/fusion*/WeightTranspose',
          'DepthwiseConv2dNativeBackpropInput/fusion*/Conv_1x1',
          'Copy_',
      ]

      self.assertTrue(
          tu.check_all_compute_sets_and_list(compute_sets, expected))
예제 #28
0
    def testDefaultTruncatedNormalInitalizer(self):
        """Initialise a [2, 4] variable with the default truncated normal."""
        with ops.device('cpu'):
            trace = gen_ipu_ops.ipu_event_trace()

        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("", use_resource=True):
                truncated_init = init_ops.truncated_normal_initializer()
                z = variable_scope.get_variable("z1",
                                                shape=[2, 4],
                                                dtype=np.float32,
                                                initializer=truncated_init)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())
            value = sess.run(z)
            # Very loose tolerances: only a coarse range check around 1.0.
            self.assertAllClose(value, np.ones((2, 4)), 2.0, 2.0)

            # Find the names of the compute sets.
            events = sess.run(trace)
            strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(strings)

            expected = [
                '__seed*',
                'z1/Initializer/truncated_normal/TruncatedNormal/custom-call*/truncatedNormal'
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
예제 #29
0
def ipu_compile_summary(name, op_list, collections=None):
  """Create an IPU compiler summary operation.

  The event-trace op is created on the CPU device under control dependencies
  on `op_list`, and wrapped in a tensor summary tagged with the "ipu" plugin
  name.

  Args:
    name: A name for the summary. It is passed as the summary's
          `display_name`; the summary op itself is named 'ipu_trace'.
    op_list: An operation, or a list or tuple of operations, to make this
             summary dependent upon.
    collections: Optional collections to add the summary into.

  Returns:
    The new summary operation

  """

  # Treat tuples as sequences too; previously a tuple was wrapped as a single
  # element of a one-item list.
  if isinstance(op_list, (list, tuple)):
    op_list = list(op_list)
  else:
    op_list = [op_list]

  with ops.device("cpu"):
    with ops.control_dependencies(op_list):

      reports = gen_ipu_ops.ipu_event_trace()

      summary_metadata = summary_pb2.SummaryMetadata(
          plugin_data=summary_pb2.SummaryMetadata.PluginData(
              plugin_name="ipu"))

      t_summary = tensor_summary(name='ipu_trace',
                                 tensor=reports,
                                 summary_metadata=summary_metadata,
                                 collections=collections,
                                 display_name=name)

  return t_summary
예제 #30
0
    def testMultiScopeTest(self):
        """Two matmuls in separate ipu_scope blocks compile and run once."""
        with ops.device('cpu'):
            x = array_ops.placeholder(np.float32, [2, 2])
            y = array_ops.placeholder(np.float32, [2, 2])
            trace = gen_ipu_ops.ipu_event_trace()

        with ipu.scopes.ipu_scope('/device:IPU:0'):
            z = math_ops.matmul(x, y)
        with ipu.scopes.ipu_scope('/device:IPU:0'):
            z2 = math_ops.matmul(x, z)

        ipu_cfg = ipu.utils.create_ipu_config(profiling=True)
        ipu_cfg = ipu.utils.set_ipu_model_options(ipu_cfg,
                                                  compile_ipu_code=False)
        ipu.utils.configure_ipu_system(ipu_cfg)

        with sl.Session() as sess:
            # First fetch is discarded; presumably it clears earlier events.
            sess.run(trace)
            feed = {x: np.ones([2, 2]), y: np.ones([2, 2])}
            product = sess.run(z2, feed)

            self.assertAllEqual(product, [[4, 4], [4, 4]])

            events = sess.run(trace)
            event_types = ipu.utils.extract_all_types_from_event_trace(events)

            # Tally compile-end and execute events in a single pass.
            compile_count = 0
            execute_count = 0
            for evt_type in event_types:
                compile_count += int(evt_type == IpuTraceEvent.COMPILE_END)
                execute_count += int(evt_type == IpuTraceEvent.EXECUTE)

            self.assertEqual(compile_count, 1)
            self.assertEqual(execute_count, 1)