def testCheckMaxTileSize(self):
        """Bound the maximum tile size reported while adding large tensors.

        A (1024, 2048) float32 resource variable, a placeholder and a constant
        are summed on the IPU. The event trace is read once after variable
        initialisation and once after evaluating the sum; each time the value
        returned by ``tu.get_maximum_tile_size_from_events`` must stay below a
        fixed limit.
        """
        dtype = np.float32
        shape = (1024, 2048)
        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("", use_resource=True):
                a = variable_scope.get_variable(
                    "a",
                    shape=shape,
                    initializer=init_ops.constant_initializer(2),
                    dtype=dtype)
            pb = array_ops.placeholder(shape=shape, dtype=dtype, name="b")
            c = constant_op.constant(4, shape=shape, dtype=dtype, name="c")
            output = a + pb + c

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(execution_trace=False)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # Trace contents after the variable has been initialised.
            result = sess.run(report)
            s = tu.extract_all_strings_from_event_trace(result)
            max_tile_size = tu.get_maximum_tile_size_from_events(s)
            # assertLess prints the offending size when the bound is exceeded.
            self.assertLess(max_tile_size, 17000)

            out = sess.run(output, {pb: np.ones(shape=shape, dtype=dtype)})
            # 2 (variable) + 1 (fed ones) + 4 (constant) == 7 in every element.
            self.assertAllClose(np.full(shape, 7, dtype=dtype), out)

            # Trace contents after the addition has been executed.
            result = sess.run(report)
            s = tu.extract_all_strings_from_event_trace(result)
            max_tile_size = tu.get_maximum_tile_size_from_events(s)
            self.assertLess(max_tile_size, 40000)
Ejemplo n.º 2
0
  def testDepthwiseConvBackpropFilter1x1WithRelu(self):
    """Run a 1x1 depthwise-conv filter gradient followed by ReLU on the IPU.

    Zero inputs must produce an all-zero [1, 1, 3, 2] result. The compute
    sets extracted from the event trace are then checked against a list of
    expected name patterns via ``tu.check_all_compute_sets_and_list``.
    """
    with ops.device("/device:IPU:0"):
      activations = array_ops.placeholder(np.float32, [1, 6, 6, 3], name="a")
      # Shape of the filter whose gradient is requested.
      filter_sizes = constant_op.constant([1, 1, 3, 2], dtype=np.int32)
      out_grads = array_ops.placeholder(np.float32, [1, 6, 6, 6], name="c")
      filter_grad = nn.depthwise_conv2d_native_backprop_filter(
          activations,
          filter_sizes,
          out_grads,
          strides=[1, 1, 1, 1],
          padding="SAME")
      rectified = nn.relu(filter_grad)

    with ops.device('cpu'):
      trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # Initial trace read; its result is discarded.
      sess.run(trace_op)

      feed = {
          activations: np.zeros([1, 6, 6, 3]),
          out_grads: np.zeros([1, 6, 6, 6]),
      }
      self.assertAllClose(sess.run(rectified, feed), np.zeros([1, 1, 3, 2]))

      events = sess.run(trace_op)
      compute_sets = tu.get_compute_sets_from_report(
          tu.extract_all_strings_from_event_trace(events))

      expected_patterns = [
          '__seed*', 'Copy_',
          'DepthwiseConv2dNativeBackpropFilter/fusion*/Conv_6x6',
          'Relu/custom-call*/Nonlinearity'
      ]
      self.assertTrue(
          tu.check_all_compute_sets_and_list(compute_sets, expected_patterns))
Ejemplo n.º 3
0
    def testTopK(self):
        """Compare IPU ``nn.top_k`` indices with a NumPy argsort reference.

        Takes the top 24 of 1200 random values and additionally expects the
        event trace read after the run to contain exactly three entries.
        """
        n_categories = 1200
        topn = 24

        def model(a):
            # Only the indices are checked; the values output is not needed.
            _, indices = nn.top_k(a, topn)
            return indices

        with ops.device('cpu'):
            pa = array_ops.placeholder(np.float32, [n_categories], name="a")
            report = gen_ipu_ops.ipu_event_trace()

        with ops.device("/device:IPU:0"):
            out = model(pa)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Initial trace read; its result is discarded.
            sess.run(report)

            # Named ``samples`` so the ``input`` builtin is not shadowed.
            samples = np.random.random(n_categories)
            # Indices of the ``topn`` largest samples, largest first.
            expected = (-samples).argsort()[:topn]

            result = sess.run(out, {pa: samples})
            self.assertAllClose(result, expected)

            events = sess.run(report)
            # assertEqual reports the actual count when the check fails.
            self.assertEqual(len(events), 3)
    def testPrefixPathWithTranspose(self):
        """Add a placeholder to the transposed output of a 1x1 Conv2D.

        The convolution uses an all-ones kernel with two output channels, its
        output is transposed with permutation [1, 2, 3, 0] and a tensor of
        ones is added. The result is compared with hard-coded values.
        """
        with ops.device("/device:IPU:0"):
            conv_in = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
            addend = array_ops.placeholder(np.float32, shape=[4, 4, 2, 1])

            with variable_scope.variable_scope("vs", use_resource=True):
                conv_layer = layers.Conv2D(
                    2,
                    1,
                    use_bias=True,
                    kernel_initializer=init_ops.ones_initializer())
                conv_out = conv_layer(conv_in)
            transposed = array_ops.transpose(conv_out, [1, 2, 3, 0])
            res = transposed + addend

        tu.configure_ipu_system(True, True, True)

        # Each pixel holds the sum of its two input channels plus one
        # (presumably the bias stays at its zero default - TODO confirm).
        expected = [[[[2.], [2.]], [[6.], [6.]],
                     [[10.], [10.]], [[14.], [14.]]],
                    [[[18.], [18.]], [[22.], [22.]],
                     [[26.], [26.]], [[30.], [30.]]],
                    [[[34.], [34.]], [[38.], [38.]],
                     [[42.], [42.]], [[46.], [46.]]],
                    [[[50.], [50.]], [[54.], [54.]],
                     [[58.], [58.]], [[62.], [62.]]]]

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            feed = {
                conv_in: np.reshape(np.arange(32), [1, 4, 4, 2]),
                addend: np.ones([4, 4, 2, 1])
            }
            self.assertAllClose(sess.run(res, feed), expected)
    def testPrefixPathWithReshape(self):
        """Add a placeholder to the flattened output of a 1x1 Conv2D.

        The convolution uses an all-ones kernel with two output channels, its
        output is reshaped to a vector of 32 elements and a vector of ones is
        added. The result is compared with hard-coded values.
        """
        with ops.device("/device:IPU:0"):
            conv_in = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
            addend = array_ops.placeholder(np.float32, shape=[32])

            with variable_scope.variable_scope("vs", use_resource=True):
                conv_layer = layers.Conv2D(
                    2,
                    1,
                    use_bias=True,
                    kernel_initializer=init_ops.ones_initializer())
                conv_out = conv_layer(conv_in)
            flattened = gen_array_ops.reshape(conv_out, [32])
            res = flattened + addend

        tu.configure_ipu_system(True, True, True)

        # Confirmed with values on the CPU.
        expected = [
            2., 2., 6., 6., 10., 10., 14., 14., 18., 18., 22., 22., 26.,
            26., 30., 30., 34., 34., 38., 38., 42., 42., 46., 46., 50.,
            50., 54., 54., 58., 58., 62., 62.
        ]

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            feed = {
                conv_in: np.reshape(np.arange(32), [1, 4, 4, 2]),
                addend: np.ones([32])
            }
            self.assertAllClose(sess.run(res, feed), expected)
Ejemplo n.º 6
0
    def testMaxPool(self):
        """Run a 5x5, stride-2, SAME-padded NCHW max pool on the IPU.

        An all-ones [1, 1, 10, 10] input must pool to an all-ones
        [1, 1, 5, 5] output. The event trace read afterwards must contain
        three entries, and its compute sets are checked against the expected
        name patterns.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [1, 1, 10, 10], name="a")
            c = nn.max_pool(pa,
                            ksize=[1, 1, 5, 5],
                            strides=[1, 1, 2, 2],
                            data_format='NCHW',
                            padding='SAME',
                            name="max")

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Initial trace read; its result is discarded.
            sess.run(report)

            fd = {
                pa: np.ones([1, 1, 10, 10]),
            }
            result = sess.run(c, fd)
            self.assertAllClose(result, np.ones([1, 1, 5, 5]))

            result = sess.run(report)
            # assertEqual reports the actual count when the check fails.
            self.assertEqual(len(result), 3)

            s = tu.extract_all_strings_from_event_trace(result)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = ['__seed*', 'max/custom-call*/maxPool5x5']
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
    def testPrefixPathWithElementwiseInPath(self):
        """Add a scaled placeholder to the output of a 1x1 Conv2D.

        Evaluates ``conv(x) + z * s`` where the convolution has an all-ones
        kernel with two output channels, ``x`` and ``z`` are both
        ``arange(32)`` reshaped to [1, 4, 4, 2] and ``s`` is 2.0.
        """
        with ops.device("/device:IPU:0"):
            conv_in = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
            addend = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
            scale = array_ops.placeholder(np.float32, shape=[])

            with variable_scope.variable_scope("vs", use_resource=True):
                conv_layer = layers.Conv2D(
                    2,
                    1,
                    use_bias=True,
                    kernel_initializer=init_ops.ones_initializer())
                conv_out = conv_layer(conv_in)
            res = conv_out + addend * scale

        tu.configure_ipu_system(True, True, True)

        # Confirmed with values on the CPU.
        expected = [[[[1., 3.], [9., 11.], [17., 19.], [25., 27.]],
                     [[33., 35.], [41., 43.], [49., 51.], [57., 59.]],
                     [[65., 67.], [73., 75.], [81., 83.], [89., 91.]],
                     [[97., 99.], [105., 107.], [113., 115.], [121., 123.]]]]

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            feed = {
                conv_in: np.reshape(np.arange(32), [1, 4, 4, 2]),
                addend: np.reshape(np.arange(32), [1, 4, 4, 2]),
                scale: 2.0
            }
            self.assertAllClose(sess.run(res, feed), expected)
Ejemplo n.º 8
0
    def testArgMax(self):
        """Compare IPU ``argmax`` along axis 1 with ``np.argmax``.

        Uses a random [4, 1200] input and additionally expects the event
        trace read after the run to contain exactly three entries.
        """
        batchsize = 4
        n_categories = 1200

        def model(a):
            return math_ops.argmax(a, axis=1, output_type=dtypes.int32)

        with ops.device('cpu'):
            pa = array_ops.placeholder(np.float32, [batchsize, n_categories])
            report = gen_ipu_ops.ipu_event_trace()

        with ops.device("/device:IPU:0"):
            out = model(pa)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Initial trace read; its result is discarded.
            sess.run(report)

            # Named ``samples`` so the ``input`` builtin is not shadowed.
            samples = np.random.rand(batchsize, n_categories)

            result = sess.run(out, {pa: samples})
            self.assertAllClose(result, np.argmax(samples, axis=1))

            events = sess.run(report)
            # assertEqual reports the actual count when the check fails.
            self.assertEqual(len(events), 3)
Ejemplo n.º 9
0
  def testConvBackpropFilter(self):
    """Run ``conv2d_backprop_filter`` on the IPU with zero inputs.

    The result must be an all-zero [2, 2, 3, 5] tensor, and the compute sets
    extracted from the event trace are checked against the expected name
    patterns.
    """
    with ops.device("/device:IPU:0"):
      activations = array_ops.placeholder(np.float32, [2, 8, 8, 3])
      # Shape of the filter whose gradient is requested.
      filter_sizes = constant_op.constant([2, 2, 3, 5], np.int32)
      out_grads = array_ops.placeholder(np.float32, [2, 8, 8, 5], name="wei")

      filter_grad = nn_ops.conv2d_backprop_filter(
          activations,
          filter_sizes,
          out_grads,
          strides=[1, 1, 1, 1],
          padding="SAME")

    with ops.device('cpu'):
      trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # Initial trace read; its result is discarded.
      sess.run(trace_op)

      feed = {
          activations: np.zeros([2, 8, 8, 3]),
          out_grads: np.zeros([2, 8, 8, 5]),
      }
      self.assertAllClose(sess.run(filter_grad, feed), np.zeros([2, 2, 3, 5]))

      events = sess.run(trace_op)
      compute_sets = tu.get_compute_sets_from_report(
          tu.extract_all_strings_from_event_trace(events))

      expected_patterns = [
          '__seed*', 'Copy_', 'Conv2DBackpropFilter/convolution.*/Conv_8x8'
      ]
      self.assertTrue(
          tu.check_all_compute_sets_and_list(compute_sets, expected_patterns))
    def testNamedOperations(self):
        """Check that a named add inside a name scope shows up in the trace.

        The add op is created as ``my_add_op`` within the ``my_ops`` name
        scope; the expected compute-set pattern contains both names.
        """
        with ops.device("/device:IPU:0"):
            lhs = array_ops.placeholder(np.float32, [2, 2], name="a")
            rhs = array_ops.placeholder(np.float32, [2, 2], name="b")
            with ops.name_scope('my_ops'):
                total = math_ops.add(lhs, rhs, 'my_add_op')

        with ops.device('cpu'):
            trace_op = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            feed = {lhs: [[1., 1.], [2., 3.]], rhs: [[0., 1.], [4., 5.]]}
            # Initial trace read; its result is discarded.
            sess.run(trace_op, feed)

            self.assertAllClose(sess.run(total, feed), [[1., 2.], [6., 8.]])

            events = sess.run(trace_op, feed)
            compute_sets = tu.get_compute_sets_from_report(
                tu.extract_all_strings_from_event_trace(events))

            expected_patterns = ['__seed*', 'my_ops/my_add_op/add']

            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets,
                                                   expected_patterns))
Ejemplo n.º 11
0
    def testTuplesOfTuplesAreStreamed(self):
        """Evaluate a tuple of two int64 sums and expect no IO events.

        ``(pa + pc, pb + pc)`` is computed for constant-filled 2x2 inputs;
        afterwards ``tu.extract_all_io_events`` must yield nothing.
        """
        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("vs", use_resource=True):
                first = array_ops.placeholder(np.int64, [2, 2], name="a")
                second = array_ops.placeholder(np.int64, [2, 2], name="b")
                shared = array_ops.placeholder(np.int64, [2, 2], name="c")
                sums = control_flow_ops.tuple(
                    (first + shared, second + shared))

            with ops.device('cpu'):
                trace_op = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            # Initial trace read; its result is discarded.
            sess.run(trace_op)
            feed = {
                first: np.full((2, 2), 7),
                second: np.full((2, 2), 6),
                shared: np.full((2, 2), 5),
            }
            out = sess.run(sums, feed)
            self.assertEqual(len(out), 2)
            # 7 + 5 == 12 and 6 + 5 == 11.
            expected = (np.full((2, 2), 12), np.full((2, 2), 11))
            self.assertAllClose(out, expected)

            events = sess.run(trace_op)
            io_events = list(tu.extract_all_io_events(events))
            # No io_events implies the data was streamed
            self.assertEqual(len(io_events), 0)
    def testIpuEventsWithoutPoplarReporting(self):
        """IPU events are still produced when all Poplar traces are disabled.

        The system is configured with ``enable_ipu_events=True`` while the
        compilation, IO and execution traces are switched off. Three events
        are expected, and the compile-end and execute events must carry empty
        reports.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [2, 2], name="a")
            pb = array_ops.placeholder(np.float32, [2, 2], name="b")
            out = math_ops.add(pa, pb)

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(enable_ipu_events=True,
                                compilation_trace=False,
                                io_trace=False,
                                execution_trace=False)

        with tu.ipu_session() as sess:
            fd = {pa: [[1., 1.], [2., 3.]], pb: [[0., 1.], [4., 5.]]}
            # Initial trace read; its result is discarded.
            sess.run(report, fd)

            sess.run(out, fd)

            rep = sess.run(report, fd)
            evts = tu.extract_all_events(rep)
            # compile begin, compile end, execute
            self.assertEqual(len(evts), 3)

            # assertEqual reports the unexpected report length on failure.
            for e in evts:
                if e.type == IpuTraceEvent.COMPILE_END:
                    self.assertEqual(len(e.compile_end.compilation_report), 0)
                if e.type == IpuTraceEvent.EXECUTE:
                    self.assertEqual(len(e.execute.execution_report), 0)

            # NOTE(review): an explicit close inside the `with` block looks
            # redundant - confirm tu.ipu_session closes the session on exit
            # before removing it.
            sess.close()
Ejemplo n.º 13
0
    def testDefaultTruncatedNormalInitalizer(self):
        """Initialise a [2, 4] variable with the default truncated normal.

        The values must fall within the loose tolerance around ones used
        below, and the compute sets from the event trace are checked against
        the expected name patterns.
        """
        with ops.device('cpu'):
            trace_op = gen_ipu_ops.ipu_event_trace()

        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("", use_resource=True):
                initializer = init_ops.truncated_normal_initializer()
                var = variable_scope.get_variable("z1",
                                                  shape=[2, 4],
                                                  dtype=np.float32,
                                                  initializer=initializer)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())
            values = sess.run(var)
            self.assertAllClose(values, np.ones((2, 4)), rtol=2.0, atol=2.0)

            # Collect the names of the compute sets that were generated.
            events = sess.run(trace_op)
            compute_sets = tu.get_compute_sets_from_report(
                tu.extract_all_strings_from_event_trace(events))

            expected_patterns = [
                '__seed*',
                'z1/Initializer/truncated_normal/TruncatedNormal/custom-call*/truncatedNormal'
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets,
                                                   expected_patterns))
Ejemplo n.º 14
0
    def testUniformRandomNonScalarInitalizer(self):
        """Initialise a 2-element variable uniformly in [-2.0, 2.0].

        The trace captured right after variable initialisation supplies the
        compute sets that are checked against the expected name patterns.
        """
        with ops.device('cpu'):
            trace_op = gen_ipu_ops.ipu_event_trace()
        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("vs", use_resource=True):
                initializer = init_ops.random_uniform_initializer(
                    minval=-2.0, maxval=2.0)
                var = variable_scope.get_variable("z1",
                                                  shape=[2],
                                                  dtype=np.float32,
                                                  initializer=initializer)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Clean existing reports
            sess.run(trace_op)
            sess.run(variables.global_variables_initializer())
            init_events = sess.run(trace_op)

            values = sess.run(var)
            self.assertAllClose(values, [0.0, 0.0], rtol=2.0, atol=2.0)

            compute_sets = tu.get_compute_sets_from_report(
                tu.extract_all_strings_from_event_trace(init_events))
            expected_patterns = [
                '__seed*',
                'vs/z1/Initializer/random_uniform/RandomUniform/fusion/uniform'
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets,
                                                   expected_patterns))
Ejemplo n.º 15
0
    def testScaledSubtractFrom(self):
        """Evaluate ``pa - 2.0 * pb`` in float16 with a constant scale.

        Besides the numerical result, the event trace must contain three
        entries and its compute sets must be covered by the expected name
        patterns, which include ``sub/fusion/AddTo``.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float16, [3])
            pb = array_ops.placeholder(np.float16, [3])
            const = array_ops.constant(2.0, np.float16)
            # note how const operand index varies compared to testScaledAddTo
            # still should match as it will be reordered
            c = pa - const * pb

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Initial trace read; its result is discarded.
            sess.run(report)

            fd = {pa: [2.0, 0.5, 1.0], pb: [1.0, 2.0, 3.0]}
            result = sess.run(c, fd)
            self.assertAllClose(result, [0.0, -3.5, -5.0])

            result = sess.run(report)
            # assertEqual reports the actual count when the check fails.
            self.assertEqual(len(result), 3)

            s = tu.extract_all_strings_from_event_trace(result)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = ['__seed*', 'host-exchange-local-copy-', 'sub/fusion/AddTo']
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
Ejemplo n.º 16
0
    def testRandomNormalInitalizer(self):
        """Initialise a scalar variable from a normal with mean 2.0, stddev 0.01.

        The trace captured right after variable initialisation supplies the
        compute sets that are checked against the expected name patterns.
        """
        with ops.device('cpu'):
            trace_op = gen_ipu_ops.ipu_event_trace()
        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("vs", use_resource=True):
                initializer = init_ops.random_normal_initializer(mean=2.0,
                                                                 stddev=0.01)
                var = variable_scope.get_variable("z1",
                                                  shape=[],
                                                  dtype=np.float32,
                                                  initializer=initializer)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Clean existing reports
            sess.run(trace_op)
            sess.run(variables.global_variables_initializer())
            init_events = sess.run(trace_op)

            value = sess.run(var)
            self.assertAllClose(value, 2.0, rtol=0.2, atol=0.2)

            compute_sets = tu.get_compute_sets_from_report(
                tu.extract_all_strings_from_event_trace(init_events))
            expected_patterns = [
                '__seed*',
                'vs/z1/Initializer/random_normal/RandomStandardNormal/fusion/normal'
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets,
                                                   expected_patterns))
Ejemplo n.º 17
0
    def testBatchNormalizeLayerFusedFp16(self):
        """Run fused batch normalisation on a float16 input.

        A zero input must normalise to zeros, and none of the compute sets in
        the event trace may match the blacklisted ``*convert*/Cast*`` pattern.
        """
        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("", use_resource=True):
                inputs = array_ops.placeholder(np.float16, [4, 64, 64, 4],
                                               name="a")

                normed = layers_norm.batch_normalization(inputs, fused=True)

        with ops.device('cpu'):
            trace_op = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Initial trace read; its result is discarded.
            sess.run(trace_op)

            sess.run(variables.global_variables_initializer())
            feed = {inputs: np.zeros([4, 64, 64, 4])}
            self.assertAllClose(sess.run(normed, feed),
                                np.zeros([4, 64, 64, 4]))

            events = sess.run(trace_op)
            compute_sets = tu.get_compute_sets_from_report(
                tu.extract_all_strings_from_event_trace(events))

            blacklist = ['*convert*/Cast*']
            self.assertTrue(
                tu.check_compute_sets_not_in_blacklist(compute_sets,
                                                       blacklist))
  def testDontOutlineInplaceExpression(self):
    """Evaluate ``pa + pb - pc + pd`` on scalars and inspect the compute sets.

    The event trace must contain three entries, and its compute sets must be
    covered by the expected patterns for the two adds and the subtract.
    """
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float32, [])
      pb = array_ops.placeholder(np.float32, [])
      pc = array_ops.placeholder(np.float32, [])
      pd = array_ops.placeholder(np.float32, [])
      e = pa + pb - pc + pd

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # Initial trace read; its result is discarded.
      sess.run(report)
      fd = {pa: 1, pb: 2, pc: 3, pd: 4}
      result = sess.run(e, fd)
      # 1 + 2 - 3 + 4 == 4
      self.assertAllClose(result, 4)

      result = sess.run(report)
      # assertEqual reports the actual count when the check fails.
      self.assertEqual(len(result), 3)

      s = tu.extract_all_strings_from_event_trace(result)
      cs_list = tu.get_compute_sets_from_report(s)

      ok = [
          '__seed*', 'add/add.*/AddTo', 'sub/subtract.*/AddTo',
          'add_1/add.*/AddTo'
      ]
      self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
Ejemplo n.º 19
0
    def testSigmoidGrad(self):
        """Run ``sigmoid_grad`` on the IPU and inspect the compute sets.

        Checks the numerical result, expects three entries in the event
        trace, and matches its compute sets against the expected patterns.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [3], name="grad")
            pb = array_ops.placeholder(np.float32, [3], name="in")
            c = gen_math_ops.sigmoid_grad(pa, pb)

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Initial trace read; its result is discarded.
            sess.run(report)

            fd = {pa: [2.0, 0.5, 1.0], pb: [-1.0, 1.0, 6.0]}
            result = sess.run(c, fd)
            self.assertAllClose(result, [2.0, 0.25, 0.0])

            result = sess.run(report)
            # assertEqual reports the actual count when the check fails.
            self.assertEqual(len(result), 3)

            s = tu.extract_all_strings_from_event_trace(result)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = ['__seed*', 'SigmoidGrad/custom-call/NonLinearityGrad']
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
Ejemplo n.º 20
0
  def testDepthwiseConvBackpropInput1x1(self):
    """Run a 1x1 depthwise-conv input gradient on the IPU with zero inputs.

    The result must be an all-zero [1, 8, 8, 3] tensor, and the compute sets
    extracted from the event trace are checked against the expected name
    patterns.
    """
    with ops.device("/device:IPU:0"):
      # Shape of the input whose gradient is requested.
      input_sizes = constant_op.constant([1, 8, 8, 3], dtype=np.int32)
      weights = array_ops.placeholder(np.float32, [1, 1, 3, 2], name="b")
      out_grads = array_ops.placeholder(np.float32, [1, 8, 8, 6], name="c")
      input_grad = nn.depthwise_conv2d_native_backprop_input(
          input_sizes,
          weights,
          out_grads,
          strides=[1, 1, 1, 1],
          padding="SAME")

    with ops.device('cpu'):
      trace_op = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # Initial trace read; its result is discarded.
      sess.run(trace_op)

      feed = {
          weights: np.zeros([1, 1, 3, 2]),
          out_grads: np.zeros([1, 8, 8, 6]),
      }
      self.assertAllClose(sess.run(input_grad, feed), np.zeros([1, 8, 8, 3]))

      events = sess.run(trace_op)
      compute_sets = tu.get_compute_sets_from_report(
          tu.extract_all_strings_from_event_trace(events))

      expected_patterns = [
          '__seed*',
          'DepthwiseConv2dNativeBackpropInput/fusion*/WeightTranspose',
          'DepthwiseConv2dNativeBackpropInput/fusion*/Conv_1x1', 'Copy_'
      ]

      self.assertTrue(
          tu.check_all_compute_sets_and_list(compute_sets, expected_patterns))
Ejemplo n.º 21
0
    def testSigmoidNotInplace(self):
        """Evaluate ``sigmoid(pa) + pa`` and inspect the compute sets.

        The input is used both by the sigmoid and by the add; the expected
        patterns include an on-tile copy of the argument alongside the
        nonlinearity and the add. The event trace must contain three entries.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [3], name="a")
            c = math_ops.sigmoid(pa) + pa

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Initial trace read; its result is discarded.
            sess.run(report)

            fd = {pa: [-6.0, 0.0, 6.0]}
            result = sess.run(c, fd)
            self.assertAllClose(result, [-5.997527, 0.5, 6.997527])

            result = sess.run(report)
            # assertEqual reports the actual count when the check fails.
            self.assertEqual(len(result), 3)

            s = tu.extract_all_strings_from_event_trace(result)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = [
                '__seed*', 'Sigmoid/custom-call/Nonlinearity',
                'Copy_XLA_Args/arg0.*_to_Sigmoid/custom-call.clone/OnTileCopy-0',
                'add/add.*/AddTo'
            ]
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
Ejemplo n.º 22
0
    def testScaledSubtractFromVariable(self):
        """Evaluate ``pa - pc * pb`` in float16 with a fed one-element scale.

        Besides the numerical result, the event trace must contain three
        entries and its compute sets must be covered by the expected name
        patterns, which include ``sub/fusion/AddTo``.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float16, [3])
            pb = array_ops.placeholder(np.float16, [3])
            pc = array_ops.placeholder(np.float16, [1])
            c = pa - pc * pb

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Initial trace read; its result is discarded.
            sess.run(report)

            fd = {pa: [2.0, 0.5, 1.0], pb: [1.0, 2.0, 3.0], pc: [2.0]}
            result = sess.run(c, fd)
            self.assertAllClose(result, [0.0, -3.5, -5.0])

            result = sess.run(report)
            # assertEqual reports the actual count when the check fails.
            self.assertEqual(len(result), 3)

            s = tu.extract_all_strings_from_event_trace(result)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = ['__seed*', 'host-exchange-local-copy-', 'sub/fusion/AddTo']
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
    def testCborReport(self):
        """Request CBOR reports and check the first byte of each report.

        With ``text_report=False`` and ``cbor_report=True`` the compilation
        and execution reports must both start with byte value 217.
        """
        with ops.device("/device:IPU:0"):
            lhs = array_ops.placeholder(np.float32, [2, 2], name="a")
            rhs = array_ops.placeholder(np.float32, [2, 2], name="b")
            total = math_ops.add(lhs, rhs)

        with ops.device('cpu'):
            trace_op = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(text_report=False, cbor_report=True)

        # Going through bytes/bytearray gives the element type that indexing
        # a byte string yields on the running interpreter (int on Python 3).
        leading_byte = bytes(bytearray([217]))[0]

        with tu.ipu_session() as sess:
            feed = {lhs: [[1., 1.], [2., 3.]], rhs: [[0., 1.], [4., 5.]]}
            # Initial trace read; its result is discarded.
            sess.run(trace_op, feed)

            sess.run(total, feed)

            events = tu.extract_all_events(sess.run(trace_op, feed))
            self.assertEqual(len(events), 3)  # begin, end, execute

            compile_report = events[1].compile_end.compilation_report
            self.assertEqual(compile_report[0], leading_byte)
            execute_report = events[2].execute.execution_report
            self.assertEqual(execute_report[0], leading_byte)
Ejemplo n.º 24
0
    def testConvolutionBiasApplyVariableLR(self):
        """Train two stacked 1x1 convolutions with a fed learning rate.

        One gradient-descent step is run with the learning rate supplied via
        a placeholder. The event trace must contain six entries, and its
        compute sets are checked against the expected name patterns.
        """
        with ops.device("/device:IPU:0"):
            inputs = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
            learning_rate = array_ops.placeholder(np.float32, shape=[])

            with variable_scope.variable_scope("vs", use_resource=True):
                first_conv = layers.Conv2D(
                    2,
                    1,
                    use_bias=True,
                    kernel_initializer=init_ops.ones_initializer())
                hidden = first_conv(inputs)
                second_conv = layers.Conv2D(
                    2,
                    1,
                    use_bias=True,
                    kernel_initializer=init_ops.ones_initializer())
                outputs = second_conv(hidden)

            loss = math_ops.reduce_sum(outputs)
            optimizer = gradient_descent.GradientDescentOptimizer(
                learning_rate)
            train = optimizer.minimize(loss)

            with ops.device('cpu'):
                trace_op = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # Trace read before the training step; its result is discarded.
            sess.run(trace_op)

            feed = {inputs: np.zeros([1, 4, 4, 2]), learning_rate: 0.1}
            sess.run([train, loss], feed)

            events = sess.run(trace_op)
            # 2xcompile, 1xupload, 1xload, 1xdownload, 1xexecute
            self.assertEqual(len(events), 6)

            compute_sets = tu.get_compute_sets_from_report(
                tu.extract_all_strings_from_event_trace(events))
            # NOTE(review): 'vs/conv2d/BiasAdd/fusion*/addToChannel' is listed
            # twice below; kept exactly as found.
            expected_patterns = [
                '__seed*',
                'Copy_',
                'host-exchange-local-copy-',
                'vs/conv2d/BiasAdd/fusion*/addToChannel',
                'vs/conv2d/Conv2D/convolution*',
                'vs/conv2d_1/BiasAdd/fusion.2/addToChannel',
                'GradientDescent/update_vs/conv2d/bias/ResourceApplyGradientDescent/fusion.3/ReduceFinalStage/IntermediateToOutput/Reduce',
                'GradientDescent/update_vs/conv2d/bias/ResourceApplyGradientDescent/fusion*/negate/Op/Negate',
                'gradients/vs/conv2d_1/Conv2D_grad/Conv2DBackpropFilter/fusion*/Conv_4x4/',
                'gradients/vs/conv2d_1/Conv2D_grad/Conv2DBackpropFilter/fusion*/AddTo',
                'GradientDescent/update_vs/conv2d_1/bias/ResourceApplyGradientDescent/multiply*/Op/Multiply',
                'GradientDescent/update_vs/conv2d_1/bias/ResourceApplyGradientDescent/subtract*/AddTo',
                'vs/conv2d/BiasAdd/fusion*/addToChannel',
                'Sum/reduce*/ReduceFinalStage/IntermediateToOutput/Reduce',
            ]

            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets,
                                                   expected_patterns))
    def testConvolutionsMatchFwdBwdWu(self):
        """Train three stacked 1x1 convolutions and inspect the compute sets.

        One gradient-descent step is run over ``conv1`` -> ``conv2`` ->
        ``conv3`` (no biases, all-ones kernels). The compute sets extracted
        from the event trace are then checked against the expected patterns.
        """
        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

            with variable_scope.variable_scope("vs", use_resource=True):
                y = layers.Conv2D(
                    2,
                    1,
                    use_bias=False,
                    kernel_initializer=init_ops.ones_initializer(),
                    name='conv1')(x)
                y = layers.Conv2D(
                    2,
                    1,
                    use_bias=False,
                    kernel_initializer=init_ops.ones_initializer(),
                    name='conv2')(y)
                y = layers.Conv2D(
                    2,
                    1,
                    use_bias=False,
                    kernel_initializer=init_ops.ones_initializer(),
                    name='conv3')(y)

            loss = math_ops.reduce_sum(y)
            optimizer = gradient_descent.GradientDescentOptimizer(0.1)
            train = optimizer.minimize(loss)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # Trace read before the training step; its result is discarded.
            sess.run(report)

            sess.run([train, loss], {x: np.zeros([1, 4, 4, 2])})

            result = sess.run(report)

            s = tu.extract_all_strings_from_event_trace(result)
            cs_list = tu.get_compute_sets_from_report(s)

            # Fwd and BackpropInput should be shared
            # Weight transpose for BackpropInput should be present
            # Both BackpropFilter should be shared
            ok = [
                '__seed*', 'host-exchange-local-copy-', 'Copy_',
                'vs/conv1/Conv2D/convolution.*/Conv_1x1',
                'Sum/reduce.*/ReduceOnTile/InToIntermediateNoExchange/Reduce',
                'Sum/reduce.*/ReduceFinalStage/IntermediateToOutput/Reduce',
                'gradients/vs/conv3/Conv2D_grad/Conv2DBackpropInput/fusion.*/WeightTranspose',
                'gradients/vs/conv2/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Conv_4x4',
                'gradients/vs/conv2/Conv2D_grad/Conv2DBackpropFilter/fusion.*/DeltasPartialTranspose',
                'gradients/vs/conv2/Conv2D_grad/Conv2DBackpropFilter/fusion.*/AddTo'
            ]
            # The expected list was previously built but never checked, so
            # the test could not fail on a compute-set mismatch.
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
    def testBatchNormAndGroupNormalizeMixedInference(self):
        """Mix group-norm inference and fused batch norm between convolutions.

        The graph is conv -> Popnn group-norm inference -> conv -> fused
        batch normalisation. After one run the compute sets extracted from
        the event trace are checked against the expected name patterns.
        """
        with ops.device("/device:IPU:0"):
            inputs = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

            with variable_scope.variable_scope("vs", use_resource=True):
                first_conv = convolutional.conv2d(
                    inputs,
                    2,
                    1,
                    use_bias=False,
                    kernel_initializer=init_ops.ones_initializer())
                # All four normalisation parameters use the same constant.
                gamma = constant_op.constant([0.5, 0.5], np.float32)
                beta = constant_op.constant([0.5, 0.5], np.float32)
                mean = constant_op.constant([0.5, 0.5], np.float32)
                inv_std_dev = constant_op.constant([0.5, 0.5], np.float32)
                group_normed = gen_popnn_ops.popnn_group_norm_inference(
                    inputs=first_conv,
                    gamma=gamma,
                    beta=beta,
                    mean=mean,
                    inv_std_dev=inv_std_dev,
                    data_format="NHWC",
                    epsilon=0.0015,
                    num_groups=2)
                second_conv = convolutional.conv2d(
                    group_normed,
                    2,
                    1,
                    use_bias=False,
                    kernel_initializer=init_ops.ones_initializer())
                outputs = layers_norm.batch_normalization(second_conv,
                                                          fused=True)

            with ops.device('cpu'):
                trace_op = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # Trace read before the run; its result is discarded.
            sess.run(trace_op)

            sess.run(outputs, {inputs: np.zeros([1, 4, 4, 2])})

            events = sess.run(trace_op)
            compute_sets = tu.get_compute_sets_from_report(
                tu.extract_all_strings_from_event_trace(events))

            # Would fail if there were two batch norms in the graph
            expected_patterns = [
                '__seed*', 'host-exchange-local-copy', 'Copy_',
                'vs/conv2d/Conv2D/convolution.*/Conv_1x1/Convolve',
                'vs/PopnnGroupNormInference/custom-call*/',
                'vs/batch_normalization/FusedBatchNorm/batch-norm-inference.*/'
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets,
                                                   expected_patterns))
Ejemplo n.º 27
0
    def testNonModifiedResourceIsNotOverwrittenInPlaceOp(self):
        # Verifies that when a resource variable (w) is marked as not modified,
        # a copy is inserted so that it is not overwritten between executions
        # even though it is consumed by an in-place op.
        w_val = [1, 2, 3, 4]
        w_init = init_ops.constant_initializer(
            np.array(w_val, dtype=np.float32))

        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("vs", use_resource=True):
                w = variable_scope.get_variable(
                    "w", shape=[4], dtype=np.float32, initializer=w_init)

            px = array_ops.placeholder(np.float32, shape=[4])
            y = w + px

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # Trace fetched once before the runs under test; result discarded.
            sess.run(report)

            # Run the same graph three times with different feeds; every run
            # must still see the original value of w.
            feeds = [
                np.array(values, dtype=np.float32)
                for values in ([7, 3, 5, 9], [1, 8, 3, 4], [9, 2, 2, 6])
            ]
            for feed in feeds:
                out = sess.run(y, {px: feed})
                self.assertAllClose(out, feed + w_val)

            rep = sess.run(report)
            io_evts = tu.extract_all_io_events(rep)

            # Each IO event is indexed as (event type, name) below.
            host_to_device = [
                evt for evt in io_evts
                if evt[0] == IpuTraceEvent.HOST_TO_DEVICE_TRANSFER
            ]
            self.assertEqual(len(host_to_device), 1)

            device_to_host = [
                evt for evt in io_evts
                if evt[0] == IpuTraceEvent.DEVICE_TO_HOST_TRANSFER
            ]
            self.assertEqual(len(device_to_host), 0)

            # w should be copied to device once and that should be the only io
            # event
            w_dl = "1.0"
            w_transfers = [evt for evt in host_to_device if evt[1] == w_dl]
            self.assertEqual(len(w_transfers), 1)
Ejemplo n.º 28
0
    def testFwdAndBwdMaxPool(self):
        # Runs a 2x2, stride-2 max pool and its gradient on the IPU in one
        # session call, checks both numerical results, then checks the number
        # of trace events and the reported compute sets.
        #
        # The local holding the input data is deliberately not called `input`
        # so that the builtin of the same name is not shadowed.
        input_values = np.arange(16).reshape(1, 4, 4, 1)
        output_grad = np.full((1, 2, 2, 1), 0.1)

        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [1, 4, 4, 1], name="a")
            pb = array_ops.placeholder(np.float32, [1, 2, 2, 1], name="b")
            # NOTE(review): data_format is 'NCHW' while ksize/strides are
            # [1, 2, 2, 1]; for this particular input the expected values are
            # the same whichever pair of middle axes is pooled - confirm the
            # layout is intentional.
            c = nn.max_pool(pa,
                            ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1],
                            data_format='NCHW',
                            padding='SAME')
            d = gen_nn_ops.max_pool_grad(pa,
                                         c,
                                         pb,
                                         ksize=[1, 2, 2, 1],
                                         strides=[1, 2, 2, 1],
                                         data_format='NCHW',
                                         padding='SAME')

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Trace fetched once before the run under test; result discarded.
            sess.run(report)
            fe = {
                pa: input_values,
                pb: output_grad,
            }
            output, input_grad = sess.run((c, d), fe)
            # Forward: the maximum of each 2x2 window of 0..15.
            self.assertAllClose(output, [[[[5.], [7.]], [[13.], [15.]]]])
            # Backward: the 0.1 gradient lands only on the positions that held
            # each window's maximum.
            self.assertAllClose(
                input_grad,
                [[[[0.], [0.], [0.], [0.]], [[0.], [0.1], [0.], [0.1]],
                  [[0.], [0.], [0.], [0.]], [[0.], [0.1], [0.], [0.1]]]])

            result = sess.run(report)
            # assertEqual reports the actual length on failure, unlike
            # assertTrue(len(result) == 3).
            self.assertEqual(len(result), 3)

            s = tu.extract_all_strings_from_event_trace(result)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = [
                '__seed*', 'Copy_*', 'MaxPool/custom-call*/maxPool2x2/',
                'MaxPoolGrad/custom-call*/maxPool2x2'
            ]
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
    def testWideConstantWithAllocationTarget(self):
        # This test will fail if the dynamic slice is not mapped correctly.
        #
        # A (512, 2, 2048) constant filled with 4 is sliced inside a two
        # iteration while loop; the mean of each slice is accumulated into y.
        # The numerical result, the number of trace events, the compute sets
        # and the maximum tile size are then checked.
        dtype = np.float32
        shape = (512, 2, 2048)

        def my_net(y):
            # The loop carries (counter, constant, accumulator); cond and body
            # must both accept all three even where they ignore some of them.
            def cond(i, x, y):
                return i < 2

            def body(i, x, y):
                s = array_ops.slice(x, [i, i, i], [1, 1, 2048])
                y = y + math_ops.reduce_mean(s)
                i = i + 1
                return (i, x, y)

            i = 0
            c = constant_op.constant(4, shape=shape, dtype=dtype, name="c")
            # Only the accumulator (index 2 of the loop state) is returned.
            return control_flow_ops.while_loop(cond, body, (i, c, y))[2]

        with ops.device('cpu'):
            y = array_ops.placeholder(dtype, [1])
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with ops.device("/device:IPU:0"):
            r = xla.compile(my_net, inputs=[y])

        with tu.ipu_session() as sess:
            # Trace fetched once before the run under test; result discarded.
            sess.run(report)
            # Keep the fetched value under its own name so the placeholder `y`
            # is not rebound by the statement that uses it as a feed key.
            out = sess.run(r, {y: [10]})
            # 10 + 4 + 4: each of the two iterations adds the mean of a slice
            # of the all-4 constant.
            self.assertAllClose(out[0], [18])

            result = sess.run(report)
            # assertEqual/assertLess report the offending values on failure.
            self.assertEqual(len(result), 3)

            s = tu.extract_all_strings_from_event_trace(result)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = [
                '__seed*', 'Copy_*_to_*',
                'while/Slice/dynamic-slice*/dynamicSlice', 'while/Mean/reduce',
                'while/Mean/multiply', 'while/add*/add*/AddTo'
            ]
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))

            max_tile_size = tu.get_maximum_tile_size_from_events(s)
            self.assertLess(max_tile_size, 60000)
    def testBatchNormalizeInferenceDontMatchDifferentTypes(self):
        # Builds conv -> batch norm in float32, casts to float16, then builds a
        # second conv -> batch norm. After one run on zeros, the compute sets in
        # the event trace are compared with the expected name patterns, which
        # list the two batch norms separately.
        input_shape = [1, 4, 4, 2]

        def conv_then_batch_norm(tensor):
            # 1x1 convolution (two filters, all-ones kernel, no bias) followed
            # by a fused batch normalization.
            conv_out = convolutional.conv2d(
                tensor,
                2,
                1,
                use_bias=False,
                kernel_initializer=init_ops.ones_initializer())
            return layers_norm.batch_normalization(conv_out, fused=True)

        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=input_shape)

            with variable_scope.variable_scope("vs", use_resource=True):
                float32_stage = conv_then_batch_norm(x)
                as_half = math_ops.cast(float32_stage, np.float16)
                y = conv_then_batch_norm(as_half)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # Trace fetched once before the run under test; result discarded.
            sess.run(report)

            sess.run(y, {x: np.zeros(input_shape)})

            events = sess.run(report)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)
            # Matches two convolutions
            expected_patterns = [
                '__seed*',
                'host-exchange-local-copy-',
                'Copy_',
                'vs/conv2d/Conv2D/convolution.*/Conv_1x1',
                'vs/batch_normalization/FusedBatchNorm/batch-norm-inference.*/',
                'vs/Cast/convert.*/Cast',
                'vs/conv2d_1/Conv2D/convolution.*/Conv_1x1',
                'vs/batch_normalization_1/FusedBatchNormV2/batch-norm-inference.*/',
            ]
            matched = tu.check_all_compute_sets_and_list(
                compute_sets, expected_patterns)
            self.assertTrue(matched)