コード例 #1
0
    def testMaxPool(self):
        """Max-pool a tensor of ones on the IPU and check the compute sets.

        Builds a 5x5, stride-2, SAME-padded NCHW max-pool over a
        [1, 1, 10, 10] input, checks the output against np.ones([1, 1, 5, 5]),
        then checks that the event trace holds three events and that the
        reported compute sets match the expected patterns.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [1, 1, 10, 10], name="a")
            pooled = nn.max_pool(pa,
                                 ksize=[1, 1, 5, 5],
                                 strides=[1, 1, 2, 2],
                                 data_format='NCHW',
                                 padding='SAME',
                                 name="max")

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Fetch the trace once up front (presumably discards earlier
            # events so only this run is inspected below).
            sess.run(report)

            output = sess.run(pooled, {pa: np.ones([1, 1, 10, 10])})
            self.assertAllClose(output, np.ones([1, 1, 5, 5]))

            events = sess.run(report)
            # assertEqual shows the actual event count when the check fails.
            self.assertEqual(len(events), 3)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            expected = ['__seed*', 'max/custom-call*/maxPool5x5']
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #2
0
  def testConvBackpropFilter(self):
    """Compute a conv2d filter gradient on the IPU and check its compute sets.

    Feeds all-zero activations and output gradients, expects an all-zero
    [2, 2, 3, 5] filter gradient, and compares the compute sets found in the
    event trace with the expected patterns.
    """
    with ops.device("/device:IPU:0"):
      activations = array_ops.placeholder(np.float32, [2, 8, 8, 3])
      filter_sizes = constant_op.constant([2, 2, 3, 5], np.int32)
      out_backprop = array_ops.placeholder(np.float32, [2, 8, 8, 5], name="wei")

      filter_grad = nn_ops.conv2d_backprop_filter(
          activations,
          filter_sizes,
          out_backprop,
          strides=[1, 1, 1, 1],
          padding="SAME")

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # Fetch the trace once up front (presumably discards earlier events).
      sess.run(report)

      feed = {
          activations: np.zeros([2, 8, 8, 3]),
          out_backprop: np.zeros([2, 8, 8, 5]),
      }
      grad_value = sess.run(filter_grad, feed)
      self.assertAllClose(grad_value, np.zeros([2, 2, 3, 5]))

      events = sess.run(report)

      trace_strings = tu.extract_all_strings_from_event_trace(events)
      compute_sets = tu.get_compute_sets_from_report(trace_strings)

      expected = [
          '__seed*', 'Copy_', 'Conv2DBackpropFilter/convolution.*/Conv_8x8'
      ]
      self.assertTrue(
          tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #3
0
    def testSigmoidNotInplace(self):
        """Evaluate sigmoid(a) + a on the IPU and check the compute sets.

        The input is used both by the sigmoid and by the add; the expected
        compute-set patterns include a copy of the argument for the sigmoid.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [3], name="a")
            sig = math_ops.sigmoid(pa)
            total = sig + pa

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Fetch the trace once up front (presumably discards earlier
            # events).
            sess.run(report)

            value = sess.run(total, {pa: [-6.0, 0.0, 6.0]})
            self.assertAllClose(value, [-5.997527, 0.5, 6.997527])

            events = sess.run(report)
            # assertEqual shows the actual event count when the check fails.
            self.assertEqual(len(events), 3)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            expected = [
                '__seed*',
                'Sigmoid/custom-call/Nonlinearity',
                'Copy_XLA_Args/arg0.*_to_Sigmoid/custom-call.clone/OnTileCopy-0',
                'add/add.*/AddTo',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #4
0
    def testRandomNormalInitalizer(self):
        """Initialise a scalar variable with random_normal on the IPU.

        Creates resource variable "vs/z1" with a random-normal initializer
        (mean 2.0, stddev 0.01), checks its value is close to 2.0 and checks
        the compute sets reported while running the initializer.
        """
        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("vs", use_resource=True):
                normal_init = init_ops.random_normal_initializer(mean=2.0,
                                                                 stddev=0.01)
                z = variable_scope.get_variable("z1",
                                                shape=[],
                                                dtype=np.float32,
                                                initializer=normal_init)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Clean existing reports
            sess.run(report)
            sess.run(variables.global_variables_initializer())
            # Capture the trace of the initializer run before reading z.
            init_events = sess.run(report)

            z_value = sess.run(z)
            self.assertAllClose(z_value, 2.0, 0.2, 0.2)

            trace_strings = tu.extract_all_strings_from_event_trace(
                init_events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)
            expected = [
                '__seed*',
                'vs/z1/Initializer/random_normal/RandomStandardNormal/fusion/normal',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #5
0
  def testDepthwiseConvBackpropInput1x1(self):
    """Compute a 1x1 depthwise-conv input gradient on the IPU.

    Feeds an all-zero filter and output gradient, expects an all-zero
    [1, 8, 8, 3] input gradient, and compares the compute sets from the
    event trace with the expected patterns.
    """
    with ops.device("/device:IPU:0"):
      input_sizes = constant_op.constant([1, 8, 8, 3], dtype=np.int32)
      filt = array_ops.placeholder(np.float32, [1, 1, 3, 2], name="b")
      out_backprop = array_ops.placeholder(np.float32, [1, 8, 8, 6], name="c")
      input_grad = nn.depthwise_conv2d_native_backprop_input(
          input_sizes,
          filt,
          out_backprop,
          strides=[1, 1, 1, 1],
          padding="SAME")

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # Fetch the trace once up front (presumably discards earlier events).
      sess.run(report)

      feed = {
          filt: np.zeros([1, 1, 3, 2]),
          out_backprop: np.zeros([1, 8, 8, 6]),
      }
      grad_value = sess.run(input_grad, feed)
      self.assertAllClose(grad_value, np.zeros([1, 8, 8, 3]))

      events = sess.run(report)

      trace_strings = tu.extract_all_strings_from_event_trace(events)
      compute_sets = tu.get_compute_sets_from_report(trace_strings)

      expected = [
          '__seed*',
          'DepthwiseConv2dNativeBackpropInput/fusion*/WeightTranspose',
          'DepthwiseConv2dNativeBackpropInput/fusion*/Conv_1x1',
          'Copy_',
      ]

      self.assertTrue(
          tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #6
0
    def testDefaultTruncatedNormalInitalizer(self):
        """Initialise a [2, 4] variable with the default truncated normal.

        Creates resource variable "z1" on the IPU, checks its value is within
        the given tolerances of ones, and checks the compute sets found in the
        event trace.
        """
        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("", use_resource=True):
                trunc_init = init_ops.truncated_normal_initializer()
                z = variable_scope.get_variable("z1",
                                                shape=[2, 4],
                                                dtype=np.float32,
                                                initializer=trunc_init)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())
            z_value = sess.run(z)
            self.assertAllClose(z_value, np.ones((2, 4)), 2.0, 2.0)

            # Find the names of the compute sets
            events = sess.run(report)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            expected = [
                '__seed*',
                'z1/Initializer/truncated_normal/TruncatedNormal/custom-call*/truncatedNormal',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #7
0
  def testDontOutlineInplaceExpression(self):
    """Evaluate pa + pb - pc + pd on the IPU and check the compute sets.

    Feeds the scalars 1, 2, 3 and 4, expects the result 4, three trace
    events, and one compute-set pattern per arithmetic op.
    """
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float32, [])
      pb = array_ops.placeholder(np.float32, [])
      pc = array_ops.placeholder(np.float32, [])
      pd = array_ops.placeholder(np.float32, [])
      expr = pa + pb - pc + pd

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # Fetch the trace once up front (presumably discards earlier events).
      sess.run(report)
      value = sess.run(expr, {pa: 1, pb: 2, pc: 3, pd: 4})
      self.assertAllClose(value, 4)

      events = sess.run(report)
      # assertEqual shows the actual event count when the check fails.
      self.assertEqual(len(events), 3)

      trace_strings = tu.extract_all_strings_from_event_trace(events)
      compute_sets = tu.get_compute_sets_from_report(trace_strings)

      expected = [
          '__seed*',
          'add/add.*/AddTo',
          'sub/subtract.*/AddTo',
          'add_1/add.*/AddTo',
      ]
      self.assertTrue(
          tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #8
0
  def testDepthwiseConvBackpropFilter1x1WithRelu(self):
    """Compute a depthwise-conv filter gradient followed by ReLU on the IPU.

    Feeds all-zero activations and output gradients, expects an all-zero
    [1, 1, 3, 2] result, and compares the compute sets from the event trace
    with the expected patterns.
    """
    with ops.device("/device:IPU:0"):
      activations = array_ops.placeholder(np.float32, [1, 6, 6, 3], name="a")
      filter_sizes = constant_op.constant([1, 1, 3, 2], dtype=np.int32)
      out_backprop = array_ops.placeholder(np.float32, [1, 6, 6, 6], name="c")
      filter_grad = nn.depthwise_conv2d_native_backprop_filter(
          activations,
          filter_sizes,
          out_backprop,
          strides=[1, 1, 1, 1],
          padding="SAME")
      activated = nn.relu(filter_grad)

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # Fetch the trace once up front (presumably discards earlier events).
      sess.run(report)

      feed = {
          activations: np.zeros([1, 6, 6, 3]),
          out_backprop: np.zeros([1, 6, 6, 6]),
      }
      value = sess.run(activated, feed)
      self.assertAllClose(value, np.zeros([1, 1, 3, 2]))

      events = sess.run(report)

      trace_strings = tu.extract_all_strings_from_event_trace(events)
      compute_sets = tu.get_compute_sets_from_report(trace_strings)

      expected = [
          '__seed*',
          'Copy_',
          'DepthwiseConv2dNativeBackpropFilter/fusion*/Conv_6x6',
          'Relu/custom-call*/Nonlinearity',
      ]
      self.assertTrue(
          tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #9
0
    def testSigmoidGrad(self):
        """Run gen_math_ops.sigmoid_grad on the IPU and check compute sets.

        Checks the numeric result for a fixed pair of inputs, that the event
        trace holds three events, and that the compute sets match the
        expected patterns.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [3], name="grad")
            pb = array_ops.placeholder(np.float32, [3], name="in")
            grad_op = gen_math_ops.sigmoid_grad(pa, pb)

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Fetch the trace once up front (presumably discards earlier
            # events).
            sess.run(report)

            feed = {pa: [2.0, 0.5, 1.0], pb: [-1.0, 1.0, 6.0]}
            value = sess.run(grad_op, feed)
            self.assertAllClose(value, [2.0, 0.25, 0.0])

            events = sess.run(report)
            # assertEqual shows the actual event count when the check fails.
            self.assertEqual(len(events), 3)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            expected = ['__seed*', 'SigmoidGrad/custom-call/NonLinearityGrad']
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #10
0
    def testNamedOperations(self):
        """Check that a named add inside a name scope names its compute set.

        Adds two [2, 2] inputs with op name 'my_add_op' under scope 'my_ops',
        checks the sum, and expects the compute set 'my_ops/my_add_op/add'.
        """
        with ops.device("/device:IPU:0"):
            lhs = array_ops.placeholder(np.float32, [2, 2], name="a")
            rhs = array_ops.placeholder(np.float32, [2, 2], name="b")
            with ops.name_scope('my_ops'):
                summed = math_ops.add(lhs, rhs, 'my_add_op')

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            feed = {
                lhs: [[1., 1.], [2., 3.]],
                rhs: [[0., 1.], [4., 5.]],
            }
            # Fetch the trace once up front (presumably discards earlier
            # events); the feed is passed along exactly as for the real run.
            sess.run(report, feed)

            value = sess.run(summed, feed)
            self.assertAllClose(value, [[1., 2.], [6., 8.]])

            events = sess.run(report, feed)
            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            expected = ['__seed*', 'my_ops/my_add_op/add']

            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #11
0
    def testUniformRandomNonScalarInitalizer(self):
        """Initialise a length-2 variable with random_uniform on the IPU.

        Creates resource variable "vs/z1" with a uniform initializer over
        [-2.0, 2.0], checks its value against zeros with the given
        tolerances, and checks the compute sets reported while running the
        initializer.
        """
        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("vs", use_resource=True):
                uniform_init = init_ops.random_uniform_initializer(
                    minval=-2.0, maxval=2.0)
                z = variable_scope.get_variable("z1",
                                                shape=[2],
                                                dtype=np.float32,
                                                initializer=uniform_init)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Clean existing reports
            sess.run(report)
            sess.run(variables.global_variables_initializer())
            # Capture the trace of the initializer run before reading z.
            init_events = sess.run(report)

            z_value = sess.run(z)
            self.assertAllClose(z_value, [0.0, 0.0], 2.0, 2.0)

            trace_strings = tu.extract_all_strings_from_event_trace(
                init_events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)
            expected = [
                '__seed*',
                'vs/z1/Initializer/random_uniform/RandomUniform/fusion/uniform',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #12
0
    def testScaledSubtractFromVariable(self):
        """Evaluate pa - pc * pb in float16 with a fed scale on the IPU.

        The scale `pc` is a placeholder of shape [1]. Checks the numeric
        result, that the event trace holds three events, and that the compute
        sets match the expected patterns.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float16, [3])
            pb = array_ops.placeholder(np.float16, [3])
            pc = array_ops.placeholder(np.float16, [1])
            scaled = pc * pb
            diff = pa - scaled

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Fetch the trace once up front (presumably discards earlier
            # events).
            sess.run(report)

            feed = {pa: [2.0, 0.5, 1.0], pb: [1.0, 2.0, 3.0], pc: [2.0]}
            value = sess.run(diff, feed)
            self.assertAllClose(value, [0.0, -3.5, -5.0])

            events = sess.run(report)
            # assertEqual shows the actual event count when the check fails.
            self.assertEqual(len(events), 3)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            expected = [
                '__seed*', 'host-exchange-local-copy-', 'sub/fusion/AddTo'
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #13
0
    def testScaledSubtractFrom(self):
        """Evaluate pa - const * pb in float16 with a constant scale.

        Checks the numeric result, that the event trace holds three events,
        and that the compute sets match the expected patterns.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float16, [3])
            pb = array_ops.placeholder(np.float16, [3])
            const = array_ops.constant(2.0, np.float16)
            # note how const operand index varies compared to testScaledAddTo
            # still should match as it will be reordered
            scaled = const * pb
            diff = pa - scaled

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Fetch the trace once up front (presumably discards earlier
            # events).
            sess.run(report)

            feed = {pa: [2.0, 0.5, 1.0], pb: [1.0, 2.0, 3.0]}
            value = sess.run(diff, feed)
            self.assertAllClose(value, [0.0, -3.5, -5.0])

            events = sess.run(report)
            # assertEqual shows the actual event count when the check fails.
            self.assertEqual(len(events), 3)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            expected = [
                '__seed*', 'host-exchange-local-copy-', 'sub/fusion/AddTo'
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #14
0
    def testConvolutionBiasApplyVariableLR(self):
        """Train two biased 1x1 convolutions with a fed learning rate.

        Runs one gradient-descent step on the IPU with the learning rate
        supplied through a placeholder, then checks the number of trace events
        and the reported compute sets.
        """

        def biased_conv(inputs):
            # 2 filters, 1x1 kernel, bias enabled, kernel initialised to ones.
            layer = layers.Conv2D(
                2,
                1,
                use_bias=True,
                kernel_initializer=init_ops.ones_initializer())
            return layer(inputs)

        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
            lr = array_ops.placeholder(np.float32, shape=[])

            with variable_scope.variable_scope("vs", use_resource=True):
                y = biased_conv(x)
                y = biased_conv(y)

            loss = math_ops.reduce_sum(y)
            optimizer = gradient_descent.GradientDescentOptimizer(lr)
            train = optimizer.minimize(loss)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # Fetch the trace once here (presumably discards the events of
            # the initializer run).
            sess.run(report)

            feed = {x: np.zeros([1, 4, 4, 2]), lr: 0.1}
            sess.run([train, loss], feed)

            events = sess.run(report)
            # 2xcompile, 1xupload, 1xload, 1xdownload, 1xexecute
            self.assertEqual(len(events), 6)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)
            expected = [
                '__seed*',
                'Copy_',
                'host-exchange-local-copy-',
                'vs/conv2d/BiasAdd/fusion*/addToChannel',
                'vs/conv2d/Conv2D/convolution*',
                'vs/conv2d_1/BiasAdd/fusion.2/addToChannel',
                'GradientDescent/update_vs/conv2d/bias/ResourceApplyGradientDescent/fusion.3/ReduceFinalStage/IntermediateToOutput/Reduce',
                'GradientDescent/update_vs/conv2d/bias/ResourceApplyGradientDescent/fusion*/negate/Op/Negate',
                'gradients/vs/conv2d_1/Conv2D_grad/Conv2DBackpropFilter/fusion*/Conv_4x4/',
                'gradients/vs/conv2d_1/Conv2D_grad/Conv2DBackpropFilter/fusion*/AddTo',
                'GradientDescent/update_vs/conv2d_1/bias/ResourceApplyGradientDescent/multiply*/Op/Multiply',
                'GradientDescent/update_vs/conv2d_1/bias/ResourceApplyGradientDescent/subtract*/AddTo',
                'vs/conv2d/BiasAdd/fusion*/addToChannel',
                'Sum/reduce*/ReduceFinalStage/IntermediateToOutput/Reduce',
            ]

            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #15
0
    def testBatchNormAndGroupNormalizeMixedInference(self):
        """Mix a group-norm inference op and a batch norm in one IPU graph.

        Builds conv -> group-norm inference -> conv -> fused batch norm, runs
        it on zeros and checks the compute sets reported in the event trace.
        """

        def conv_1x1(inputs):
            # 2 filters, 1x1 kernel, no bias, kernel initialised to ones.
            return convolutional.conv2d(
                inputs,
                2,
                1,
                use_bias=False,
                kernel_initializer=init_ops.ones_initializer())

        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

            with variable_scope.variable_scope("vs", use_resource=True):
                y = conv_1x1(x)
                gamma = constant_op.constant([0.5, 0.5], np.float32)
                beta = constant_op.constant([0.5, 0.5], np.float32)
                mean = constant_op.constant([0.5, 0.5], np.float32)
                inv_std_dev = constant_op.constant([0.5, 0.5], np.float32)
                y = gen_popnn_ops.popnn_group_norm_inference(
                    inputs=y,
                    gamma=gamma,
                    beta=beta,
                    mean=mean,
                    inv_std_dev=inv_std_dev,
                    data_format="NHWC",
                    epsilon=0.0015,
                    num_groups=2)
                y = conv_1x1(y)
                y = layers_norm.batch_normalization(y, fused=True)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # Fetch the trace once here (presumably discards the events of
            # the initializer run).
            sess.run(report)

            sess.run(y, {x: np.zeros([1, 4, 4, 2])})

            events = sess.run(report)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            # Would fail if there were two batch norms in the graph
            expected = [
                '__seed*',
                'host-exchange-local-copy',
                'Copy_',
                'vs/conv2d/Conv2D/convolution.*/Conv_1x1/Convolve',
                'vs/PopnnGroupNormInference/custom-call*/',
                'vs/batch_normalization/FusedBatchNorm/batch-norm-inference.*/',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #16
0
    def testConvolutionsMatchFwdBwdWuVariableLR(self):
        """Train three identical 1x1 convolutions with a fed learning rate.

        Runs one gradient-descent step on the IPU and checks the compute sets
        in the event trace against the expected patterns.
        """

        def named_conv(inputs, layer_name):
            # 2 filters, 1x1 kernel, no bias, kernel initialised to ones.
            layer = layers.Conv2D(
                2,
                1,
                use_bias=False,
                kernel_initializer=init_ops.ones_initializer(),
                name=layer_name)
            return layer(inputs)

        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
            lr = array_ops.placeholder(np.float32, shape=[])

            with variable_scope.variable_scope("vs", use_resource=True):
                y = named_conv(x, 'conv1')
                y = named_conv(y, 'conv2')
                y = named_conv(y, 'conv3')

            loss = math_ops.reduce_sum(y)
            optimizer = gradient_descent.GradientDescentOptimizer(lr)
            train = optimizer.minimize(loss)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # Fetch the trace once here (presumably discards the events of
            # the initializer run).
            sess.run(report)

            feed = {x: np.zeros([1, 4, 4, 2]), lr: 0.1}
            sess.run([train, loss], feed)

            events = sess.run(report)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            # Fwd and BackpropInput should be shared
            # Weight transpose for BackpropInput should be present
            # Both BackpropFilter should be shared
            expected = [
                '__seed*',
                'host-exchange-local-copy-',
                'Copy_',
                'vs/conv1/Conv2D/convolution.*/Conv_1x1',
                'Sum/reduce.*/ReduceFinalStage/IntermediateToOutput/Reduce',
                'gradients/vs/conv3/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Conv_4x4',
                'gradients/vs/conv3/Conv2D_grad/Conv2DBackpropFilter/fusion.*/AddTo',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #17
0
    def testFwdAndBwdMaxPool(self):
        """Run a 2x2 max-pool and its gradient on the IPU.

        Feeds np.arange(16) reshaped to (1, 4, 4, 1) and an output gradient
        filled with 0.1, checks both the pooled output and the input
        gradient, that the event trace holds three events, and that the
        compute sets match the expected patterns.
        """
        # Named input_data rather than `input` so the builtin is not shadowed.
        input_data = np.arange(16).reshape(1, 4, 4, 1)
        output_grad = np.full((1, 2, 2, 1), 0.1)

        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [1, 4, 4, 1], name="a")
            pb = array_ops.placeholder(np.float32, [1, 2, 2, 1], name="b")
            pooled = nn.max_pool(pa,
                                 ksize=[1, 2, 2, 1],
                                 strides=[1, 2, 2, 1],
                                 data_format='NCHW',
                                 padding='SAME')
            pooled_grad = gen_nn_ops.max_pool_grad(pa,
                                                   pooled,
                                                   pb,
                                                   ksize=[1, 2, 2, 1],
                                                   strides=[1, 2, 2, 1],
                                                   data_format='NCHW',
                                                   padding='SAME')

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Fetch the trace once up front (presumably discards earlier
            # events).
            sess.run(report)
            feed = {
                pa: input_data,
                pb: output_grad,
            }
            output, input_grad = sess.run((pooled, pooled_grad), feed)
            self.assertAllClose(output, [[[[5.], [7.]], [[13.], [15.]]]])
            self.assertAllClose(
                input_grad,
                [[[[0.], [0.], [0.], [0.]], [[0.], [0.1], [0.], [0.1]],
                  [[0.], [0.], [0.], [0.]], [[0.], [0.1], [0.], [0.1]]]])

            events = sess.run(report)
            # assertEqual shows the actual event count when the check fails.
            self.assertEqual(len(events), 3)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            expected = [
                '__seed*',
                'Copy_*',
                'MaxPool/custom-call*/maxPool2x2/',
                'MaxPoolGrad/custom-call*/maxPool2x2',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #18
0
    def testWideConstantWithAllocationTarget(self):
        """Slice a large constant inside a while loop compiled for the IPU.

        The loop runs twice, each time adding the mean of a [1, 1, 2048]
        slice of a constant filled with 4 to the fed value 10, so the
        expected result is 18. Also checks the event count, the compute sets
        and that the maximum tile size stays below 60000.
        """
        # This test will fail if the dynamic slice is not mapped correctly.
        dtype = np.float32
        shape = (512, 2, 2048)

        def my_net(y):
            def cond(i, x, y):
                return i < 2

            def body(i, x, y):
                s = array_ops.slice(x, [i, i, i], [1, 1, 2048])
                y = y + math_ops.reduce_mean(s)
                i = i + 1
                return (i, x, y)

            i = 0
            c = constant_op.constant(4, shape=shape, dtype=dtype, name="c")
            return control_flow_ops.while_loop(cond, body, (i, c, y))[2]

        with ops.device('cpu'):
            y_in = array_ops.placeholder(dtype, [1])
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with ops.device("/device:IPU:0"):
            compiled = xla.compile(my_net, inputs=[y_in])

        with tu.ipu_session() as sess:
            # Fetch the trace once up front (presumably discards earlier
            # events).
            sess.run(report)
            # Keep the fetched value in its own name instead of rebinding the
            # placeholder variable.
            outputs = sess.run(compiled, {y_in: [10]})
            self.assertAllClose(outputs[0], [18])

            events = sess.run(report)
            # assertEqual shows the actual event count when the check fails.
            self.assertEqual(len(events), 3)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            expected = [
                '__seed*',
                'Copy_*_to_*',
                'while/Slice/dynamic-slice*/dynamicSlice',
                'while/Mean/reduce',
                'while/Mean/multiply',
                'while/add*/add*/AddTo',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))

            max_tile_size = tu.get_maximum_tile_size_from_events(trace_strings)
            # assertLess shows the offending value when the check fails.
            self.assertLess(max_tile_size, 60000)
コード例 #19
0
    def testBatchNormalizeInferenceDontMatchDifferentTypes(self):
        """Run float32 and float16 conv + batch-norm stages in one IPU graph.

        Builds conv -> batch norm -> cast to float16 -> conv -> batch norm,
        runs it on zeros, and expects separate compute sets for both
        convolutions and both batch norms.
        """

        def conv_1x1(inputs):
            # 2 filters, 1x1 kernel, no bias, kernel initialised to ones.
            return convolutional.conv2d(
                inputs,
                2,
                1,
                use_bias=False,
                kernel_initializer=init_ops.ones_initializer())

        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

            with variable_scope.variable_scope("vs", use_resource=True):
                y = conv_1x1(x)
                y = layers_norm.batch_normalization(y, fused=True)
                y = math_ops.cast(y, np.float16)
                y = conv_1x1(y)
                y = layers_norm.batch_normalization(y, fused=True)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # Fetch the trace once here (presumably discards the events of
            # the initializer run).
            sess.run(report)

            sess.run(y, {x: np.zeros([1, 4, 4, 2])})

            events = sess.run(report)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)
            # Matches two convolutions
            expected = [
                '__seed*',
                'host-exchange-local-copy-',
                'Copy_',
                'vs/conv2d/Conv2D/convolution.*/Conv_1x1',
                'vs/batch_normalization/FusedBatchNorm/batch-norm-inference.*/',
                'vs/Cast/convert.*/Cast',
                'vs/conv2d_1/Conv2D/convolution.*/Conv_1x1',
                'vs/batch_normalization_1/FusedBatchNormV2/batch-norm-inference.*/',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #20
0
    def testBatchNormalizeInferenceMatchWithSharding(self):
        """Run two conv + batch-norm stages, each built under ipu_shard(0).

        Runs the graph on zeros with the system configured as sharded and
        checks the compute sets in the event trace; the expected list holds a
        single convolution and a single batch-norm pattern.
        """

        def conv_1x1(inputs):
            # 2 filters, 1x1 kernel, no bias, kernel initialised to ones.
            return convolutional.conv2d(
                inputs,
                2,
                1,
                use_bias=False,
                kernel_initializer=init_ops.ones_initializer())

        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

            with variable_scope.variable_scope("vs", use_resource=True):
                with tu.ipu_shard(0):
                    first_conv = conv_1x1(x)
                    first_norm = layers_norm.batch_normalization(first_conv,
                                                                 fused=True)

                with tu.ipu_shard(0):
                    second_conv = conv_1x1(first_norm)
                    second_norm = layers_norm.batch_normalization(second_conv,
                                                                  fused=True)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True, sharded=True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # Fetch the trace once here (presumably discards the events of
            # the initializer run).
            sess.run(report)

            sess.run(second_norm, {x: np.zeros([1, 4, 4, 2])})

            events = sess.run(report)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            # Would fail if there were two batch norms in the graph
            expected = [
                '__seed*',
                '*OnTileCopy*',
                'vs/conv2d/Conv2D/convolution.*/Conv_1x1/Convolve',
                'vs/batch_normalization/FusedBatchNorm/batch-norm-inference.*/',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #21
0
    def testAvgPoolSameWithReshape(self):
        """Run a 5x5, stride-2 SAME average pool on seeded random data.

        Compares the IPU result with precomputed reference values and checks
        the event count and compute sets in the event trace.
        """
        np.random.seed(0)
        shape = [1, 10, 10, 1]
        data = np.random.uniform(0, 1, shape)
        # The expected answer was generated using TF on the cpu
        expected = [[[[0.64431685], [0.51738459], [0.49705142], [0.60235918],
                      [0.73694557]],
                     [[0.57755166], [0.47387227], [0.40451217], [0.4876942],
                      [0.55843753]],
                     [[0.49037799], [0.4466258], [0.35829377], [0.40070742],
                      [0.37205362]],
                     [[0.47563809], [0.4075647], [0.34894851], [0.35470542],
                      [0.3322109]],
                     [[0.52914065], [0.45464769], [0.38156652], [0.32455513],
                      [0.33199897]]]]

        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, shape, name="a")
            pooled = nn.avg_pool(pa,
                                 ksize=[1, 5, 5, 1],
                                 strides=[1, 2, 2, 1],
                                 data_format='NHWC',
                                 padding='SAME',
                                 name="avg")

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # Fetch the trace once here (presumably discards earlier events).
            sess.run(report)

            value = sess.run(pooled, {pa: data})
            self.assertAllClose(value, expected)

            events = sess.run(report)
            self.assertEqual(len(events), 4)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)
            patterns = ['__seed*', 'avg/custom-call*/avgPool5x5']
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets, patterns))
コード例 #22
0
  def testConv8x8_WithBias(self):
    """Run an 8x8, stride-4 VALID convolution plus bias add on the IPU.

    Graph ops are built once per entry of self.data_formats; the session
    below then runs with whatever the final loop iteration left bound.
    """
    # NOTE(review): the configure/session section sits outside the loop, so
    # only the ops built for the last data format are executed. Confirm
    # whether it was meant to be indented into the loop body.
    for fmt in self.data_formats:
      with ops.device("/device:IPU:0"):
        inp = array_ops.placeholder(
            np.float32, self._ip_shp([1, 84, 84, 4], fmt), name="inp")
        wei = array_ops.placeholder(np.float32, [8, 8, 4, 16], name="wei")
        bia = array_ops.placeholder(np.float32, [16], name="bia")
        conv_out = nn_ops.conv2d(
            inp,
            wei,
            strides=self._ip_shp([1, 4, 4, 1], fmt),
            padding="VALID",
            data_format=fmt,
            name='cnv4')
        output = nn_ops.bias_add(conv_out, bia, data_format=fmt, name='ba4')

      with ops.device('cpu'):
        report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # Fetch the trace once up front (presumably discards earlier events).
      sess.run(report)

      feed = {
          inp: np.zeros(self._ip_shp([1, 84, 84, 4], fmt)),
          wei: np.zeros([8, 8, 4, 16]),
          bia: np.zeros([16]),
      }
      value = sess.run(output, feed)
      self.assertAllClose(value, np.zeros(self._ip_shp([1, 20, 20, 16], fmt)))

      events = sess.run(report)

      trace_strings = tu.extract_all_strings_from_event_trace(events)
      compute_sets = tu.get_compute_sets_from_report(trace_strings)

      expected = [
          '__seed*',
          'host-exchange-local-copy-',
          'Copy_XLA_Args/arg2.*_weights_to_cnv4*/convolution.*/Conv_8x8_stride4x4/weightsRearranged',
          'cnv4*/convolution.*/Conv_8x8_stride4x4',
          'ba4*/fusion/addToChannel',
      ]
      self.assertTrue(
          tu.check_all_compute_sets_and_list(compute_sets, expected))
コード例 #23
0
  def tesInplaceAddCopyWithInplacePeer2(self):
    # Evaluates (pa + pb * pc) / (transpose(pa) * pb + pc) on the IPU device,
    # compares it with the same expression computed in NumPy, and then checks
    # the compute sets listed in the event trace.
    #
    # NOTE(review): the method name starts with "tes" rather than "test", so
    # standard unittest discovery will not collect it - confirm whether this
    # is a typo or a deliberate way of disabling the test. The name is left
    # unchanged here.
    data_a = np.array([[10, -10], [-5, 5]])
    data_b = np.array([[-15, 15], [25, -25]])
    data_c = 2
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float32, [2, 2])
      pb = array_ops.placeholder(np.float32, [2, 2])
      pc = array_ops.placeholder(np.float32, [])
      a = array_ops.transpose(pa)
      b = pa + pb * pc
      c = a * pb + pc
      d = b / c

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # First fetch of the trace op; its result is discarded.
      sess.run(report)
      fd = {
          pa: data_a,
          pb: data_b,
          pc: data_c,
      }
      np_result = (data_a + data_b * data_c) / (
          np.transpose(data_a) * data_b + data_c)
      result = sess.run(d, fd)
      self.assertAllClose(result, np_result)

      result = sess.run(report)
      # assertEqual reports both values on failure, unlike assertTrue(a == b).
      self.assertEqual(len(result), 3)  # compile_begin, compile_end, execute

      s = tu.extract_all_strings_from_event_trace(result)
      cs_list = tu.get_compute_sets_from_report(s)

      # One pattern per element. The original list was missing the comma after
      # the transpose pattern, so Python's implicit string-literal
      # concatenation fused it with the following multiply pattern.
      ok = [
          '__seed*',
          'Copy_XLA_Args/arg0.*_to_transpose/transpose',
          'mul/multiply.*/Op/Multiply',
          'add/add.*/AddTo',
          'mul_1/multiply.*/Op/Multiply',
          'add_1/add.*/AddTo',
          'truediv/divide.*/Op/Divide',
      ]
      self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
コード例 #24
0
    def testConvolutionsDontMatchDifferentDevices(self):
        # Chains two identically-configured Conv2D layers, the first built
        # under tu.ipu_shard(0) and the second under tu.ipu_shard(1), runs the
        # result on all-zero input and checks that the event trace lists a
        # separate convolution compute set for each layer.
        input_shape = [1, 4, 4, 2]

        def ones_conv(inputs):
            # Conv2D(2, 1) without bias and with an all-ones kernel.
            layer = layers.Conv2D(
                2,
                1,
                use_bias=False,
                kernel_initializer=init_ops.ones_initializer())
            return layer(inputs)

        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=input_shape)

            with variable_scope.variable_scope("vs", use_resource=True):
                with tu.ipu_shard(0):
                    first = ones_conv(x)
                with tu.ipu_shard(1):
                    y = ones_conv(first)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True, sharded=True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # First fetch of the trace op; its result is discarded.
            sess.run(report)

            sess.run(y, {x: np.zeros(input_shape)})

            events = sess.run(report)
            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            # Note how there are two convolutions
            expected_patterns = [
                '__seed*',
                '*OnTileCopy*',
                'vs/conv2d/Conv2D/convolution.*',
                'Copy_vs/conv2d/Conv2D/convolution.*',
                'vs/conv2d_1/Conv2D/convolution.*',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets,
                                                   expected_patterns))
コード例 #25
0
    def testConvolutionsDontMatchDifferentConvParams(self):
        # Chains two Conv2D layers that differ only in their strides (the
        # second one uses strides=(2, 1)), runs the result on all-zero input
        # and checks that the event trace lists a compute set for each layer.
        input_shape = [1, 4, 4, 2]

        def ones_conv(inputs, **extra):
            # Conv2D(2, 1) without bias and with an all-ones kernel; `extra`
            # carries any additional layer keyword arguments.
            layer = layers.Conv2D(
                2,
                1,
                use_bias=False,
                kernel_initializer=init_ops.ones_initializer(),
                **extra)
            return layer(inputs)

        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=input_shape)

            with variable_scope.variable_scope("vs", use_resource=True):
                first = ones_conv(x)
                y = ones_conv(first, strides=(2, 1))

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # First fetch of the trace op; its result is discarded.
            sess.run(report)

            sess.run(y, {x: np.zeros(input_shape)})

            events = sess.run(report)
            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            # Matches two convolutions
            expected_patterns = [
                '__seed*',
                'Copy_*weightsRearranged',
                'host-exchange-local-copy-',
                'vs/conv2d/Conv2D/convolution.*/Conv_1x1',
                'vs/conv2d_1/Conv2D/convolution.*/Conv_1x1',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets,
                                                   expected_patterns))
コード例 #26
0
    def test3DConv8x8x8_WithBias(self):
        # Runs a VALID conv3d with strides [1, 4, 4, 4, 1] followed by a bias
        # add on the IPU device using all-zero inputs, then checks the numeric
        # result and the compute sets listed in the event trace.
        input_shape = [1, 84, 84, 84, 2]
        kernel_shape = [8, 8, 8, 2, 4]
        bias_shape = [4]

        with ops.device("/device:IPU:0"):
            inp = array_ops.placeholder(np.float32, input_shape, name="inp")
            wei = array_ops.placeholder(np.float32, kernel_shape, name="wei")
            bia = array_ops.placeholder(np.float32, bias_shape, name="bia")
            conv = nn_ops.conv3d(inp,
                                 wei,
                                 strides=[1, 4, 4, 4, 1],
                                 padding="VALID")
            output = nn_ops.bias_add(conv, bia)

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # First fetch of the trace op; its result is discarded.
            sess.run(report)

            feed = {
                inp: np.zeros(input_shape),
                wei: np.zeros(kernel_shape),
                bia: np.zeros(bias_shape),
            }
            actual = sess.run(output, feed)
            self.assertAllClose(actual, np.zeros([1, 20, 20, 20, 4]))

            events = sess.run(report)
            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            expected_patterns = [
                '__seed*',
                'host-exchange-local-copy-',
                'Copy_',
                'Conv3D/convolution.*/Conv_8x8x8_stride4x4x4',
                'BiasAdd/fusion/addToChannel',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets,
                                                   expected_patterns))
コード例 #27
0
    def testAvgPoolValidWithBroadcast(self):
        # Runs a 5x5, stride-2, VALID NHWC average pool on the IPU device over
        # seeded random input and compares against hard-coded reference
        # values, then checks the compute sets listed in the event trace.
        np.random.seed(0)
        shape = [1, 10, 10, 1]
        data = np.random.uniform(0, 1, shape)
        # The expected answer was generated using TF on the cpu
        expected = [[[[0.52647954], [0.44196457], [0.49284577]],
                     [[0.44039682], [0.44067329], [0.44934618]],
                     [[0.46444583], [0.45419583], [0.38236427]]]]

        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, shape, name="a")
            pooled = nn.avg_pool(pa,
                                 ksize=[1, 5, 5, 1],
                                 strides=[1, 2, 2, 1],
                                 data_format='NHWC',
                                 padding='VALID',
                                 name="avg")

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # First fetch of the trace op; its result is discarded.
            sess.run(report)

            actual = sess.run(pooled, {pa: data})
            self.assertAllClose(actual, expected)

            events = sess.run(report)
            self.assertEqual(len(events), 4)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            expected_patterns = ['__seed*', 'avg/custom-call*/avgPool5x5']
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets,
                                                   expected_patterns))
コード例 #28
0
    def testConvWithBnAndRelu(self):
        # Builds Conv2D (with bias) -> fused batch normalization -> relu on
        # the IPU device, runs it on all-zero input and checks the number of
        # trace events and the compute sets they list.
        input_shape = [1, 4, 4, 2]

        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=input_shape)
            with variable_scope.variable_scope("vs", use_resource=True):
                conv_layer = layers.Conv2D(
                    2,
                    1,
                    use_bias=True,
                    kernel_initializer=init_ops.ones_initializer())
                conv_out = conv_layer(x)
                normed = layers_norm.batch_normalization(conv_out, fused=True)
                y = nn_ops.relu(normed)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # First fetch of the trace op; its result is discarded.
            sess.run(report)

            sess.run(y, {x: np.zeros(input_shape)})

            events = sess.run(report)
            # 2xcompile, 1xupload 1xload, 1xdownload, 1xexecute
            self.assertEqual(len(events), 6)

            trace_strings = tu.extract_all_strings_from_event_trace(events)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            expected_patterns = [
                '__seed*',
                'host-exchange-local-copy',
                'Copy_',
                'vs/conv2d/Conv2D/convolution.*/Conv_1x1',
                'vs/conv2d/BiasAdd',
                'vs/batch_normalization/FusedBatchNorm/batch-norm-inference.*/',
                'vs/Relu/custom-call/Nonlinearity',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets,
                                                   expected_patterns))
コード例 #29
0
  def testInplaceTuple(self):
    # Compiles a while loop that applies tanh to two copies of the input for a
    # single iteration, runs it on the IPU device, and checks both outputs and
    # the compute sets listed in the event trace.
    def my_net(x):
      def cond(step, lhs, rhs):
        # Loop while the counter is below 1.
        return step < 1

      def body(step, lhs, rhs):
        # The ops are created in this order (counter, first tanh, second
        # tanh); the auto-generated op names depend on it.
        next_step = step + 1
        lhs_out = nn.tanh(lhs)
        rhs_out = nn.tanh(rhs)
        return (next_step, lhs_out, rhs_out)

      loop_out = control_flow_ops.while_loop(cond, body, (0, x, x))
      # Drop the loop counter, keep the two tensors.
      return loop_out[1:]

    with ops.device('cpu'):
      x = array_ops.placeholder(np.float32, [4])
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with ops.device("/device:IPU:0"):
      r = xla.compile(my_net, inputs=[x])

    with tu.ipu_session() as sess:
      # First fetch of the trace op; its result is discarded.
      sess.run(report)
      out_a, out_b = sess.run(r, {x: np.full([4], 2)})
      self.assertAllClose(out_a, np.full([4], np.tanh(2)))
      self.assertAllClose(out_b, np.full([4], np.tanh(2)))

      events = sess.run(report)
      self.assertTrue(len(events) == 3)

      trace_strings = tu.extract_all_strings_from_event_trace(events)
      compute_sets = tu.get_compute_sets_from_report(trace_strings)

      expected_patterns = [
          '__seed*',
          'Copy_*_to_*',
          'while/Tanh/tanh*/Op/Tanh',
          'while/Tanh_1/tanh*/Op/Tanh',
      ]
      self.assertTrue(
          tu.check_all_compute_sets_and_list(compute_sets, expected_patterns))
コード例 #30
0
  def testConv3x3_WithBias(self):
    # Builds a SAME convolution ('cnv3') followed by a bias add ('ba3') on the
    # IPU device for every entry of self.data_formats, then runs the graph
    # with all-zero inputs and checks both the numeric result and the compute
    # sets listed in the event trace.
    # NOTE(review): everything from configure_ipu_system() onwards sits outside
    # the `for fmt` loop, so only the tensors created by the final iteration
    # are actually executed - confirm this is intended.
    kernel_shape = [3, 3, 64, 128]
    bias_shape = [128]

    for fmt in self.data_formats:
      with ops.device("/device:IPU:0"):
        input_shape = self._ip_shp([1, 14, 14, 64], fmt)
        pa = array_ops.placeholder(np.float32, input_shape, name="a")
        pb = array_ops.placeholder(np.float32, kernel_shape, name="b")
        # NOTE(review): this placeholder is given the same name="b" as the
        # kernel placeholder above - confirm whether that is intended.
        bi = array_ops.placeholder(np.float32, bias_shape, name="b")
        conv = nn_ops.convolution(
            pa, pb, padding="SAME", data_format=fmt, name='cnv3')
        output = nn_ops.bias_add(conv, bi, data_format=fmt, name='ba3')

      with ops.device('cpu'):
        report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # First fetch of the trace op; its result is discarded.
      sess.run(report)

      feed = {
          pa: np.zeros(self._ip_shp([1, 14, 14, 64], fmt)),
          pb: np.zeros(kernel_shape),
          bi: np.zeros(bias_shape),
      }
      actual = sess.run(output, feed)
      expected = np.zeros(self._ip_shp([1, 14, 14, 128], fmt))
      self.assertAllClose(actual, expected)

      events = sess.run(report)
      trace_strings = tu.extract_all_strings_from_event_trace(events)
      compute_sets = tu.get_compute_sets_from_report(trace_strings)

      expected_patterns = [
          '__seed*',
          'Copy_*actsRearranged',
          'host-exchange-local-copy-',
          'cnv3*/convolution.*/Conv_3x3',
          'ba3*/fusion/addToChannel',
      ]

      self.assertTrue(
          tu.check_all_compute_sets_and_list(compute_sets, expected_patterns))