Ejemplo n.º 1
0
    def testBatchNormalizeFused(self):
        """Check inference-mode fused batch norm on the IPU with zero input.

        Builds `nn.fused_batch_norm` from two resource variables and the
        moments of the input, feeds zeros, expects zeros back, and finally
        verifies that none of the compute sets reported in the event trace
        match the cast blacklist pattern.
        """
        input_shape = [4, 64, 64, 4]
        x = array_ops.placeholder(np.float32, input_shape, name="a")

        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("", use_resource=True):
                # Variable "x" starts at 0.0 and variable "y" at 1.0.
                zeros_init = init_ops.constant_initializer(0.0)
                beta = variable_scope.get_variable("x",
                                                   dtype=np.float32,
                                                   shape=[4],
                                                   initializer=zeros_init)
                ones_init = init_ops.constant_initializer(1.0)
                gamma = variable_scope.get_variable("y",
                                                    dtype=np.float32,
                                                    shape=[4],
                                                    initializer=ones_init)

                # Moments are taken over every axis except the last one.
                batch_mean, batch_variance = nn.moments(x, [0, 1, 2],
                                                        name='moments')

                normed = nn.fused_batch_norm(x,
                                             gamma,
                                             beta,
                                             batch_mean,
                                             batch_variance,
                                             is_training=False)

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # First fetch of the trace; its value is discarded.
            sess.run(report)

            sess.run(variables.global_variables_initializer())
            result, _, _ = sess.run(normed, {x: np.zeros(input_shape)})
            self.assertAllClose(result, np.zeros(input_shape))

            trace = sess.run(report)
            trace_strings = tu.extract_all_strings_from_event_trace(trace)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            blacklist = ['*convert*/Cast*']
            none_blacklisted = tu.check_compute_sets_not_in_blacklist(
                compute_sets, blacklist)
            self.assertTrue(none_blacklisted)
    def testBatchNormalizeInference(self):
        """Two conv + fused batch-norm stages checked against one whitelist.

        The whitelist names only the first `conv2d` and `batch_normalization`
        scopes; as the comment on the whitelist says, the check would fail if
        the compiled graph contained two batch norms.
        """

        def conv_then_norm(tensor):
            # 1x1 convolution with 2 filters, an all-ones kernel and no bias,
            # followed by a fused batch normalization.
            conv = convolutional.conv2d(
                tensor,
                2,
                1,
                use_bias=False,
                kernel_initializer=init_ops.ones_initializer())
            return layers_norm.batch_normalization(conv, fused=True)

        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

            with variable_scope.variable_scope("vs", use_resource=True):
                first_stage = conv_then_norm(x)
                y = conv_then_norm(first_stage)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # First fetch of the trace; its value is discarded.
            sess.run(report)

            sess.run(y, {x: np.zeros([1, 4, 4, 2])})

            trace = sess.run(report)

            trace_strings = tu.extract_all_strings_from_event_trace(trace)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            # Would fail if there were two batch norms in the graph
            whitelist = [
                '__seed*', 'host-exchange-local-copy', 'Copy_',
                'vs/conv2d/Conv2D/convolution.*/Conv_1x1/Convolve',
                'vs/batch_normalization/FusedBatchNorm/batch-norm-inference.*/'
            ]
            all_matched = tu.check_all_compute_sets_and_list(
                compute_sets, whitelist)
            self.assertTrue(all_matched)
    def testEngineCompilationOptions(self):
        """Running with the engine option "some_option" must be rejected.

        Configures the IPU system with `engine_opts={"some_option":
        "some_value"}` and expects executing a simple addition to raise
        `errors.InvalidArgumentError`.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [480], name="a")
            pb = array_ops.placeholder(np.float32, [480], name="b")
            output = pa + pb

        tu.configure_ipu_system(True,
                                True,
                                True,
                                engine_opts={"some_option": "some_value"})

        fd = {pa: np.zeros([480]), pb: np.zeros([480])}

        # assertRaises fails the test when no exception is raised, which is
        # what the former try / assertTrue(False) / except construct did.
        with self.assertRaises(errors.InvalidArgumentError):
            with session_lib.Session() as sess:
                sess.run(output, fd)
    def testIpuModelDeviceWithNoReport(self):
        """With all three configure flags False, the event trace stays empty.

        Adds two 2x2 tensors on the IPU device, checks the sum, and asserts
        that the trace fetched alongside the result has length zero.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [2, 2], name="a")
            pb = array_ops.placeholder(np.float32, [2, 2], name="b")
            output = pa + pb

        with ops.device('cpu'):
            # The trace op only runs after the addition has been evaluated.
            with ops.control_dependencies([output]):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(False, False, False)

        with session_lib.Session() as sess:
            fd = {pa: [[1., 1.], [2., 3.]], pb: [[0., 1.], [4., 5.]]}
            # First fetch of the trace; its value is discarded.
            sess.run(report, fd)

            result, rep = sess.run([output, report], fd)
            self.assertAllClose(result, [[1., 2.], [6., 8.]])
            # assertEqual reports the actual length on failure.
            self.assertEqual(len(rep), 0)
Ejemplo n.º 5
0
  def testConv8x8_WithBias(self):
    """8x8 stride-4 VALID convolution followed by a bias add on the IPU.

    Feeds zeros, expects zeros of the convolved shape, then checks the
    compute sets in the event trace against a whitelist.
    """
    # NOTE(review): only graph construction sits inside this loop. The
    # session code below runs once, using `fmt` and the tensors left over
    # from the last iteration -- confirm that exercising a single format is
    # intended. An empty `self.data_formats` would leave `fmt` undefined.
    for fmt in self.data_formats:
      with ops.device("/device:IPU:0"):
        images = array_ops.placeholder(
            np.float32, self._ip_shp([1, 84, 84, 4], fmt), name="inp")
        kernel = array_ops.placeholder(np.float32, [8, 8, 4, 16], name="wei")
        bias = array_ops.placeholder(np.float32, [16], name="bia")
        conv = nn_ops.conv2d(
            images,
            kernel,
            strides=self._ip_shp([1, 4, 4, 1], fmt),
            padding="VALID",
            data_format=fmt,
            name='cnv4')
        output = nn_ops.bias_add(conv, bias, data_format=fmt, name='ba4')

      with ops.device('cpu'):
        report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # First fetch of the trace; its value is discarded.
      sess.run(report)

      feed = {
          images: np.zeros(self._ip_shp([1, 84, 84, 4], fmt)),
          kernel: np.zeros([8, 8, 4, 16]),
          bias: np.zeros([16]),
      }
      conv_out = sess.run(output, feed)
      expected = np.zeros(self._ip_shp([1, 20, 20, 16], fmt))
      self.assertAllClose(conv_out, expected)

      trace = sess.run(report)

      trace_strings = tu.extract_all_strings_from_event_trace(trace)
      compute_sets = tu.get_compute_sets_from_report(trace_strings)

      whitelist = [
          '__seed*', 'host-exchange-local-copy-',
          'Copy_XLA_Args/arg2.*_weights_to_cnv4*/convolution.*/Conv_8x8_stride4x4/weightsRearranged',
          'cnv4*/convolution.*/Conv_8x8_stride4x4', 'ba4*/fusion/addToChannel'
      ]
      all_matched = tu.check_all_compute_sets_and_list(compute_sets, whitelist)
      self.assertTrue(all_matched)
    def testConvolutionsDontMatchDifferentTypes(self):
        """Two 1x1 convolutions separated by a cast to float16.

        The whitelist lists both `conv2d` and `conv2d_1`, i.e. it expects
        the float32 and float16 convolutions to appear separately in the
        compute sets.
        """

        def conv_1x1(tensor):
            # 2 filters, kernel size 1, all-ones kernel, no bias.
            layer = layers.Conv2D(
                2,
                1,
                use_bias=False,
                kernel_initializer=init_ops.ones_initializer())
            return layer(tensor)

        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

            with variable_scope.variable_scope("vs", use_resource=True):
                float32_conv = conv_1x1(x)
                as_half = math_ops.cast(float32_conv, np.float16)
                y = conv_1x1(as_half)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # First fetch of the trace; its value is discarded.
            sess.run(report)

            sess.run(y, {x: np.zeros([1, 4, 4, 2])})

            trace = sess.run(report)

            trace_strings = tu.extract_all_strings_from_event_trace(trace)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)
            # Matches two convolutions
            whitelist = [
                '__seed*', 'Copy_*weightsRearranged',
                'host-exchange-local-copy-',
                'Copy_vs/conv2d_1/Conv2D/convolution.7/Conv_1x1/cast_',
                'vs/conv2d/Conv2D/convolution.*/Conv_1x1',
                'vs/Cast/convert.*/Cast',
                'vs/conv2d_1/Conv2D/convolution.*/Conv_1x1'
            ]
            all_matched = tu.check_all_compute_sets_and_list(
                compute_sets, whitelist)
            self.assertTrue(all_matched)
Ejemplo n.º 7
0
    def testBatchNormalizeLayerFusedTrainingFp16(self):
        """Train through a float16 fused batch norm built with training=False.

        This test checks for the correct behaviour in batch norm grad when
        performing training, but the batch norm attribute `training` is
        False.
        """
        input_shape = [4, 64, 64, 4]

        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("", use_resource=True):
                x = array_ops.placeholder(np.float16, input_shape, name="a")
                normed = layers_norm.batch_normalization(x,
                                                         fused=True,
                                                         training=False)
            loss = math_ops.reduce_sum(normed)
            sgd = gradient_descent.GradientDescentOptimizer(0.1)
            train = sgd.minimize(loss)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())
            # Fetch the normalised output and run one training step together.
            fetched = sess.run([normed, train], {x: np.zeros(input_shape)})
            normed_value = fetched[0]
            self.assertAllClose(normed_value, np.zeros(input_shape))
Ejemplo n.º 8
0
    def testBiasApplyVariableLR(self):
        """One SGD step with a fed learning rate updates the conv bias.

        Builds a float16 1x1 convolution with bias (both initialised to
        ones) followed by ReLU, minimises the sum of the output with a
        learning rate supplied through a placeholder, and checks the value
        of the variable named "vs/a/bias:0" afterwards.
        """
        # Named input_data so the builtin `input` is not shadowed.
        input_data = np.ones((1, 4, 4, 2))

        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float16, shape=[1, 4, 4, 2])
            lr = array_ops.placeholder(np.float16, shape=[])
            with variable_scope.variable_scope("vs", use_resource=True):
                y = layers.Conv2D(
                    2,
                    1,
                    use_bias=True,
                    kernel_initializer=init_ops.ones_initializer(),
                    bias_initializer=init_ops.ones_initializer(),
                    name="a")(x)
                y = nn.relu(y)

            loss = math_ops.reduce_sum(y)
            optimizer = gradient_descent.GradientDescentOptimizer(lr)
            train = optimizer.minimize(loss)

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())
            # First fetch of the trace; its value is discarded.
            sess.run(report)
            fe = {
                x: input_data,
                lr: 0.1,
            }
            # The fetched loss value is not inspected by this test.
            sess.run((loss, train), fe)
            tvars = variables.global_variables()
            tvars_vals = sess.run(tvars)

            found = False
            for var, val in zip(tvars, tvars_vals):
                if var.name == "vs/a/bias:0":
                    # Value computed using the CPU backend
                    self.assertAllClose(val, [-0.6, -0.6], atol=0.001)
                    found = True
            # Guards against the bias variable being absent or renamed.
            self.assertTrue(found)
  def tesInplaceAddCopyWithInplacePeer2(self):
    """Compare (pa + pb * pc) / (transpose(pa) * pb + pc) against NumPy.

    After checking the numeric result, the event trace is expected to hold
    three events and the compute sets are matched against a whitelist.
    """
    # NOTE(review): the method name starts with "tes", not "test", so a
    # default unittest loader would not collect it -- confirm whether it is
    # disabled on purpose before renaming.
    data_a = np.array([[10, -10], [-5, 5]])
    data_b = np.array([[-15, 15], [25, -25]])
    data_c = 2
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float32, [2, 2])
      pb = array_ops.placeholder(np.float32, [2, 2])
      pc = array_ops.placeholder(np.float32, [])
      a = array_ops.transpose(pa)
      b = pa + pb * pc
      c = a * pb + pc
      d = b / c

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # First fetch of the trace; its value is discarded.
      sess.run(report)
      fd = {
          pa: data_a,
          pb: data_b,
          pc: data_c,
      }
      np_result = (data_a + data_b * data_c) / (
          np.transpose(data_a) * data_b + data_c)
      result = sess.run(d, fd)
      self.assertAllClose(result, np_result)

      result = sess.run(report)
      self.assertEqual(len(result), 3)  # compile_begin, compile_end, execute

      s = tu.extract_all_strings_from_event_trace(result)
      cs_list = tu.get_compute_sets_from_report(s)

      # The transpose-copy and first-multiply patterns are separate entries;
      # without the comma between them Python joins the adjacent literals
      # into a single pattern.
      ok = [
          '__seed*', 'Copy_XLA_Args/arg0.*_to_transpose/transpose',
          'mul/multiply.*/Op/Multiply', 'add/add.*/AddTo',
          'mul_1/multiply.*/Op/Multiply', 'add_1/add.*/AddTo',
          'truediv/divide.*/Op/Divide'
      ]
      self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
Ejemplo n.º 10
0
    def testConvolutionsDontMatchDifferentDevices(self):
        """Two identical 1x1 convolutions placed on different IPU shards.

        The whitelist lists both `conv2d` and `conv2d_1`, i.e. it expects
        the two sharded convolutions to appear separately in the compute
        sets.
        """

        def conv_1x1(tensor):
            # 2 filters, kernel size 1, all-ones kernel, no bias.
            layer = layers.Conv2D(
                2,
                1,
                use_bias=False,
                kernel_initializer=init_ops.ones_initializer())
            return layer(tensor)

        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

            with variable_scope.variable_scope("vs", use_resource=True):
                with tu.ipu_shard(0):
                    shard0_out = conv_1x1(x)
                with tu.ipu_shard(1):
                    y = conv_1x1(shard0_out)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True, sharded=True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # First fetch of the trace; its value is discarded.
            sess.run(report)

            sess.run(y, {x: np.zeros([1, 4, 4, 2])})

            trace = sess.run(report)

            trace_strings = tu.extract_all_strings_from_event_trace(trace)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            # Note how there are two convolutions
            whitelist = [
                '__seed*', '*OnTileCopy*', 'vs/conv2d/Conv2D/convolution.*',
                'Copy_vs/conv2d/Conv2D/convolution.*',
                'vs/conv2d_1/Conv2D/convolution.*'
            ]
            all_matched = tu.check_all_compute_sets_and_list(
                compute_sets, whitelist)
            self.assertTrue(all_matched)
Ejemplo n.º 11
0
        def executeModel(inputs, expected):
            """Run `array_ops.one_hot` described by `inputs` on CPU or IPU.

            When `expected` is None the op is placed on the CPU and its
            result is returned. Otherwise it is placed on the IPU, the
            result is compared with `expected`, and None is returned.

            `inputs` is read with the keys "on", "off", "n_classes", "axis",
            "shape" and "in_values".
            """
            # The output dtype follows the dtype of the "on" value.
            out_dtype = inputs["on"].dtype

            # A missing expectation means this is the CPU reference run.
            run_on_cpu = expected is None

            with ops.device('cpu'):
                pa = array_ops.placeholder(np.int32, inputs["shape"], name="a")
                report = gen_ipu_ops.ipu_event_trace()

            if run_on_cpu:
                target_device = "cpu:0"
            else:
                target_device = "/device:IPU:0"

            # The one-hot operation under test, built on the chosen device.
            with ops.device(target_device):
                out = array_ops.one_hot(pa,
                                        inputs["n_classes"],
                                        dtype=out_dtype,
                                        on_value=inputs["on"],
                                        off_value=inputs["off"],
                                        axis=inputs["axis"])

            tu.configure_ipu_system()

            with tu.ipu_session() as sess:
                # First fetch of the trace; its value is discarded.
                sess.run(report)

                feed = {pa: np.array(inputs["in_values"])}
                result = sess.run(out, feed)

                if not run_on_cpu:
                    self.assertAllClose(result, expected)
                    return None
                return result
Ejemplo n.º 12
0
    def testAvgPoolValidWithBroadcast(self):
        """5x5 stride-2 VALID average pool over seeded random data.

        Compares the IPU result with precomputed values, expects four trace
        events, and matches the compute sets against a whitelist.
        """
        np.random.seed(0)
        shape = [1, 10, 10, 1]
        data = np.random.uniform(0, 1, shape)
        # The expected answer was generated using TF on the cpu
        expected = [[[[0.52647954], [0.44196457], [0.49284577]],
                     [[0.44039682], [0.44067329], [0.44934618]],
                     [[0.46444583], [0.45419583], [0.38236427]]]]

        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, shape, name="a")
            pooled = nn.avg_pool(pa,
                                 ksize=[1, 5, 5, 1],
                                 strides=[1, 2, 2, 1],
                                 data_format='NHWC',
                                 padding='VALID',
                                 name="avg")

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # First fetch of the trace; its value is discarded.
            sess.run(report)

            pooled_value = sess.run(pooled, {pa: data})
            self.assertAllClose(pooled_value, expected)

            trace = sess.run(report)
            self.assertEqual(len(trace), 4)

            trace_strings = tu.extract_all_strings_from_event_trace(trace)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            whitelist = ['__seed*', 'avg/custom-call*/avgPool5x5']
            all_matched = tu.check_all_compute_sets_and_list(
                compute_sets, whitelist)
            self.assertTrue(all_matched)
  def testInplaceTuple(self):
    """Apply tanh to two loop-carried tensors in a single-iteration while loop.

    The network is compiled with `xla.compile`. Both outputs must equal
    tanh(2), the trace must hold three events, and the compute sets are
    matched against a whitelist.
    """

    def my_net(x):
      def cond(i, x, y):
        # The loop counter starts at 0, so the body runs exactly once.
        return i < 1

      def body(i, x, y):
        i = i + 1
        x = nn.tanh(x)
        y = nn.tanh(y)
        return (i, x, y)

      i = 0
      # Drop the loop counter; return only the two tensors.
      return control_flow_ops.while_loop(cond, body, (i, x, x))[1:]

    with ops.device('cpu'):
      x = array_ops.placeholder(np.float32, [4])
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with ops.device("/device:IPU:0"):
      r = xla.compile(my_net, inputs=[x])

    with tu.ipu_session() as sess:
      # First fetch of the trace; its value is discarded.
      sess.run(report)
      # Distinct names keep the placeholder `x` from being rebound.
      out_x, out_y = sess.run(r, {x: np.full([4], 2)})
      expected = np.full([4], np.tanh(2))
      self.assertAllClose(out_x, expected)
      self.assertAllClose(out_y, expected)

      result = sess.run(report)
      # assertEqual reports the actual event count on failure.
      self.assertEqual(len(result), 3)

      s = tu.extract_all_strings_from_event_trace(result)
      cs_list = tu.get_compute_sets_from_report(s)

      ok = [
          '__seed*', 'Copy_*_to_*', 'while/Tanh/tanh*/Op/Tanh',
          'while/Tanh_1/tanh*/Op/Tanh'
      ]
      self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
Ejemplo n.º 14
0
    def test3DConv8x8x8_WithBias(self):
        """8x8x8 stride-4 VALID 3D convolution followed by a bias add.

        Feeds zeros, expects zeros of shape [1, 20, 20, 20, 4], then checks
        the compute sets in the event trace against a whitelist.
        """
        input_shape = [1, 84, 84, 84, 2]
        kernel_shape = [8, 8, 8, 2, 4]

        with ops.device("/device:IPU:0"):
            volume = array_ops.placeholder(np.float32, input_shape, name="inp")
            kernel = array_ops.placeholder(np.float32,
                                           kernel_shape,
                                           name="wei")
            bias = array_ops.placeholder(np.float32, [4], name="bia")
            conv = nn_ops.conv3d(volume,
                                 kernel,
                                 strides=[1, 4, 4, 4, 1],
                                 padding="VALID")
            output = nn_ops.bias_add(conv, bias)

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # First fetch of the trace; its value is discarded.
            sess.run(report)

            feed = {
                volume: np.zeros(input_shape),
                kernel: np.zeros(kernel_shape),
                bias: np.zeros([4]),
            }
            conv_out = sess.run(output, feed)
            self.assertAllClose(conv_out, np.zeros([1, 20, 20, 20, 4]))

            trace = sess.run(report)

            trace_strings = tu.extract_all_strings_from_event_trace(trace)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            whitelist = [
                '__seed*', 'host-exchange-local-copy-', 'Copy_',
                'Conv3D/convolution.*/Conv_8x8x8_stride4x4x4',
                'BiasAdd/fusion/addToChannel'
            ]
            all_matched = tu.check_all_compute_sets_and_list(
                compute_sets, whitelist)
            self.assertTrue(all_matched)
Ejemplo n.º 15
0
    def testConvWithBnAndRelu(self):
        """1x1 convolution with bias, fused batch norm and ReLU on the IPU.

        Expects six trace events for the run and matches the compute sets
        against a whitelist.
        """
        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
            with variable_scope.variable_scope("vs", use_resource=True):
                conv_layer = layers.Conv2D(
                    2,
                    1,
                    use_bias=True,
                    kernel_initializer=init_ops.ones_initializer())
                conv = conv_layer(x)
                normed = layers_norm.batch_normalization(conv, fused=True)
                y = nn_ops.relu(normed)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # First fetch of the trace; its value is discarded.
            sess.run(report)

            sess.run(y, {x: np.zeros([1, 4, 4, 2])})

            trace = sess.run(report)
            # 2xcompile, 1xupload 1xload, 1xdownload, 1xexecute
            self.assertEqual(len(trace), 6)

            trace_strings = tu.extract_all_strings_from_event_trace(trace)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            whitelist = [
                '__seed*', 'host-exchange-local-copy', 'Copy_',
                'vs/conv2d/Conv2D/convolution.*/Conv_1x1', 'vs/conv2d/BiasAdd',
                'vs/batch_normalization/FusedBatchNorm/batch-norm-inference.*/',
                'vs/Relu/custom-call/Nonlinearity'
            ]
            all_matched = tu.check_all_compute_sets_and_list(
                compute_sets, whitelist)
            self.assertTrue(all_matched)
Ejemplo n.º 16
0
    def test3DConvBackpropInput(self):
        """Input gradient of a 2x2x2 SAME 3D convolution on the IPU.

        Feeds zero filters and zero output gradients, expects zeros of the
        input shape [2, 8, 8, 8, 3], then checks the compute sets in the
        event trace against a whitelist.
        """
        filter_shape = [2, 2, 2, 3, 5]
        backprop_shape = [2, 8, 8, 8, 5]

        with ops.device("/device:IPU:0"):
            input_sizes = constant_op.constant([2, 8, 8, 8, 3], np.int32)
            # NOTE(review): the filter placeholder is named "inp" and the
            # backprop placeholder "wei"; kept as-is because the graph names
            # may be relied upon -- confirm before renaming.
            filters = array_ops.placeholder(np.float32,
                                            filter_shape,
                                            name="inp")
            out_backprop = array_ops.placeholder(np.float32,
                                                 backprop_shape,
                                                 name="wei")

            output = nn_ops.conv3d_backprop_input_v2(input_sizes,
                                                     filters,
                                                     out_backprop,
                                                     strides=[1, 1, 1, 1, 1],
                                                     padding="SAME")

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # First fetch of the trace; its value is discarded.
            sess.run(report)

            feed = {
                filters: np.zeros(filter_shape),
                out_backprop: np.zeros(backprop_shape),
            }
            grad = sess.run(output, feed)
            self.assertAllClose(grad, np.zeros([2, 8, 8, 8, 3]))

            trace = sess.run(report)

            trace_strings = tu.extract_all_strings_from_event_trace(trace)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            whitelist = [
                '__seed*', 'Copy_', 'Conv3DBackpropInputV2/fusion*/Conv_2x2x2'
            ]

            all_matched = tu.check_all_compute_sets_and_list(
                compute_sets, whitelist)
            self.assertTrue(all_matched)
    def testConvolutionsMatch(self):
        """Two identical float32 1x1 convolutions checked against one scope.

        The whitelist names only the `conv2d` scope; as the comment on the
        whitelist says, the check would fail if the compiled graph held two
        separate convolutions.
        """

        def conv_1x1(tensor):
            # 2 filters, kernel size 1, all-ones kernel, no bias.
            layer = layers.Conv2D(
                2,
                1,
                use_bias=False,
                kernel_initializer=init_ops.ones_initializer())
            return layer(tensor)

        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

            with variable_scope.variable_scope("vs", use_resource=True):
                first = conv_1x1(x)
                y = conv_1x1(first)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # First fetch of the trace; its value is discarded.
            sess.run(report)

            sess.run(y, {x: np.zeros([1, 4, 4, 2])})

            trace = sess.run(report)

            trace_strings = tu.extract_all_strings_from_event_trace(trace)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)
            # Would fail if there were two convolutions in the graph as they
            # would be called conv2d and conv2d_1
            whitelist = [
                '__seed*', 'host-exchange-local-copy-',
                'vs/conv2d/Conv2D/convolution.*/Conv_1x1', 'Copy_'
            ]
            all_matched = tu.check_all_compute_sets_and_list(
                compute_sets, whitelist)
            self.assertTrue(all_matched)
Ejemplo n.º 18
0
  def testConv3x3_WithBias(self):
    """3x3 SAME convolution followed by a bias add on the IPU.

    Feeds zeros, expects zeros with 128 output channels, then checks the
    compute sets in the event trace against a whitelist.
    """
    # NOTE(review): only graph construction sits inside this loop. The
    # session code below runs once, using `fmt` and the tensors left over
    # from the last iteration -- confirm that exercising a single format is
    # intended. An empty `self.data_formats` would leave `fmt` undefined.
    for fmt in self.data_formats:
      with ops.device("/device:IPU:0"):
        images = array_ops.placeholder(
            np.float32, self._ip_shp([1, 14, 14, 64], fmt), name="a")
        kernel = array_ops.placeholder(np.float32, [3, 3, 64, 128], name="b")
        # NOTE(review): this placeholder reuses the name "b" given to the
        # kernel placeholder above -- confirm whether that is intentional.
        bias = array_ops.placeholder(np.float32, [128], name="b")
        conv = nn_ops.convolution(
            images, kernel, padding="SAME", data_format=fmt, name='cnv3')
        output = nn_ops.bias_add(conv, bias, data_format=fmt, name='ba3')

      with ops.device('cpu'):
        report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # First fetch of the trace; its value is discarded.
      sess.run(report)

      feed = {
          images: np.zeros(self._ip_shp([1, 14, 14, 64], fmt)),
          kernel: np.zeros([3, 3, 64, 128]),
          bias: np.zeros([128]),
      }
      conv_out = sess.run(output, feed)
      expected = np.zeros(self._ip_shp([1, 14, 14, 128], fmt))
      self.assertAllClose(conv_out, expected)

      trace = sess.run(report)

      trace_strings = tu.extract_all_strings_from_event_trace(trace)
      compute_sets = tu.get_compute_sets_from_report(trace_strings)

      whitelist = [
          '__seed*', 'Copy_*actsRearranged', 'host-exchange-local-copy-',
          'cnv3*/convolution.*/Conv_3x3', 'ba3*/fusion/addToChannel'
      ]

      all_matched = tu.check_all_compute_sets_and_list(compute_sets, whitelist)
      self.assertTrue(all_matched)
Ejemplo n.º 19
0
    def testFullyConnectedWithBias(self):
        """`nn.xw_plus_b` on 2x2 inputs on the IPU.

        With x filled with 3, weights filled with 4 and a bias of ones, every
        output element must be 25. Expects four trace events and checks the
        compute sets against whitelist entries.
        """
        with ops.device("/device:IPU:0"):
            x = array_ops.placeholder(np.float32, shape=[2, 2])
            weights = array_ops.placeholder(np.float32, shape=[2, 2])
            bias = array_ops.placeholder(np.float32, shape=[2])
            x_new = nn.xw_plus_b(x, weights, bias)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        with tu.ipu_session() as sess:
            # First fetch of the trace; its value is discarded.
            sess.run(report)

            feed = {
                x: np.full([2, 2], 3),
                weights: np.full([2, 2], 4),
                bias: np.ones([2]),
            }
            out = sess.run(x_new, feed)
            self.assertAllClose(np.full([2, 2], 25), out)

            trace = sess.run(report)
            # 1xcompile, 1xload, 1xdownload, 1xexecute
            self.assertEqual(len(trace), 4)

            trace_strings = tu.extract_all_strings_from_event_trace(trace)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            whitelist = [
                '__seed*', 'host-exchange-local-copy',
                'xw_plus_b/MatMul/dot.*/Conv_1/Convolve',
                'xw_plus_b/fusion/addToChannel'
            ]
            all_whitelisted = tu.check_compute_sets_in_whitelist_entries(
                compute_sets, whitelist)
            self.assertTrue(all_whitelisted)
  def testInplaceOpAddCopyWithInplaceParent(self):
    """Sliced add/divide/add chain in which `pa` is sliced twice.

    Computes (pa[0:2] + pb[0:2]) / pc + pa[1:3] on the IPU, checks the
    numeric result, expects three trace events, and matches the compute
    sets against a whitelist.
    """
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float32, [3])
      pb = array_ops.placeholder(np.float32, [3])
      pc = array_ops.placeholder(np.float32, [])
      c = array_ops.slice(pa, [0], [2])
      d = array_ops.slice(pb, [0], [2])
      e = c + d
      f = e / pc
      g = array_ops.slice(pa, [1], [2])
      h = f + g

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # First fetch of the trace; its value is discarded.
      sess.run(report)
      fd = {
          pa: [1, 2, 3],
          pb: [5, 6, 7],
          pc: 2,
      }
      result = sess.run(h, fd)
      # ([1, 2] + [5, 6]) / 2 + [2, 3] == [5, 7]
      self.assertAllClose(result, [5, 7])

      result = sess.run(report)
      # assertEqual reports the actual event count on failure.
      self.assertEqual(len(result), 3)

      s = tu.extract_all_strings_from_event_trace(result)
      cs_list = tu.get_compute_sets_from_report(s)

      ok = [
          '__seed*', 'Copy_XLA_Args/arg*_to_Slice*/slice*.clone',
          'add/add.*/AddTo', 'truediv/divide.*/Op/Divide', 'add_1/add.*/AddTo'
      ]
      self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
    def testIpuModelDeviceWithReport(self):
        """With the default configuration, the event trace records the run.

        Adds two 2x2 tensors on the IPU device, checks the sum, and asserts
        that the trace fetched alongside it holds exactly a compile-begin, a
        compile-end and an execute event, in that order.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [2, 2], name="a")
            pb = array_ops.placeholder(np.float32, [2, 2], name="b")
            output = pa + pb

        with ops.device('cpu'):
            # The trace op only runs after the addition has been evaluated.
            with ops.control_dependencies([output]):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with session_lib.Session() as sess:
            feed = {pa: [[1., 1.], [2., 3.]], pb: [[0., 1.], [4., 5.]]}
            # First fetch of the trace; its value is discarded.
            sess.run(report, feed)

            total, trace = sess.run([output, report], feed)
            self.assertAllClose(total, [[1., 2.], [6., 8.]])
            self.assertEqual(len(trace), 3)

            events = tu.extract_all_events(trace)
            expected_types = (
                IpuTraceEvent.COMPILE_BEGIN,
                IpuTraceEvent.COMPILE_END,
                IpuTraceEvent.EXECUTE,
            )
            for position, expected_type in enumerate(expected_types):
                self.assertEqual(events[position].type, expected_type)
Ejemplo n.º 22
0
    def testRelu(self):
        """ReLU of [-6, 0, 6] on the IPU must be [0, 0, 6].

        Also expects three trace events and matches the compute sets against
        a whitelist.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [3], name="a")
            c = nn_ops.relu(pa)

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # NOTE(review): the trace is not fetched before the run here, so
            # the event count below includes anything recorded earlier --
            # confirm this is intended.
            fd = {pa: [-6.0, 0.0, 6.0]}
            result = sess.run(c, fd)
            self.assertAllClose(result, [0.0, 0.0, 6.0])

            result = sess.run(report)
            # assertEqual reports the actual event count on failure.
            self.assertEqual(len(result), 3)

            s = tu.extract_all_strings_from_event_trace(result)
            cs_list = tu.get_compute_sets_from_report(s)

            ok = ['__seed*', 'Relu/custom-call/Nonlinearity']
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
  def tesInplaceAddCopyWithInplacePeer(self):
    """Compare transpose(pa) / (pa + pb) on the IPU against NumPy.

    After checking the numeric result, the event trace is expected to hold
    three events and the compute sets are matched against a whitelist.
    """
    # NOTE(review): the method name starts with "tes", not "test", so a
    # default unittest loader would not collect it -- confirm whether it is
    # disabled on purpose before renaming.
    data_a = np.array([[10, -20], [5, 1]])
    data_b = np.array([[-12, 11], [12, -13]])
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float32, [2, 2])
      pb = array_ops.placeholder(np.float32, [2, 2])
      c = array_ops.transpose(pa)
      d = pa + pb
      e = c / d

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # First fetch of the trace; its value is discarded.
      sess.run(report)
      fd = {
          pa: data_a,
          pb: data_b,
      }
      result = sess.run(e, fd)
      np_result = np.transpose(data_a) / (data_a + data_b)
      self.assertAllClose(result, np_result)

      result = sess.run(report)
      # compile_begin, compile_end, execute; assertEqual reports the actual
      # count on failure.
      self.assertEqual(len(result), 3)

      s = tu.extract_all_strings_from_event_trace(result)
      cs_list = tu.get_compute_sets_from_report(s)

      ok = [
          '__seed*', 'host-exchange-local-copy-',
          'Copy_XLA_Args/arg0.*_to_transpose/transpose', 'add/add.*/AddTo',
          'truediv/divide.*/Op/Divide'
      ]
      self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
Ejemplo n.º 24
0
  def testDepthwiseConv3x1(self):
    """Depthwise 1x1 convolution over three channels plus a per-channel add.

    Checks the numeric result for a small hand-written input, then matches
    the compute sets in the event trace against a whitelist.
    """
    with ops.device("/device:IPU:0"):
      pa = array_ops.placeholder(np.float32, [1, 2, 2, 3], name="a")
      pb = array_ops.placeholder(np.float32, [1, 1, 3, 1], name="b")
      pc = array_ops.placeholder(np.float32, [3], name="c")
      depthwise = nn.depthwise_conv2d(
          pa, pb, strides=[1, 1, 1, 1], padding="SAME")
      output = depthwise + pc

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system()

    with tu.ipu_session() as sess:
      # First fetch of the trace; its value is discarded.
      sess.run(report)

      feed = {
          pa: [[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]],
          pb: [[[[6], [4], [2]]]],
          pc: [1, 1, 1]
      }
      conv_out = sess.run(output, feed)
      # Each channel is scaled by its own filter value (6, 4, 2), then 1 is
      # added.
      expected = [[[[7, 9, 7], [25, 21, 13]], [[43, 33, 19], [61, 45, 25]]]]
      self.assertAllClose(conv_out, expected)

      trace = sess.run(report)

      trace_strings = tu.extract_all_strings_from_event_trace(trace)
      compute_sets = tu.get_compute_sets_from_report(trace_strings)

      whitelist = [
          '__seed*', 'host-exchange-local-copy-', 'Copy_',
          'depthwise/convolution.*/Conv_1x1',
          'Copy_depthwise/convolution.*/Conv_1x1/partials_to_depthwise/convolution.*/Conv_1x1/partials[[]cloned[]]',
          'add/fusion*/addToChannel'
      ]
      all_matched = tu.check_all_compute_sets_and_list(compute_sets, whitelist)
      self.assertTrue(all_matched)
Ejemplo n.º 25
0
    def testTruncatedNormalInitalizer(self):
        """Initialise a resource variable with a truncated normal on the IPU.

        The variable is drawn with mean 1.0 and stddev 0.01, so every element
        must be close to 1. The event trace is then checked against the
        compute-set name patterns expected for the initializer.
        """
        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("", use_resource=True):
                initializer = init_ops.truncated_normal_initializer(
                    mean=1.0, stddev=0.01)
                var = variable_scope.get_variable("z1",
                                                  shape=[2, 4],
                                                  dtype=np.float32,
                                                  initializer=initializer)

        tu.configure_ipu_system()

        # Name patterns the reported compute sets are checked against.
        allowed_patterns = [
            '__seed*',
            'z1/Initializer/truncated_normal/TruncatedNormal/custom-call*/truncatedNormal',
            'z1/Initializer/truncated_normal/mul/multiply.*/Op/Multiply',
            'z1/Initializer/truncated_normal/add.*/AddTo',
        ]

        with tu.ipu_session() as sess:
            # Discard any reports that already exist.
            sess.run(report)

            sess.run(variables.global_variables_initializer())
            value = sess.run(var)
            self.assertAllClose(value, np.ones((2, 4)), rtol=0.2, atol=0.2)

            # Collect the names of the compute sets that were executed.
            trace = sess.run(report)
            trace_strings = tu.extract_all_strings_from_event_trace(trace)
            compute_sets = tu.get_compute_sets_from_report(trace_strings)

            self.assertTrue(
                tu.check_all_compute_sets_and_list(compute_sets,
                                                   allowed_patterns))
Ejemplo n.º 26
0
    def test3DConv3x3x3_WithBias(self):
        """Run a 3x3x3 3-D convolution followed by a bias add on the IPU.

        All inputs are zero, so the output must be all zeros. The event trace
        is then checked against the expected compute-set name patterns.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [1, 14, 14, 14, 16],
                                       name="a")
            pb = array_ops.placeholder(np.float32, [3, 3, 3, 16, 32], name="b")
            # The bias gets its own graph name instead of sharing "b" with the
            # filter placeholder.
            bi = array_ops.placeholder(np.float32, [32], name="bias")
            output = nn_ops.convolution(pa, pb, padding="SAME")
            output = nn_ops.bias_add(output, bi)

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Read the trace once up front so that only events produced by
            # the computation below show up in the second read.
            sess.run(report)

            fd = {
                pa: np.zeros([1, 14, 14, 14, 16]),
                pb: np.zeros([3, 3, 3, 16, 32]),
                bi: np.zeros([32]),
            }
            result = sess.run(output, fd)
            # "SAME" padding keeps the 14x14x14 spatial extent; the filter
            # maps 16 input channels to 32 output channels.
            self.assertAllClose(result, np.zeros([1, 14, 14, 14, 32]))

            result = sess.run(report)

            s = tu.extract_all_strings_from_event_trace(result)
            cs_list = tu.get_compute_sets_from_report(s)

            # Name patterns the reported compute sets are checked against.
            ok = [
                '__seed*', 'host-exchange-local-copy-', 'Copy_',
                'convolution/convolution.*/Conv_3x3x3',
                'BiasAdd/fusion/addToChannel'
            ]
            self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
Ejemplo n.º 27
0
    def testInTopK(self):
        """Check `nn.in_top_k` on the IPU with targets that are the arg-max.

        Random scores are generated for each row and the index of each row's
        largest score is used as the target, so every row must report True
        for top-8 membership.
        """
        batchsize = 4
        n_categories = 1200
        topn = 8

        def model(a, b):
            return nn.in_top_k(a, b, topn)

        with ops.device('cpu'):
            pa = array_ops.placeholder(np.float32, [batchsize, n_categories])
            pb = array_ops.placeholder(np.int32, [batchsize])
            report = gen_ipu_ops.ipu_event_trace()

        with ops.device("/device:IPU:0"):
            out = model(pa, pb)

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # Read the trace once up front so that only events produced by
            # the computation below show up in the second read.
            sess.run(report)

            # Random scores, scaled by the L2 norm of the whole matrix.
            scores = np.random.rand(batchsize, n_categories)
            scores = scores / np.sqrt(np.sum(scores**2))

            # Index of the highest-scoring category in each row.
            targets = (-scores).argsort(axis=1)[:, :1]
            targets = targets.reshape([batchsize])

            # The arg-max is always within the top `topn` entries.
            expected = [True] * batchsize

            result = sess.run(out, {pa: scores, pb: targets})
            self.assertAllClose(result, expected)

            events = sess.run(report)
            # assertEqual reports the actual event count on failure.
            self.assertEqual(len(events), 3)
    def testReportEveryNthExecution(self):
        """Check which execute events carry an execution report.

        The same add graph is executed five times under three configurations:
        without `report_every_nth_execution`, with it set to 2, and with it
        set to 1. Each time the execute events are inspected to see which of
        them have an empty `execution_report`.
        """
        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [2, 2], name="a")
            pb = array_ops.placeholder(np.float32, [2, 2], name="b")
            out = math_ops.add(pa, pb)

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        def five_execute_events(sess):
            """Run `out` five times and return the recorded execute events."""
            feed = {pa: [[1., 1.], [2., 3.]], pb: [[0., 1.], [4., 5.]]}
            # First read of the trace, before the runs of interest.
            sess.run(report, feed)

            for _ in range(5):
                sess.run(out, feed)

            trace = sess.run(report, feed)
            events = tu.extract_all_execute_events(trace)
            self.assertEqual(len(events), 5)  # execute x 5
            return events

        # No report_every_nth_execution given: every event after the first
        # must have an empty report.
        tu.configure_ipu_system(compilation_trace=False)

        with tu.ipu_session() as sess:
            for index, event in enumerate(five_execute_events(sess)):
                if index == 0:
                    continue
                self.assertTrue(len(event.execute.execution_report) == 0)

            sess.close()

        # Every second execution: odd-indexed events must have an empty
        # report.
        tu.configure_ipu_system(compilation_trace=False,
                                report_every_nth_execution=2)

        with tu.ipu_session() as sess:
            for index, event in enumerate(five_execute_events(sess)):
                if index % 2 == 0:
                    continue
                self.assertTrue(len(event.execute.execution_report) == 0)

            sess.close()

        # Every execution: all events must carry a non-empty report.
        tu.configure_ipu_system(compilation_trace=False,
                                report_every_nth_execution=1)

        with tu.ipu_session() as sess:
            for event in five_execute_events(sess):
                self.assertTrue(len(event.execute.execution_report) > 0)

            sess.close()
Ejemplo n.º 29
0
    def testResourceCountsAreCorrect(self):
        """Count host/device transfers for a two-layer model trained on IPU.

        Two weight matrices are trainable and two biases are not. After five
        training steps the IO events in the trace are counted per handle, then
        `w1` and `b1` are fetched explicitly and the events are counted again.
        """
        w1_init = np.array([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=np.float32)
        w2_init = np.array([[1, 2], [3, 4]], dtype=np.float32)

        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("vs", use_resource=True):
                w1 = variable_scope.get_variable(
                    "w1",
                    shape=[4, 2],
                    dtype=np.float32,
                    initializer=init_ops.constant_initializer(w1_init))
                b1 = variable_scope.get_variable(
                    "b1",
                    shape=[2],
                    dtype=np.float32,
                    trainable=False,
                    initializer=init_ops.constant_initializer(
                        np.array([2, 3], dtype=np.float32)))
                w2 = variable_scope.get_variable(
                    "w2",
                    shape=[2, 2],
                    dtype=np.float32,
                    initializer=init_ops.constant_initializer(w2_init))
                b2 = variable_scope.get_variable(
                    "b2",
                    shape=[2],
                    dtype=np.float32,
                    trainable=False,
                    initializer=init_ops.constant_initializer(
                        np.array([2, 3], dtype=np.float32)))

            inputs = array_ops.placeholder(np.float32, shape=[1, 4])
            hidden = math_ops.matmul(inputs, w1) + b1
            logits = math_ops.matmul(hidden, w2) + b2

            loss = math_ops.reduce_sum(logits)
            optimizer = gradient_descent.GradientDescentOptimizer(0.1)
            train = optimizer.minimize(loss)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        def split_by_direction(events):
            """Return (host-to-device, device-to-host) lists of IO events."""
            to_device = [
                evt for evt in events
                if evt[0] == IpuTraceEvent.HOST_TO_DEVICE_TRANSFER
            ]
            to_host = [
                evt for evt in events
                if evt[0] == IpuTraceEvent.DEVICE_TO_HOST_TRANSFER
            ]
            return to_device, to_host

        def occurrences(events, handle):
            """Count the events whose second field equals `handle`."""
            return sum(1 for evt in events if evt[1] == handle)

        # Handles as they appear in the IO events: inputs of the graph ...
        d_dl = "0.0"
        w1_dl = "1.0"
        b1_dl = "2.0"
        w2_dl = "3.0"
        b2_dl = "4.0"

        # ... and outputs. The biases are not outputs of the graph.
        d_ul = "out_0.0"
        w1_ul = "out_1.0"
        w2_ul = "out_2.0"

        batch_a = [[7, 3, 5, 9]]
        batch_b = [[1, 2, 3, 4]]

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # Read the trace so that only the training steps are reported.
            sess.run(report)

            for batch in (batch_a, batch_b, batch_a, batch_b, batch_a):
                sess.run([train, loss],
                         {inputs: np.array(batch, dtype=np.float32)})

            rep = sess.run(report)
            io_evts = tu.extract_all_io_events(rep)

            # The initialization is constant, so it generates no events on
            # the IPU.
            host_to_device, device_to_host = split_by_direction(io_evts)
            self.assertEqual(len(list(io_evts)), 4)

            # Weights and biases are downloaded once; the streamed input is
            # never downloaded.
            for handle, expected in ((d_dl, 0), (w1_dl, 1), (b1_dl, 1),
                                     (w2_dl, 1), (b2_dl, 1)):
                self.assertEqual(occurrences(host_to_device, handle),
                                 expected)

            # Weights are not uploaded, and the loss is streamed.
            for handle in (d_ul, w1_ul, w2_ul):
                self.assertEqual(occurrences(device_to_host, handle), 0)

            # Explicitly fetch the first set of weights and biases.
            vw, vb = sess.run([w1, b1])

            expected_w1 = np.array(
                [[100.00576782, 86.60944366], [57.62784195, 51.23856354],
                 [93.45920563, 82.40240479], [155.36032104, 135.74447632]],
                dtype=np.float32)
            self.assertAllClose(expected_w1, vw, rtol=1e-4)

            # b1 is not trainable, so it still holds its initial value.
            self.assertAllClose(np.array([2, 3], dtype=np.float32),
                                vb,
                                rtol=1e-4)

            rep = sess.run(report)
            io_evts = tu.extract_all_io_events(rep)

            host_to_device, device_to_host = split_by_direction(io_evts)
            self.assertEqual(len(list(io_evts)), 2)

            # Nothing (weights, biases or inputs) is downloaded again.
            for handle in (d_dl, w1_dl, b1_dl, w2_dl, b2_dl):
                self.assertEqual(occurrences(host_to_device, handle), 0)

            # The explicit fetch uploads the weights once. Note that all
            # weights are fetched as a group.
            for handle, expected in ((d_ul, 0), (w1_ul, 1), (w2_ul, 1)):
                self.assertEqual(occurrences(device_to_host, handle),
                                 expected)
0
    def testVariablesRemainResident(self):
        """Count host/device transfers for a one-layer model trained on IPU.

        After five gradient-descent steps (learning rate 0.1) the IO events in
        the trace are counted per handle; the weight and bias are then fetched
        explicitly and the events are counted again.
        """
        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("vs", use_resource=True):
                w_init = np.array([[1, 2], [3, 4], [5, 6], [7, 8]],
                                  dtype=np.float32)
                w = variable_scope.get_variable(
                    "w",
                    shape=[4, 2],
                    dtype=np.float32,
                    initializer=init_ops.constant_initializer(w_init))
                b_init = np.array([2, 3], dtype=np.float32)
                b = variable_scope.get_variable(
                    "b",
                    shape=[2],
                    dtype=np.float32,
                    initializer=init_ops.constant_initializer(b_init))

            inputs = array_ops.placeholder(np.float32, shape=[1, 4])
            logits = math_ops.matmul(inputs, w) + b

            loss = math_ops.reduce_sum(logits)
            optimizer = gradient_descent.GradientDescentOptimizer(0.1)
            train = optimizer.minimize(loss)

            with ops.device('cpu'):
                report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system(True, True, True)

        def split_by_direction(events):
            """Return (host-to-device, device-to-host) lists of IO events."""
            to_device = [
                evt for evt in events
                if evt[0] == IpuTraceEvent.HOST_TO_DEVICE_TRANSFER
            ]
            to_host = [
                evt for evt in events
                if evt[0] == IpuTraceEvent.DEVICE_TO_HOST_TRANSFER
            ]
            return to_device, to_host

        def occurrences(events, handle):
            """Count the events whose second field equals `handle`."""
            return sum(1 for evt in events if evt[1] == handle)

        # Handles as they appear in the IO events (download / upload).
        d_dl, d_ul = "0.0", "out_0.0"
        w_dl, w_ul = "1.0", "out_1.0"
        b_dl, b_ul = "2.0", "out_2.0"

        batch_a = [[7, 3, 5, 9]]
        batch_b = [[1, 2, 3, 4]]

        with tu.ipu_session() as sess:
            sess.run(variables.global_variables_initializer())

            # Read the trace so that only the training steps are reported.
            sess.run(report)

            for batch in (batch_a, batch_b, batch_a, batch_b, batch_a):
                sess.run([train, loss],
                         {inputs: np.array(batch, dtype=np.float32)})

            rep = sess.run(report)
            io_evts = tu.extract_all_io_events(rep)
            self.assertEqual(len(list(io_evts)), 2)
            # The initialization is constant, so it generates no events on
            # the IPU.

            host_to_device, device_to_host = split_by_direction(io_evts)

            # Weight and bias are downloaded once; the streamed input is
            # never downloaded.
            for handle, expected in ((d_dl, 0), (w_dl, 1), (b_dl, 1)):
                self.assertEqual(occurrences(host_to_device, handle),
                                 expected)

            # Weight and bias are not uploaded, and the loss is streamed.
            for handle in (d_ul, w_ul, b_ul):
                self.assertEqual(occurrences(device_to_host, handle), 0)

            # Explicitly fetch the weights.
            vw, vb = sess.run([w, b])

            expected_w = np.array(
                [[-1.3, -0.3], [1.7, 2.7], [2.9, 3.9], [3.5, 4.5]],
                dtype=np.float32)
            self.assertAllClose(expected_w, vw, rtol=1e-4)

            self.assertAllClose(np.array([1.5, 2.5], dtype=np.float32),
                                vb,
                                rtol=1e-4)

            rep = sess.run(report)
            io_evts = tu.extract_all_io_events(rep)

            host_to_device, device_to_host = split_by_direction(io_evts)
            self.assertEqual(len(list(io_evts)), 2)

            # Nothing (weight, bias or input) is downloaded again.
            for handle in (d_dl, w_dl, b_dl):
                self.assertEqual(occurrences(host_to_device, handle), 0)

            # The explicit fetch uploads the weight and the bias once each.
            for handle, expected in ((d_ul, 0), (w_ul, 1), (b_ul, 1)):
                self.assertEqual(occurrences(device_to_host, handle),
                                 expected)