Example #1
  def testGRUNotCached(self):
    with self.session() as sess:
      # Note that the second GRU consumes a sequence twice as long.
      pinputs1 = array_ops.placeholder(dataType,
                                       [seq_len, batch_size, input_size],
                                       name="inputs1")
      pinputs2 = array_ops.placeholder(dataType,
                                       [seq_len * 2, batch_size, input_size],
                                       name="inputs2")
      plabels = array_ops.placeholder(np.int32, [batch_size], name="labels")

      with ops.device("/device:IPU:0"):

        def gru_layer(inputs, name):
          initial_state = _get_variable(
              "initial_state",
              shape=[batch_size, num_channels],
              initializer=init_ops.constant_initializer(0.1, dataType))
          return self._GRULayer(inputs=inputs,
                                weights_value=1.,
                                initial_state=initial_state,
                                training=True,
                                name=name)

        with variable_scope.variable_scope("gru_layer1", use_resource=True):
          logits1 = gru_layer(pinputs1, "layer1")
        with variable_scope.variable_scope("gru_layer2", use_resource=True):
          logits2 = gru_layer(pinputs2, "layer2")

        logits = (math_ops.reduce_mean(logits1, axis=0) +
                  math_ops.reduce_mean(logits2, axis=0))
        softmax = nn.sparse_softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=array_ops.stop_gradient(plabels))
        loss = math_ops.reduce_mean(softmax)
        train = gradient_descent.GradientDescentOptimizer(0.01).minimize(loss)

      report = ReportJSON(self, sess)

      sess.run(variables.global_variables_initializer())

      report.reset()
      sess.run(
          [loss, train], {
              pinputs1: _createGRUInput(0.5, batch_size, seq_len, input_size),
              pinputs2: _createGRUInput(1.5, batch_size, seq_len * 2,
                                        input_size),
              plabels: np.ones(shape=[batch_size], dtype=np.int32),
          })

      report.parse_log()
      report.assert_compute_sets_matches(
          '*BasicGruCell/ProcessUnits/Weight/Conv*/Convolve', 4,
          "There should be four fwd GRUs")
      report.assert_compute_sets_matches('*/MulOGate/Op/Multiply', 2,
                                         "There should be two bwd GRUs")
Example #2
  def testMultipleReduces(self):
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float16, [3])
        pb = array_ops.placeholder(np.float16, [3])
        a = math_ops.cast(pa, np.float32)
        a = math_ops.reduce_sum(a)
        a = math_ops.cast(a, np.float16)
        b = math_ops.cast(pb, np.float32)
        b = math_ops.reduce_sum(b)
        b = math_ops.cast(b, np.float16)
        c = a + b

      report = ReportJSON(self, sess)
      report.reset()

      fd = {pa: [2.0, 0.5, 1.0], pb: [1.0, 1.0, 2.0]}
      result = sess.run(c, fd)
      self.assertAllClose(result, 7.5)

      report.parse_log()

      ok = [
          '__seed*', 'host-exchange-local-copy-', 'Sum/reduce*/Reduce',
          'Sum_1/reduce*/Reduce', 'add/add*/Add'
      ]
      report.assert_all_compute_sets_and_list(ok)
Example #3
  def testGRULayerInference(self):
    ReportJSON(self)
    np.random.seed(0)
    # Run with all-0 weights
    weight0 = 0.
    for init_state_value in [0., 1.]:
      self._RunInferenceComparison('zeros',
                                   input_value=0.,
                                   weights_value=weight0,
                                   init_state_value=init_state_value)

    # Run with all-1 weights
    weight1 = 1.
    for init_state_value in [0., 1.]:
      self._RunInferenceComparison('ones',
                                   input_value=0.,
                                   weights_value=weight1,
                                   init_state_value=init_state_value)

    # Run with random weights
    for weight in np.random.rand(3):
      for init_state_value in [0., 1.]:
        self._RunInferenceComparison('rand',
                                     input_value=0.,
                                     weights_value=weight,
                                     init_state_value=init_state_value)
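
_RunInferenceComparison is another helper defined elsewhere in the test module; its name and arguments suggest it builds the GRU with constant-valued inputs, weights, and initial state, then compares the IPU output against a reference. For orientation, one step of the standard GRU recurrence is only a few lines of NumPy. The sketch below follows TensorFlow's GRUCell convention (h' = u * h + (1 - u) * c); gate ordering and weight layout vary between implementations, so this is a generic reference, not Graphcore's kernel.

import numpy as np


def _sigmoid(x):
  return 1.0 / (1.0 + np.exp(-x))


def gru_step(x, h, W_u, U_u, b_u, W_r, U_r, b_r, W_c, U_c, b_c):
  # Update and reset gates.
  u = _sigmoid(x @ W_u + h @ U_u + b_u)
  r = _sigmoid(x @ W_r + h @ U_r + b_r)
  # Candidate state, computed from the reset-gated previous state.
  c = np.tanh(x @ W_c + (r * h) @ U_c + b_c)
  # Blend the previous state and the candidate with the update gate.
  return u * h + (1.0 - u) * c
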
Example #4
    def testBatchNormalizeFused(self):
        with self.session() as sess:
            a = array_ops.placeholder(np.float32, [4, 64, 64, 4],
                                      name="input_a")

            def my_graph(a):
                with ops.device("/device:IPU:0"):
                    with variable_scope.variable_scope("", use_resource=True):

                        beta = variable_scope.get_variable(
                            "x",
                            dtype=np.float32,
                            shape=[4],
                            initializer=init_ops.constant_initializer(0.0))
                        gamma = variable_scope.get_variable(
                            "y",
                            dtype=np.float32,
                            shape=[4],
                            initializer=init_ops.constant_initializer(1.0))

                        b_mean, b_var = nn.moments(a, [0, 1, 2],
                                                   name='moments')

                        normed = nn.fused_batch_norm(a,
                                                     gamma,
                                                     beta,
                                                     b_mean,
                                                     b_var,
                                                     is_training=False)
                        return normed

            report = ReportJSON(self, sess)
            out = ipu.ipu_compiler.compile(my_graph, [a])
            sess.run(variables.global_variables_initializer())

            report.reset()
            result, _, _ = sess.run(out, {a: np.zeros([4, 64, 64, 4])})
            self.assertAllClose(result, np.zeros([4, 64, 64, 4]))
            report.parse_log()

            bl = ['*convert*/Cast*']
            report.assert_compute_sets_not_in_blacklist(bl)

            report.assert_tensor_input_names("input_a", "x", "y")
Example #5
  def testGRULayerTraining(self):
    ReportJSON(self)
    np.random.seed(42)

    # Run with random weights
    for weight in np.random.rand(3):
      for init_state_value in [0., 1.]:
        self._RunTrainingComparison('rand',
                                    input_value=0.,
                                    weights_value=weight,
                                    init_state_value=init_state_value,
                                    training_steps=3)
Example #6
    def testBatchNormalizeLayerFusedFp16(self):
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                with variable_scope.variable_scope("", use_resource=True):
                    a = array_ops.placeholder(np.float16, [4, 64, 64, 4],
                                              name="input_a")

                    normed = layers_norm.batch_normalization(a, fused=True)

            report = ReportJSON(self, sess)
            sess.run(variables.global_variables_initializer())

            report.reset()
            result = sess.run(normed, {a: np.zeros([4, 64, 64, 4])})
            self.assertAllClose(result, np.zeros([4, 64, 64, 4]))

            report.parse_log()

            bl = ['*convert*/Cast*']
            report.assert_compute_sets_not_in_blacklist(bl)
            report.assert_tensor_input_names("input_a")
Example #7
    def testSimpleCaching(self):
        with self.session() as sess:

            def f_1(x):
                return math_ops.square(x, name="namef1")

            def f_cond(x1, z):
                cond_1 = control_flow_ops.cond(math_ops.less(z[0], z[1]),
                                               lambda: f_1(x1),
                                               lambda: f_1(x1))
                return cond_1

            with ops.device('cpu'):
                x1 = array_ops.placeholder(dtypes.int32, [2, 2])
                z = array_ops.placeholder(dtypes.int32, [2])

            with ipu.scopes.ipu_scope("/device:IPU:0"):
                r1 = ipu.ipu_compiler.compile(f_cond, inputs=[x1, z])
                i_x1 = np.full((2, 2), 10)
                i_z = np.full((2), 8)

                report = ReportJSON(self, sess)
                sess.run(r1, {x1: i_x1, z: i_z})
                report.parse_log()

                report.assert_compute_sets_matches(
                    '*namef1*', 1, "There should be only one f_1 due to cash.")
Example #8
    def testWhenSideEffect(self):
        with self.session() as sess:

            def f_1(x):
                rand_num = 10 * random_ops.random_uniform(shape=[2, 2],
                                                          minval=1,
                                                          maxval=9,
                                                          dtype=dtypes.int32,
                                                          name="namef1")
                return rand_num * x

            def f_cond(x1, z):
                cond_1 = control_flow_ops.cond(math_ops.less(z[0], z[1]),
                                               lambda: f_1(x1),
                                               lambda: f_1(x1))
                return cond_1

            with ops.device('cpu'):
                x1 = array_ops.placeholder(dtypes.int32, [2, 2])
                z = array_ops.placeholder(dtypes.int32, [2])

            with ipu.scopes.ipu_scope("/device:IPU:0"):
                r1 = ipu.ipu_compiler.compile(f_cond, inputs=[x1, z])
                i_x1 = np.full((2, 2), 10)
                i_z = np.full((2), 8)

                report = ReportJSON(self, sess)
                sess.run(r1, {x1: i_x1, z: i_z})
                report.parse_log()

                report.assert_compute_sets_matches(
                    '*namef1*', 2,
                    "f1 should be on the list twice as it should not be cashed "
                    "due to SideEffect.")
Example #9
  def testNoCastsF32ToF16ToF32(self):
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float32, [3])
        b = math_ops.cast(pa, np.float16)
        c = math_ops.cast(b, np.float32)

      report = ReportJSON(self, sess)
      report.reset()

      fd = {pa: [2.0, 0.5, 1.0]}
      result = sess.run(c, fd)
      self.assertAllClose(result, [2.0, 0.5, 1.0])

      report.parse_log(assert_len=0)
      report.assert_no_compute_set()
Example #10
  def testArgMaxHalf(self, dtype):
    def model(a):
      return math_ops.argmax(a, output_type=dtypes.int32)

    with self.session() as sess:
      ReportJSON(self, sess)

      with ops.device('cpu'):
        pa = array_ops.placeholder(dtype, [3, 5, 2])

      with ops.device("/device:IPU:0"):
        out = model(pa)

      input = _get_random_input(dtype, (3, 5, 2))

      fd = {pa: input}
      result = sess.run(out, fd)
      self.assertAllClose(result, np.argmax(input, axis=0))
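
_get_random_input is also defined elsewhere in the test module. Because the test compares against np.argmax, tied maxima would make it flaky, so a plausible sketch draws distinct values and shuffles them; this behaviour is an assumption, not the helper's actual definition.

import numpy as np


def _get_random_input(dtype, shape):
  # Draw distinct integers so argmax has a unique answer, then shuffle
  # them into the requested shape.
  n = int(np.prod(shape))
  return np.random.permutation(n).astype(dtype).reshape(shape)
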
Example #11
  def testReduceMean(self):
    with self.session() as sess:
      shape = [2, 10000]
      with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float16, shape)
        output = math_ops.reduce_mean(pa, axis=[1])

      report = ReportJSON(self, sess)
      report.reset()

      val = np.finfo(np.float16).max / 2
      result = sess.run(output, {pa: np.full(shape, val)})
      self.assertAllClose(result, [val, val])

      report.parse_log(assert_len=4)

      ok = [
          '__seed*', 'host-exchange-local-copy-', 'Mean/fusion/Reduce',
          'Mean/fusion*/Op/Multiply', 'Mean/convert*/Cast'
      ]
      report.assert_all_compute_sets_and_list(ok)
Example #12
  def testNoCastsF16ReduceWithReshape(self):
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float16, [3, 4])
        a = gen_array_ops.reshape(pa, [4, 3])
        a = math_ops.reduce_sum(a, axis=(1))

      report = ReportJSON(self, sess)
      report.reset()

      fd = {pa: np.ones([3, 4])}
      result = sess.run(a, fd)
      self.assertAllClose(result, [3.0, 3.0, 3.0, 3.0])

      report.parse_log()

      ok = [
          '__seed*',
          'Sum/reduce*/Reduce',
      ]
      report.assert_all_compute_sets_and_list(ok)
Example #13
    def testBatchNormalizeLayerFusedTrainingFp16(self):
        with self.session() as sess:
            # This test checks for the correct behaviour of the batch norm
            # gradient when performing training while the batch norm attribute
            # `training` is False.
            with ops.device("/device:IPU:0"):
                with variable_scope.variable_scope("", use_resource=True):
                    a = array_ops.placeholder(np.float16, [4, 64, 64, 4],
                                              name="input_a")
                    normed = layers_norm.batch_normalization(a,
                                                             fused=True,
                                                             training=False)
                loss = math_ops.reduce_sum(normed)
                optimizer = gradient_descent.GradientDescentOptimizer(0.1)
                train = optimizer.minimize(loss)

            ReportJSON(self, sess)

            sess.run(variables.global_variables_initializer())
            result = sess.run([normed, train], {a: np.zeros([4, 64, 64, 4])})
            self.assertAllClose(result[0], np.zeros([4, 64, 64, 4]))
Example #14
  def testArgMaxMultiDimensional(self, dtype):
    def model(a, axis):
      return math_ops.argmax(a, axis=axis, output_type=dtypes.int32)

    for axis in range(6):
      with self.session() as sess:
        ReportJSON(self, sess)

        with ops.device('cpu'):
          pa = array_ops.placeholder(dtype, [1, 2, 3, 4, 5, 6])
          p_axis = array_ops.placeholder(np.int32, shape=())

        with ops.device("/device:IPU:0"):
          out = model(pa, p_axis)

        input = _get_random_input(dtype, (1, 2, 3, 4, 5, 6))

        fd = {pa: input, p_axis: axis}
        result = sess.run(out, fd)
        self.assertAllClose(result, np.argmax(input, axis=axis))
Example #15
    def testSameFunctions(self):
        # f_1, f_2 are the same
        with self.session() as sess:

            def f_1(x):
                return math_ops.square(x, name="namef1")

            def f_2(x):
                return math_ops.square(x, name="namef2")

            def f_cond(x1):
                cond_1 = control_flow_ops.cond(math_ops.less(1, 0),
                                               lambda: f_1(x1),
                                               lambda: f_1(x1))
                cond_2 = control_flow_ops.cond(math_ops.less(1, 0),
                                               lambda: f_2(x1),
                                               lambda: f_2(x1))

                return cond_1 + cond_2

            with ops.device('cpu'):
                x1 = array_ops.placeholder(dtypes.int32, [2, 2])

            with ipu.scopes.ipu_scope("/device:IPU:0"):
                r1 = ipu.ipu_compiler.compile(f_cond, inputs=[x1])
                i_x1 = np.full((2, 2), 10)

                report = ReportJSON(self, sess)
                sess.run(r1, {x1: i_x1})
                report.parse_log()

                report.assert_compute_sets_matches(
                    '*namef1*', 1, "There should be only one f_1 due to cash.")

                report.assert_compute_sets_matches(
                    '*namef2*', 0,
                    "There should not be f_2, as it is the same as f_1, due to cash."
                )
Example #16
  def testReductionSumVectorF16NoConverts(self):
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float16, [4096], name="a")
        output = math_ops.reduce_sum(pa, axis=[0])

      report = ReportJSON(self, sess)
      report.reset()

      fd = {pa: np.ones([4096])}
      result = sess.run(output, fd)
      self.assertAllClose(result, 4096)

      report.parse_log()

      # Check that there are no casts to float at the beginning.
      ok = [
          '__seed*', 'host-exchange-local-copy-',
          'Sum/reduce*/ReduceOnTile/InToIntermediateNoExchange/Reduce',
          'Sum/reduce*/ReduceFinalStage/IntermediateToOutput/Reduce'
      ]

      report.assert_all_compute_sets_and_list(ok)
Example #17
  def testDontRemoveCastsIfUsed(self):
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float16, [3])
        b = math_ops.cast(pa, np.float32)
        const = array_ops.constant(1.0, np.float32)
        b = b + const
        c = math_ops.cast(b, np.float16)

      report = ReportJSON(self, sess)
      report.reset()

      fd = {pa: [2.0, 0.5, 1.0]}
      result = sess.run(c, fd)
      self.assertAllClose(result, [3.0, 1.5, 2.0])

      report.parse_log(assert_len=4)

      ok = [
          '__seed*', 'host-exchange-local-copy-', 'Cast/convert.*/Cast',
          'add/fusion*/Add', 'Cast_1/convert.*/Cast'
      ]
      report.assert_all_compute_sets_and_list(ok)
Example #18
    def testBatchNormalizeLayerWithStableStatistics(self):
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                with variable_scope.variable_scope("", use_resource=True):
                    a = array_ops.placeholder(np.float32, [4, 64, 64, 4],
                                              name="input_a")
                    normed = layers_norm.batch_normalization(a, training=True)

            ReportJSON(self, sess, use_stable_norm_statistics=True)
            sess.run(variables.global_variables_initializer())

            # Use a tensor with large mean to test the stability. This blows up with
            # the non-stable implementation (NaN output). Use a power-of-two that can
            # be represented exactly in float32 to make sure we work with an exact
            # mean internally.
            input_mean = 2.0**64
            inputs = input_mean * np.ones([4, 64, 64, 4])

            # y = gamma * (x - mean) / sqrt(variance + epsilon) + beta
            # Both (x - mean) and beta_initializer are zero, so this should be zero.
            result = sess.run(normed, {a: inputs})
            self.assertAllEqual(result, np.zeros([4, 64, 64, 4]))
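
The stability concern described in the comments above is easy to reproduce in NumPy: with a mean of 2**64, the naive E[x^2] - E[x]^2 variance overflows float32, while the centred form stays exact. A quick illustration:

import numpy as np

x = np.full([1024], 2.0**64, dtype=np.float32)

# Naive variance: x * x is 2**128, which overflows float32 to inf,
# and inf - inf then yields NaN.
naive_var = np.mean(x * x) - np.mean(x)**2   # -> nan

# Stable variance: centre first. 2**64 is exactly representable in
# float32, so x - mean is exactly zero and the variance is exactly zero.
stable_var = np.mean((x - np.mean(x))**2)    # -> 0.0
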
Example #19
    def testConvolutionsDontMatchDifferentDevices(self):
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

                with variable_scope.variable_scope("vs", use_resource=True):
                    with ipu.scopes.ipu_shard(0):
                        y = layers.Conv2D(
                            2,
                            1,
                            use_bias=False,
                            kernel_initializer=init_ops.ones_initializer())(x)
                    with ipu.scopes.ipu_shard(1):
                        y = layers.Conv2D(
                            2,
                            1,
                            use_bias=False,
                            kernel_initializer=init_ops.ones_initializer())(y)

            report = ReportJSON(self, sess, sharded=True)

            sess.run(variables.global_variables_initializer())

            report.reset()

            sess.run(y, {x: np.zeros([1, 4, 4, 2])})

            report.parse_log()

            # Note how there are two convolutions
            ok = [
                '__seed*', '*OnTileCopy*', 'vs/conv2d/Conv2D/convolution.*',
                'Copy_vs/conv2d/Conv2D/convolution.*',
                'vs/conv2d_1/Conv2D/convolution.*'
            ]
            report.assert_all_compute_sets_and_list(ok)
Example #20
  def testArgMaxVector(self, dtype):
    def model(a):
      return math_ops.argmax(a, axis=0, output_type=dtypes.int32)

    with self.session() as sess:
      report = ReportJSON(self, sess)
      report.reset()

      with ops.device('cpu'):
        pa = array_ops.placeholder(dtype, [3])

      with ops.device("/device:IPU:0"):
        out = model(pa)

      input = _get_random_input(dtype, (3))

      fd = {pa: input}
      result = sess.run(out, fd)
      self.assertAllClose(result, np.argmax(input))

      report.parse_log(assert_len=4)