Example 1
  def testMultipleReduces(self):
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float16, [3])
        pb = array_ops.placeholder(np.float16, [3])
        a = math_ops.cast(pa, np.float32)
        a = math_ops.reduce_sum(a)
        a = math_ops.cast(a, np.float16)
        b = math_ops.cast(pb, np.float32)
        b = math_ops.reduce_sum(b)
        b = math_ops.cast(b, np.float16)
        c = a + b

      report = ReportJSON(self, sess)
      report.reset()

      fd = {pa: [2.0, 0.5, 1.0], pb: [1.0, 1.0, 2.0]}
      result = sess.run(c, fd)
      self.assertAllClose(result, 7.5)

      report.parse_log()

      ok = [
          '__seed*', 'host-exchange-local-copy-', 'Sum/reduce*/Reduce',
          'Sum_1/reduce*/Reduce', 'add/add*/Add'
      ]
      report.assert_all_compute_sets_and_list(ok)
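
For reference, a minimal NumPy sketch of what this graph computes: each fp16 input is up-cast to fp32 for the reduction and cast back afterwards, and the expected result is 3.5 + 4.0 = 7.5.

import numpy as np

pa = np.array([2.0, 0.5, 1.0], dtype=np.float16)
pb = np.array([1.0, 1.0, 2.0], dtype=np.float16)

# Accumulate in fp32, then cast back to fp16, mirroring the graph above.
a = np.float16(pa.astype(np.float32).sum())  # 3.5
b = np.float16(pb.astype(np.float32).sum())  # 4.0
print(a + b)                                 # 7.5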
Example 2
  def testGRUNotCached(self):
    with self.session() as sess:
      # Note that the second GRU processes a sequence twice as long.
      pinputs1 = array_ops.placeholder(dataType,
                                       [seq_len, batch_size, input_size],
                                       name="inputs1")
      pinputs2 = array_ops.placeholder(dataType,
                                       [seq_len * 2, batch_size, input_size],
                                       name="inputs2")
      plabels = array_ops.placeholder(np.int32, [batch_size], name="labels")

      with ops.device("/device:IPU:0"):

        def gru_layer(inputs, name):
          initial_state = _get_variable(
              "initial_state",
              shape=[batch_size, num_channels],
              initializer=init_ops.constant_initializer(0.1, dataType))
          return self._GRULayer(inputs=inputs,
                                weights_value=1.,
                                initial_state=initial_state,
                                training=True,
                                name=name)

        with variable_scope.variable_scope("gru_layer1", use_resource=True):
          logits1 = gru_layer(pinputs1, "layer1")
        with variable_scope.variable_scope("gru_layer2", use_resource=True):
          logits2 = gru_layer(pinputs2, "layer2")

        logits = (math_ops.reduce_mean(logits1, axis=0) +
                  math_ops.reduce_mean(logits2, axis=0))
        softmax = nn.sparse_softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=array_ops.stop_gradient(plabels))
        loss = math_ops.reduce_mean(softmax)
        train = gradient_descent.GradientDescentOptimizer(0.01).minimize(loss)

      report = ReportJSON(self, sess)

      sess.run(variables.global_variables_initializer())

      report.reset()
      sess.run(
          [loss, train], {
              pinputs1: _createGRUInput(0.5, batch_size, seq_len, input_size),
              pinputs2: _createGRUInput(1.5, batch_size, seq_len * 2,
                                        input_size),
              plabels: np.ones(shape=[batch_size], dtype=np.int32),
          })

      report.parse_log()
      report.assert_compute_sets_matches(
          '*BasicGruCell/ProcessUnits/Weight/Conv*/Convolve', 4,
          "There should be four fwd GRUs")
      report.assert_compute_sets_matches('*/MulOGate/Op/Multiply', 2,
                                         "There should be two bwd GRUs")
Example 3
  def testNoCastsF32ToF16ToF32(self):
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float32, [3])
        b = math_ops.cast(pa, np.float16)
        c = math_ops.cast(b, np.float32)

      report = ReportJSON(self, sess)
      report.reset()

      fd = {pa: [2.0, 0.5, 1.0]}
      result = sess.run(c, fd)
      self.assertAllClose(result, [2.0, 0.5, 1.0])

      report.parse_log(assert_len=0)
      report.assert_no_compute_set()
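
Eliding an f32-to-f16-to-f32 cast round trip is only value-preserving when the values survive fp16 rounding, which is presumably why the test feeds 2.0, 0.5 and 1.0 (all exactly representable in fp16). A quick NumPy check of both cases:

import numpy as np

exact = np.float32(0.5)
print(np.float32(np.float16(exact)) == exact)  # True: survives the round trip

lossy = np.float32(0.1)
print(np.float32(np.float16(lossy)) == lossy)  # False: 0.1 is rounded in fp16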
Example 4
    def testBatchNormalizeFused(self):
        with self.session() as sess:
            a = array_ops.placeholder(np.float32, [4, 64, 64, 4],
                                      name="input_a")

            def my_graph(a):
                with ops.device("/device:IPU:0"):
                    with variable_scope.variable_scope("", use_resource=True):

                        beta = variable_scope.get_variable(
                            "x",
                            dtype=np.float32,
                            shape=[4],
                            initializer=init_ops.constant_initializer(0.0))
                        gamma = variable_scope.get_variable(
                            "y",
                            dtype=np.float32,
                            shape=[4],
                            initializer=init_ops.constant_initializer(1.0))

                        b_mean, b_var = nn.moments(a, [0, 1, 2],
                                                   name='moments')

                        normed = nn.fused_batch_norm(a,
                                                     gamma,
                                                     beta,
                                                     b_mean,
                                                     b_var,
                                                     is_training=False)
                        return normed

            report = ReportJSON(self, sess)
            out = ipu.ipu_compiler.compile(my_graph, [a])
            sess.run(variables.global_variables_initializer())

            report.reset()
            result, _, _ = sess.run(out, {a: np.zeros([4, 64, 64, 4])})
            self.assertAllClose(result, np.zeros([4, 64, 64, 4]))
            report.parse_log()

            bl = ['*convert*/Cast*']
            report.assert_compute_sets_not_in_blacklist(bl)

            report.assert_tensor_input_names("input_a", "x", "y")
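
In inference mode (`is_training=False`), `fused_batch_norm` normalises with the statistics passed in. A NumPy sketch of the formula it implements (the 1e-3 epsilon default is an assumption here):

import numpy as np

def batch_norm_inference(x, gamma, beta, mean, var, epsilon=1e-3):
  # y = gamma * (x - mean) / sqrt(var + epsilon) + beta, applied per channel.
  return gamma * (x - mean) / np.sqrt(var + epsilon) + beta

x = np.zeros([4, 64, 64, 4], dtype=np.float32)
y = batch_norm_inference(x, gamma=np.ones(4, np.float32),
                         beta=np.zeros(4, np.float32),
                         mean=np.zeros(4, np.float32),
                         var=np.ones(4, np.float32))
# With zero input, zero mean and beta = 0, the output is all zeros, as the
# test asserts.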
Example 5
  def testArgMaxBasic(self, dtype):
    def model(a):
      return math_ops.argmax(a, output_type=dtypes.int32)

    with self.session() as sess:
      report = ReportJSON(self, sess)
      report.reset()

      with ops.device('cpu'):
        pa = array_ops.placeholder(dtype, [3, 5, 2])

      with ops.device("/device:IPU:0"):
        out = model(pa)

      input_data = _get_random_input(dtype, (3, 5, 2))

      fd = {pa: input_data}
      result = sess.run(out, fd)
      self.assertAllClose(result, np.argmax(input_data, axis=0))

      report.parse_log(assert_len=4)
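
`_get_random_input` is another helper that is not shown here. A hypothetical stand-in that fits the call site:

import numpy as np

def _get_random_input(dtype, shape):
  # Assumed: uniform random values in a small range, so that fp16 inputs stay
  # well inside their dynamic range.
  return np.random.uniform(-10.0, 10.0, size=shape).astype(dtype)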
Example 6
    def testBatchNormalizeLayerFusedFp16(self):
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                with variable_scope.variable_scope("", use_resource=True):
                    a = array_ops.placeholder(np.float16, [4, 64, 64, 4],
                                              name="input_a")

                    normed = layers_norm.batch_normalization(a, fused=True)

            report = ReportJSON(self, sess)
            sess.run(variables.global_variables_initializer())

            report.reset()
            result = sess.run(normed, {a: np.zeros([4, 64, 64, 4])})
            self.assertAllClose(result, np.zeros([4, 64, 64, 4]))

            report.parse_log()

            bl = ['*convert*/Cast*']
            report.assert_compute_sets_not_in_blacklist(bl)
            report.assert_tensor_input_names("input_a")
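
The `layers_norm` alias is not defined in this snippet; in TensorFlow's Python test suites it usually comes from an import like the following (an assumption, not shown in the source):

from tensorflow.python.layers import normalization as layers_norm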
Example 7
  def testNoCastsF16ReduceWithReshape(self):
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float16, [3, 4])
        a = gen_array_ops.reshape(pa, [4, 3])
        a = math_ops.reduce_sum(a, axis=1)

      report = ReportJSON(self, sess)
      report.reset()

      fd = {pa: np.ones([3, 4])}
      result = sess.run(a, fd)
      self.assertAllClose(result, [3.0, 3.0, 3.0, 3.0])

      report.parse_log()

      ok = [
          '__seed*',
          'Sum/reduce*/Reduce',
      ]
      report.assert_all_compute_sets_and_list(ok)
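
A NumPy sketch of the reshape-then-reduce above: the reshape regroups the twelve ones into four rows of three, so each row sums to 3.0 and no cast out of fp16 is needed:

import numpy as np

pa = np.ones([3, 4], dtype=np.float16)
print(pa.reshape(4, 3).sum(axis=1))  # [3. 3. 3. 3.], still fp16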
Example 8
  def testReduceMean(self):
    with self.session() as sess:
      shape = [2, 10000]
      with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float16, shape)
        output = math_ops.reduce_mean(pa, axis=[1])

      report = ReportJSON(self, sess)
      report.reset()

      val = np.finfo(np.float16).max / 2
      result = sess.run(output, {pa: np.full(shape, val)})
      self.assertAllClose(result, [val, val])

      report.parse_log(assert_len=4)

      ok = [
          '__seed*', 'host-exchange-local-copy-', 'Mean/fusion/Reduce',
          'Mean/fusion*/Op/Multiply', 'Mean/convert*/Cast'
      ]
      report.assert_all_compute_sets_and_list(ok)
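
The interesting part of this test is numerical: each input is half of fp16's maximum, so summing 10000 of them directly in fp16 overflows to inf; the `Mean/convert*/Cast` compute set suggests the accumulation happens at higher precision. A NumPy sketch of the two behaviours:

import numpy as np

val = np.finfo(np.float16).max / 2            # ~32752
row = np.full([10000], val, dtype=np.float16)

print(row.sum())                              # inf: fp16 accumulation overflows
print(np.float16(row.astype(np.float32).mean()))  # 32752.0: fp32 is fine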
Example 9
  def testReductionSumVectorF16NoConverts(self):
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float16, [4096], name="a")
        output = math_ops.reduce_sum(pa, axis=[0])

      report = ReportJSON(self, sess)
      report.reset()

      fd = {pa: np.ones([4096])}
      result = sess.run(output, fd)
      self.assertAllClose(result, 4096)

      report.parse_log()

      # Check that there is no cast up to float32 before the reduction.
      ok = [
          '__seed*', 'host-exchange-local-copy-',
          'Sum/reduce*/ReduceOnTile/InToIntermediateNoExchange/Reduce',
          'Sum/reduce*/ReduceFinalStage/IntermediateToOutput/Reduce'
      ]

      report.assert_all_compute_sets_and_list(ok)
Example 10
  def testDontRemoveCastsIfUsed(self):
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        pa = array_ops.placeholder(np.float16, [3])
        b = math_ops.cast(pa, np.float32)
        const = array_ops.constant(1.0, np.float32)
        b = b + const
        c = math_ops.cast(b, np.float16)

      report = ReportJSON(self, sess)
      report.reset()

      fd = {pa: [2.0, 0.5, 1.0]}
      result = sess.run(c, fd)
      self.assertAllClose(result, [3.0, 1.5, 2.0])

      report.parse_log(assert_len=4)

      ok = [
          '__seed*', 'host-exchange-local-copy-', 'Cast/convert.*/Cast',
          'add/fusion*/Add', 'Cast_1/convert.*/Cast'
      ]
      report.assert_all_compute_sets_and_list(ok)
Example 11
    def testConvolutionsDontMatchDifferentDevices(self):
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])

                with variable_scope.variable_scope("vs", use_resource=True):
                    with ipu.scopes.ipu_shard(0):
                        y = layers.Conv2D(
                            2,
                            1,
                            use_bias=False,
                            kernel_initializer=init_ops.ones_initializer())(x)
                    with ipu.scopes.ipu_shard(1):
                        y = layers.Conv2D(
                            2,
                            1,
                            use_bias=False,
                            kernel_initializer=init_ops.ones_initializer())(y)

            report = ReportJSON(self, sess, sharded=True)

            sess.run(variables.global_variables_initializer())

            report.reset()

            sess.run(y, {x: np.zeros([1, 4, 4, 2])})

            report.parse_log()

            # Note that there are two separate convolutions, one per shard.
            ok = [
                '__seed*', '*OnTileCopy*', 'vs/conv2d/Conv2D/convolution.*',
                'Copy_vs/conv2d/Conv2D/convolution.*',
                'vs/conv2d_1/Conv2D/convolution.*'
            ]
            report.assert_all_compute_sets_and_list(ok)