Code example #1
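Trains a three-layer convolutional model inside a loops.repeat while-v1 loop fed from an IPUInfeedQueue, shards the graph across two shards with autoshard.automatic_sharding, and checks that every op placed on the IPU device carries an _XlaSharding attribute. The allowed_op_types set and module aliases such as tu, so and gd are assumed to be defined in the surrounding test file.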
  def testSimpleXlaCompileTrainingInLoopV1WithEarlySharding(self):

    dataset = tu.create_dual_increasing_dataset(3)

    infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, "feed3")

    def my_net():
      def my_model(loss, x, y):
        with ops.device("/device:IPU:0"):
          inp = x

          x = layers.Conv2D(
              8, 3, padding='same', name="conv1", use_bias=False)(x)
          x = layers.Conv2D(
              8, 3, padding='same', name="conv2", use_bias=False)(x)
          x = layers.Conv2D(
              8, 3, padding='same', name="conv3", use_bias=False)(x)
          x = math_ops.reduce_max(x, axis=[1, 2])

          cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
              logits=x, labels=array_ops.stop_gradient(y))
          loss = math_ops.reduce_mean(cross_entropy)

          optim = so.ShardedOptimizer(gd.GradientDescentOptimizer(0.01))
          train = optim.minimize(cross_entropy)

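          # Automatically assign the ops between inp and loss to 2 shards.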
          autoshard.automatic_sharding(2, inp, loss)

          return [loss, train]

      loss = 0.0
      return loops.repeat(
          10, my_model, [loss], infeed_queue, use_while_v1=True)

    ipu_compiler.compile(my_net, inputs=[])

    op_set = ops.get_default_graph().get_operations()
    op_types = set()

    for o in op_set:
      if o.device == '/device:IPU:0' and o.type not in allowed_op_types:
        op_types.add(o.type)
        self.assertTrue(o.get_attr('_XlaSharding') is not None)

    self.assertTrue(len(op_types) > 10)
    self.assertTrue('Conv2D' in op_types)
    self.assertTrue('Conv2DBackpropInput' in op_types)
    self.assertTrue('Conv2DBackpropFilter' in op_types)
    self.assertTrue('ResourceApplyGradientDescent' in op_types)
Code example #2
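Reads a resource variable inside a control_flow_ops.cond on the IPU: the compiled model returns the variable's value (1.0) when the predicate is true and the constant 0.0 otherwise.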
  def testReadResourceVar(self):
    def my_model(pcond):
      va = variable_scope.get_variable(
          "x",
          shape=[],
          dtype=np.float32,
          initializer=init_ops.constant_initializer(1))

      o = control_flow_ops.cond(
          pcond,
          true_fn=lambda: va.read_value(),
          false_fn=lambda: constant_op.constant(0.))
      return [o]

    with ops.device("cpu"):
      pcond = array_ops.placeholder(np.bool, [], name="pred")

    with ops.device("/device:IPU:0"):
      r = ipu_compiler.compile(my_model, inputs=[pcond])

    with session_lib.Session() as sess:

      sess.run(variables.global_variables_initializer())

      fd = {pcond: True}
      result = sess.run(r[0], fd)
      self.assertAllClose(result, 1.)

      fd = {pcond: False}
      result = sess.run(r[0], fd)
      self.assertAllClose(result, 0.)
Code example #3
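The same sharded convolutional training model as example #1, compiled directly rather than inside a loop; the test walks sharding.dependencies of the loss output and asserts the _XlaSharding attribute on each relevant IPU op.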
  def testSimpleXlaCompileTraining(self):
    def my_model(inp, lab):

      x = inp
      y = lab

      x = layers.Conv2D(8, 3, padding='same', name="conv1", use_bias=False)(x)
      x = layers.Conv2D(8, 3, padding='same', name="conv2", use_bias=False)(x)
      x = layers.Conv2D(8, 3, padding='same', name="conv3", use_bias=False)(x)
      x = math_ops.reduce_max(x, axis=[1, 2])

      cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
          logits=x, labels=array_ops.stop_gradient(y))
      loss = math_ops.reduce_mean(cross_entropy)
      optim = so.ShardedOptimizer(gd.GradientDescentOptimizer(0.01))
      train = optim.minimize(cross_entropy)

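      # Automatically assign the ops between inp and loss to 2 shards.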
      autoshard.automatic_sharding(2, inp, loss)

      return [loss, train]

    with ops.device("cpu"):
      inp = array_ops.placeholder(np.float32, [1, 12, 12, 4], name="data")
      lab = array_ops.placeholder(np.float32, [1, 8], name="labl")

    with ops.device("/device:IPU:0"):
      out = ipu_compiler.compile(my_model, inputs=[inp, lab])

    op_set = sharding.dependencies([out[0]])

    for o in op_set:
      if o.device == '/device:IPU:0' and o.type not in allowed_op_types:
        self.assertTrue(o.get_attr('_XlaSharding') is not None)
Code example #4
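Trains a 256-unit PopnnLSTM layer for ten iterations in a while-v2 loop (use_while_v1=False) with automatic sharding across two shards, then inspects the ops inside the body of the single While op (via the get_single_while_op_body helper, assumed to be defined elsewhere in the test file), including the Popnn LSTM forward and backprop ops.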
  def testPopnnLstmXlaCompileTrainingInLoop(self):
    dataset = tu.create_dual_increasing_dataset(
        3, data_shape=[16, 2, 8], label_shape=[16, 2, 256])

    infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, "feed1")

    def my_net():
      def my_model(loss, x, y):
        with ops.device("/device:IPU:0"):
          inp = x

          lstm_cell = popnn_rnn.PopnnLSTM(256, dtype=dtypes.float32)
          x, _ = lstm_cell(x, training=True)

          cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
              logits=x, labels=array_ops.stop_gradient(y))
          loss = math_ops.reduce_mean(cross_entropy)

          optim = so.ShardedOptimizer(gd.GradientDescentOptimizer(0.01))
          train = optim.minimize(cross_entropy)

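          # Automatically assign the ops between inp and loss to 2 shards.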
          autoshard.automatic_sharding(2, inp, loss)

          return [loss, train]

      loss = 0.0
      return loops.repeat(
          10, my_model, [loss], infeed_queue, use_while_v1=False)

    ipu_compiler.compile(my_net, inputs=[])

    body = get_single_while_op_body(ops.get_default_graph())
    op_set = body.get_operations()
    op_types = set()

    for o in op_set:
      if o.device == '/device:IPU:0' and o.type not in allowed_op_types:
        op_types.add(o.type)
        self.assertTrue(o.get_attr('_XlaSharding') is not None)

    self.assertTrue(len(op_types) > 10)
    self.assertTrue('PopnnLstmLayer' in op_types)
    self.assertTrue('PopnnLstmLayerBackprop' in op_types)
    self.assertTrue('LogSoftmax' in op_types)
    self.assertTrue('SoftmaxCrossEntropyWithLogits' in op_types)
    self.assertTrue('ResourceApplyGradientDescent' in op_types)
Code example #5
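Builds three 1x1 convolutions and a gradient-descent training step inside a control_flow_ops.while_loop to exercise op fusions within while loops, and checks that the loop runs exactly three iterations.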
    def testFusionsInWhileLoops(self):
        def my_net():
            def cond(i, x):
                return i < 3

            def body(i, loss):
                i = i + 1
                init = init_ops.random_normal_initializer(0.0,
                                                          1.0,
                                                          seed=1,
                                                          dtype=np.float32)
                x = variable_scope.get_variable("v2",
                                                dtype=np.float32,
                                                shape=[1, 4, 4, 2],
                                                initializer=init)
                with variable_scope.variable_scope("vs", use_resource=True):
                    y = layers.Conv2D(
                        2,
                        1,
                        use_bias=True,
                        kernel_initializer=init_ops.ones_initializer(),
                        name='conv1')(x)
                    y = layers.Conv2D(
                        2,
                        1,
                        use_bias=True,
                        kernel_initializer=init_ops.ones_initializer(),
                        name='conv2')(y)
                    y = layers.Conv2D(
                        2,
                        1,
                        use_bias=True,
                        kernel_initializer=init_ops.ones_initializer(),
                        name='conv3')(y)
                loss = math_ops.reduce_sum(y)
                optimizer = gradient_descent.GradientDescentOptimizer(0.1)
                train = optimizer.minimize(loss)
                with ops.control_dependencies([train]):
                    i = array_ops.identity(i)
                    loss = array_ops.identity(loss)
                    return (i, loss)

            i = 0
            loss = 0.0
            return control_flow_ops.while_loop(cond,
                                               body, (i, loss),
                                               maximum_iterations=10)

        with ipu.ops.ipu_scope("/device:IPU:0"):
            r = ipu_compiler.compile(my_net, inputs=[])

        with session_lib.Session() as sess:
            sess.run(variables.global_variables_initializer())
            c, val = sess.run(r, {})
            self.assertEqual(c, 3)
Code example #6
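A minimal inference case: squares a scalar input, applies automatic_sharding after compilation, and asserts that every non-NoOp op on the IPU device carries an _XlaSharding attribute.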
  def testSimpleXlaCompileInference(self):
    def my_model(inp):
      output = inp * inp
      return [output]

    with ops.device("cpu"):
      inp = array_ops.placeholder(np.float32, [], name="a")

    with ops.device("/device:IPU:0"):
      out = ipu_compiler.compile(my_model, inputs=[inp])

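    # Automatically assign the ops between inp and out[0] to 2 shards.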
    autoshard.automatic_sharding(2, inp, out[0])

    op_list = ops.get_default_graph().get_operations()
    for o in op_list:
      if o.device == '/device:IPU:0' and o.type != 'NoOp':
        self.assertTrue(o.get_attr('_XlaSharding') is not None)
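For reference, a minimal sketch of actually executing this compiled inference, following the session pattern used by the other examples here (not part of the original test):

    with session_lib.Session() as sess:
      result = sess.run(out[0], {inp: 2.0})
      self.assertAllClose(result, 4.0)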
Code example #7
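Compiles a simple array_ops.gather along axis 0 and checks that the selected rows come back in the requested order.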
    def testGather(self):
        def my_net(p, i):
            # Forward pass
            a = array_ops.gather(p, i, axis=0)
            return [a]

        with ops.device('cpu'):
            X = array_ops.placeholder(dtypes.int32, [2, 4])
            Y = array_ops.placeholder(dtypes.int32, [2])

        with ipu.ops.ipu_scope("/device:IPU:0"):
            r = ipu_compiler.compile(my_net, inputs=[X, Y])

        with session_lib.Session() as sess:
            sess.run(variables.global_variables_initializer())
            result = sess.run(r, {X: [[1, 3, 5, 7], [0, 2, 4, 6]], Y: [1, 0]})
            self.assertAllClose(result[0], [[0, 2, 4, 6], [1, 3, 5, 7]])
Code example #8
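Threads a learning-rate placeholder through loops.repeat as a loop-carried parameter, trains a single sharded convolution on two auto-selected IPUs, and runs two iterations of the compiled loop in a session.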
    def testSimpleXlaCompileTrainingInLoopWithParam(self):
        dataset = tu.create_dual_increasing_dataset(3)

        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, "feed")

        def my_net(lr):
            def my_model(lr, loss, x, y):
                with ipu.ops.ipu_scope("/device:IPU:0"):
                    inp = x

                    x = layers.Conv2D(8,
                                      3,
                                      padding='same',
                                      name="conv1",
                                      use_bias=False)(x)
                    x = math_ops.reduce_max(x, axis=[1, 2])

                    cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
                        logits=x, labels=array_ops.stop_gradient(y))
                    loss = math_ops.reduce_mean(cross_entropy)

                    optim = so.ShardedOptimizer(
                        gd.GradientDescentOptimizer(lr))
                    train = optim.minimize(cross_entropy)

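                    # Automatically assign the ops between inp and loss to 2 shards.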
                    autoshard.automatic_sharding(2, inp, loss)

                    return [lr, loss, train]

            loss = 0.0
            return loops.repeat(2, my_model, [lr, loss], infeed_queue)

        lr = array_ops.placeholder(dtypes.float32, [])
        out = ipu_compiler.compile(my_net, inputs=[lr])

        cfg = ipu.utils.create_ipu_config(profiling=False)
        cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
        cfg = ipu.utils.auto_select_ipus(cfg, 2)
        ipu.utils.configure_ipu_system(cfg)

        with session_lib.Session() as sess:
            sess.run(infeed_queue.initializer)
            sess.run(variables.global_variables_initializer())
            sess.run(out[0], {lr: 0.1})
Code example #9
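Trains a GRU built with nn.dynamic_rnn, whose internal while loop carries a tuple of tuples as state; the resulting loss is compared, within a tolerance of 1, against a reference value obtained on XLA_CPU.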
    def testWhileLoopTupleOfTuples(self):
        # This test makes sure that we can handle a tuple of tuples in while loops.
        random_seed.set_random_seed(1)
        dataType = dtypes.float32
        num_input = 14
        timesteps = 2
        num_units = 128

        def RNN(x):
            # Define a GRU cell with tensorflow
            gru_cell = nn.rnn_cell.GRUCell(num_units, name="GRU")
            # Get gru cell output
            outputs, states = nn.dynamic_rnn(gru_cell, x, dtype=dataType)
            return outputs[-1]

        def my_net(X, Y):
            # Forward pass
            logits = RNN(X)
            # Loss
            cross_entropy = math_ops.reduce_mean(
                nn.softmax_cross_entropy_with_logits_v2(
                    logits=logits, labels=array_ops.stop_gradient(Y)))
            # Training
            train = gradient_descent.GradientDescentOptimizer(0.01).minimize(
                cross_entropy)
            return [cross_entropy, train]

        with ops.device('cpu'):
            X = array_ops.placeholder(dataType, [1, timesteps, num_input])
            Y = array_ops.placeholder(dataType, [1, timesteps, num_units])

        with ipu.ops.ipu_scope("/device:IPU:0"):
            r = ipu_compiler.compile(my_net, inputs=[X, Y])

        with session_lib.Session() as sess:
            sess.run(variables.global_variables_initializer())
            result = sess.run(r, {X: np.ones(X.shape), Y: np.ones(Y.shape)})
            # Compare the value - check that the loss is within 1 of the expected
            # value obtained by running on XLA_CPU.
            self.assertAllClose(result[0], 621.9, rtol=1)
Code example #10
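Nested while loops: each of the three outer iterations runs the inner loop twice, doubling x each time, so an input of 2 becomes 2 * 2^6 = 128.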
    def testNestedWhileLoopsSimplified(self):
        def my_net(x):
            def cond(i, x):
                return i < 3

            def cond1(j, x):
                return j < 2

            def body1(j, x):
                j = j + 1
                x = x * 2
                return (j, x)

            def body(i, x):
                i = i + 1
                j = 0
                _, x = control_flow_ops.while_loop(cond1,
                                                   body1, (j, x),
                                                   maximum_iterations=10)
                return (i, x)

            i = 0
            a, b = control_flow_ops.while_loop(cond,
                                               body, (i, x),
                                               maximum_iterations=10)
            return (a, b)

        with ops.device('cpu'):
            x = array_ops.placeholder(dtypes.int32, [4])

        with ipu.ops.ipu_scope("/device:IPU:0"):
            r = ipu_compiler.compile(my_net, inputs=[x])

        with session_lib.Session() as sess:
            sess.run(variables.global_variables_initializer())
            c, val = sess.run(r, {x: np.full([4], 2, dtype=np.int32)})
            self.assertEqual(c, 3)
            self.assertAllClose(val, np.full([4], 128))
Code example #11
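Compiles a control_flow_ops.cond whose branches consume different subsets of the arguments, and verifies both the true branch (pa + pb) and the false branch (pb - pc).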
  def testDifferentArgs(self):
    def my_model(pcond, pa, pb, pc):
      output = control_flow_ops.cond(
          pcond, true_fn=lambda: pa + pb, false_fn=lambda: pb - pc)
      return [output]

    with ops.device("cpu"):
      pcond = array_ops.placeholder(np.bool, [], name="pred")
      pa = array_ops.placeholder(np.float32, [], name="a")
      pb = array_ops.placeholder(np.float32, [], name="b")
      pc = array_ops.placeholder(np.float32, [], name="c")

    with ops.device("/device:IPU:0"):
      r = ipu_compiler.compile(my_model, inputs=[pcond, pa, pb, pc])

    with session_lib.Session() as sess:

      fd = {pcond: True, pa: 1., pb: 2., pc: 3.}
      result = sess.run(r[0], fd)
      self.assertAllClose(result, 3.)

      fd = {pcond: False, pa: 1., pb: 2., pc: 3.}
      result = sess.run(r[0], fd)
      self.assertAllClose(result, -1.)
Code example #12
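Exercises in-place operations inside a while loop: each of the three iterations computes x = relu(x * x), taking an input of 2 through 4 and 16 to 256.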
    def testInplaceOpsInRepeats(self):
        def my_net(x):
            def cond(i, x):
                return i < 3

            def body(i, x):
                i = i + 1
                x = nn.relu(x * x)
                return (i, x)

            i = 0
            return control_flow_ops.while_loop(cond, body, (i, x))

        with ops.device('cpu'):
            x = array_ops.placeholder(dtypes.float32, [4])

        with ipu.ops.ipu_scope("/device:IPU:0"):
            r = ipu_compiler.compile(my_net, inputs=[x])

        with session_lib.Session() as sess:
            sess.run(variables.global_variables_initializer())
            (c, x) = sess.run(r, {x: np.full([4], 2)})
            self.assertEqual(c, 3)
            self.assertAllClose(x, np.full([4], 256))
Code example #13
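Two tests: testTfLstmInWhileV1 trains a standard rnn_cell.LSTMCell via rnn.dynamic_rnn inside a while-v1 repeat loop on the IPU, and testRepeatLoopGradient checks that gradients flow through ipu.loops.repeat when a MomentumOptimizer differentiates through the repeated body.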
    def testTfLstmInWhileV1(self):
        dataset = tu.create_dual_increasing_dataset(3,
                                                    data_shape=[4, 1, 8],
                                                    label_shape=[4, 1, 128])

        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, "feed")

        def my_net():
            def my_model(loss, x, y):
                with ipu.ops.ipu_scope("/device:IPU:0"):
                    lstm_cell = rnn_cell.LSTMCell(128)
                    x, _ = rnn.dynamic_rnn(cell=lstm_cell,
                                           inputs=x,
                                           dtype=dtypes.float32,
                                           time_major=True)

                    cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
                        logits=x, labels=array_ops.stop_gradient(y))
                    loss = math_ops.reduce_mean(cross_entropy)

                    optim = gradient_descent.GradientDescentOptimizer(0.01)
                    train = optim.minimize(cross_entropy)

                    return [loss, train]

            loss = 0.0
            return loops.repeat(10,
                                my_model, [loss],
                                infeed_queue,
                                use_while_v1=True)

        out = ipu_compiler.compile(my_net, inputs=[])

        cfg = ipu.utils.create_ipu_config(profiling=True)
        cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
        cfg = ipu.utils.auto_select_ipus(cfg, 1)
        ipu.utils.configure_ipu_system(cfg)

        with session_lib.Session() as sess:
            sess.run(infeed_queue.initializer)
            sess.run(variables.global_variables_initializer())
            sess.run(out[0], {})

    def testRepeatLoopGradient(self):
        def model(features):
            a = variable_scope.get_variable("a", initializer=1.0)

            def body(x):
                return a * x

            logits = ipu.loops.repeat(5, body, [features])
            loss = math_ops.reduce_sum(logits)
            optimizer = momentum.MomentumOptimizer(learning_rate=.001,
                                                   momentum=0.9)
            grads_and_vars = optimizer.compute_gradients(loss)
            train_op = optimizer.apply_gradients(grads_and_vars)
            return a, loss, train_op

        with ops.device('cpu'):
            features = array_ops.placeholder(dtypes.float32, shape=[10])

        with ipu.ops.ipu_scope('/device:IPU:0'):
            ret = ipu.ipu_compiler.compile(model, [features])

            options = ipu.utils.create_ipu_config()
            options = ipu.utils.auto_select_ipus(options, 1)
            ipu.utils.configure_ipu_system(options)

        with session_lib.Session() as sess:
            sess.run(variables.global_variables_initializer())
            x, z = sess.run(ret, feed_dict={features: np.ones([10])})
            self.assertEqual(x, 1)