Beispiel #1
0
def get_config(opts):
    """Assemble the IPU configuration described by ``opts``.

    Reads ``report``, ``device_id``, ``shards``, ``replicas``,
    ``convolution_options``, ``matmul_options`` and ``enable_half_partials``
    from ``opts`` and returns the resulting config object.
    """
    want_report = opts.report
    config = utils.create_ipu_config(
        profiling=want_report,
        profile_execution=want_report,
        report_every_nth_execution=1,
    )

    # A device id of -1 requests automatic selection of
    # shards * replicas IPUs; any other value pins that single device.
    if opts.device_id != -1:
        config = utils.select_ipus(config, [opts.device_id])
    else:
        config = utils.auto_select_ipus(config, opts.shards * opts.replicas)

    # Both option strings are JSON documents; empty/None means "unset".
    conv_json = opts.convolution_options
    if conv_json:
        config = utils.set_convolution_options(config, json.loads(conv_json))

    matmul_json = opts.matmul_options
    if matmul_json:
        config = utils.set_matmul_options(config, json.loads(matmul_json))

    if opts.enable_half_partials:
        # Matmul first, then convolution; each call gets its own dict.
        for apply_options in (utils.set_matmul_options,
                              utils.set_convolution_options):
            config = apply_options(config, {"partialsType": 'half'})

    return config
Beispiel #2
0
    def testCrossReplicaAndStatefulGradientAccumulate(self):
        # A ten-iteration while loop feeds a cross-replica sum of ones into a
        # stateful gradient accumulator (5 mini-batches) and adds the result
        # to the running tensor. Compiled with two auto-selected IPUs.
        with self.session() as sess:
            dtype = np.float32

            def my_net(y):
                def keep_looping(i, _y):
                    # Termination depends on the counter only.
                    return i < 10

                def step(i, y):
                    replica_sum = gen_popops_ops.ipu_cross_replica_sum(
                        array_ops.ones_like(y))
                    accumulated = (
                        gen_poputil_ops.ipu_stateful_gradient_accumulate(
                            replica_sum, num_mini_batches=5))
                    next_y = y + accumulated
                    next_i = i + 1
                    return (next_i, next_y)

                return control_flow_ops.while_loop(keep_looping, step, (0, y))

            with ops.device('cpu'):
                y_in = array_ops.placeholder(dtype, [1])

            cfg = utils.create_ipu_config()
            cfg = utils.auto_select_ipus(cfg, num_ipus=2)
            utils.configure_ipu_system(cfg)

            with ops.device("/device:IPU:0"):
                compiled = xla.compile(my_net, inputs=[y_in])

            outputs = sess.run(compiled, {y_in: [10]})
            # outputs[0] is the loop counter, outputs[1] the accumulated tensor.
            self.assertEqual(outputs[0], 10)
            self.assertAllEqual(outputs[1], [30])
Beispiel #3
0
    def testCborReport(self):
        # With the CBOR report enabled (and the text report disabled), the
        # compilation and execution reports must both start with byte 217.
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                lhs = array_ops.placeholder(np.float32, [2, 2], name="a")
                rhs = array_ops.placeholder(np.float32, [2, 2], name="b")
                total = math_ops.add(lhs, rhs)

            with ops.device('cpu'):
                trace = gen_ipu_ops.ipu_event_trace()

            cfg = utils.create_ipu_config(
                profiling=True,
                profile_execution=True,
                use_poplar_text_report=False,
                use_poplar_cbor_report=True,
            )
            utils.configure_ipu_system(cfg)

            feed = {lhs: [[1., 1.], [2., 3.]], rhs: [[0., 1.], [4., 5.]]}
            # Result discarded; presumably this clears events queued so far.
            sess.run(trace, feed)

            sess.run(total, feed)

            events = utils.extract_all_events(sess.run(trace, feed))
            self.assertEqual(len(events), 4)  # engine, begin, end, execute

            compile_report = events[1].compile_end.compilation_report
            self.assertEqual(compile_report[0], bytes(bytearray([217]))[0])
            execute_report = events[3].execute.execution_report
            self.assertEqual(execute_report[0], bytes(bytearray([217]))[0])
Beispiel #4
0
    def testIpuModelDeviceWithMultipleReport(self):
        # Runs two different outputs (sum and difference) separately and
        # checks the number of entries in the event trace afterwards.
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                lhs = array_ops.placeholder(np.float32, [2, 2], name="a")
                rhs = array_ops.placeholder(np.float32, [2, 2], name="b")
                summed = lhs + rhs
                diffed = lhs - rhs

            with ops.device('cpu'):
                # The trace op is ordered after both computations.
                with ops.control_dependencies([summed, diffed]):
                    trace = gen_ipu_ops.ipu_event_trace()

            cfg = utils.create_ipu_config(profiling=True,
                                          profile_execution=True)
            utils.configure_ipu_system(cfg)

            feed = {lhs: [[1., 1.], [2., 3.]], rhs: [[0., 1.], [4., 5.]]}
            sess.run(trace, feed)

            sum_value = sess.run(summed, feed)
            self.assertAllClose(sum_value, [[1., 2.], [6., 8.]])

            diff_value, events = sess.run([diffed, trace], feed)
            self.assertAllClose(diff_value, [[1., 0.], [-2., -2.]])

            # 2x engine, 2x compile_begin, 2x compile_end, 2x load engine
            self.assertEqual(len(events), 8)
Beispiel #5
0
    def testPrefixPathWithTranspose(self):
        # 1x1 convolution (ones kernel, 2 filters) whose output is transposed
        # to [4, 4, 2, 1] before a tensor of ones is added.
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
                z = array_ops.placeholder(np.float32, shape=[4, 4, 2, 1])

                with variable_scope.variable_scope("vs", use_resource=True):
                    conv = layers.Conv2D(
                        2,
                        1,
                        use_bias=True,
                        kernel_initializer=init_ops.ones_initializer())
                    y = conv(x)
                transposed = array_ops.transpose(y, [1, 2, 3, 0])
                out = transposed + z

            cfg = utils.create_ipu_config()
            utils.configure_ipu_system(cfg)

            sess.run(variables.global_variables_initializer())

            feed = {
                x: np.reshape(np.arange(32), [1, 4, 4, 2]),
                z: np.ones([4, 4, 2, 1]),
            }
            result = sess.run(out, feed)

            expected = [
                [[[2.], [2.]], [[6.], [6.]], [[10.], [10.]], [[14.], [14.]]],
                [[[18.], [18.]], [[22.], [22.]], [[26.], [26.]], [[30.], [30.]]],
                [[[34.], [34.]], [[38.], [38.]], [[42.], [42.]], [[46.], [46.]]],
                [[[50.], [50.]], [[54.], [54.]], [[58.], [58.]], [[62.], [62.]]],
            ]
            self.assertAllClose(result, expected)
Beispiel #6
0
def get_config(report_n=1):
    """Return an IPU config with profiling and text reports switched off.

    ``report_n`` is forwarded as ``report_every_nth_execution``; the config
    is then passed through ``auto_select_ipus`` with ``[1]``.
    """
    return utils.auto_select_ipus(
        utils.create_ipu_config(
            profiling=False,
            use_poplar_text_report=False,
            report_every_nth_execution=report_n,
        ),
        [1],
    )
Beispiel #7
0
  def testIoTilesAreExcludedFromShard(self):
    # With 128 IO tiles reserved via the GCL options, no tensor in the
    # report may be spread over more tiles than remain for compute.
    num_io_tiles = 128

    def my_net(a, b):
      with ipu_shard(0):
        first = math_ops.matmul(a, a, transpose_b=True, name="aa")
      with ipu_shard(1):
        second = math_ops.matmul(b, b, transpose_b=True, name="bb")
      return first, second

    input_a = array_ops.placeholder(np.float32, [1216, 1])
    input_b = array_ops.placeholder(np.float32, [1216, 1])

    with ops.device("/device:IPU:0"):
      compiled_net = ipu_compiler.compile(my_net, inputs=[input_a, input_b])

    cfg = ipu_utils.create_ipu_config(profiling=True)
    cfg = ipu_utils.set_gcl_options(cfg, num_io_tiles=num_io_tiles)
    cfg = ipu_utils.auto_select_ipus(cfg, num_ipus=2)
    ipu_utils.configure_ipu_system(cfg)

    with session.Session() as sess:
      # The device was configured above, so the report must not redo it.
      report = ReportJSON(self, sess, configure_device=False)
      report.reset()

      feed = {
          input_a: np.ones(input_a.shape),
          input_b: np.ones(input_b.shape),
      }
      sess.run(compiled_net, feed)

      report.parse_log()
      max_tiles = report.get_num_tiles_per_ipu() - num_io_tiles
      for tensor in report.get_tensor_map().all_tensors():
        self.assertLessEqual(len(tensor.tiles), max_tiles)
Beispiel #8
0
    def testPrefixPathWithReshape(self):
        # 1x1 convolution (ones kernel, 2 filters) whose output is flattened
        # to 32 elements before a vector of ones is added.
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
                z = array_ops.placeholder(np.float32, shape=[32])

                with variable_scope.variable_scope("vs", use_resource=True):
                    conv = layers.Conv2D(
                        2,
                        1,
                        use_bias=True,
                        kernel_initializer=init_ops.ones_initializer())
                    y = conv(x)
                flat = gen_array_ops.reshape(y, [32])
                out = flat + z

            cfg = utils.create_ipu_config()
            utils.configure_ipu_system(cfg)

            sess.run(variables.global_variables_initializer())

            feed = {
                x: np.reshape(np.arange(32), [1, 4, 4, 2]),
                z: np.ones([32]),
            }
            result = sess.run(out, feed)

            # Confirmed with values on the CPU.
            expected = [
                2., 2., 6., 6., 10., 10., 14., 14.,
                18., 18., 22., 22., 26., 26., 30., 30.,
                34., 34., 38., 38., 42., 42., 46., 46.,
                50., 50., 54., 54., 58., 58., 62., 62.,
            ]
            self.assertAllClose(result, expected)
Beispiel #9
0
    def testStatefulGradientAccumulate(self):
        # Ten loop iterations: `x` grows by the accumulator output (5
        # mini-batches, usage verification off) and `y` by one per step.
        # Both are expected to end at 20 when starting from 10.
        with self.session() as sess:
            dtype = np.float32

            def my_net(y):
                def keep_looping(i, _x, _y):
                    # Termination depends on the counter only.
                    return i < 10

                def step(i, x, y):
                    accumulated = (
                        gen_poputil_ops.ipu_stateful_gradient_accumulate(
                            array_ops.ones_like(x),
                            num_mini_batches=5,
                            verify_usage=False))
                    next_x = x + accumulated
                    # The increment is shaped like the updated x.
                    next_y = y + array_ops.ones_like(next_x)
                    next_i = i + 1
                    return (next_i, next_x, next_y)

                return control_flow_ops.while_loop(keep_looping, step,
                                                   (0, y, y))

            with ops.device('cpu'):
                y_in = array_ops.placeholder(dtype, [1])

            cfg = utils.create_ipu_config()
            utils.configure_ipu_system(cfg)

            with ops.device("/device:IPU:0"):
                compiled = xla.compile(my_net, inputs=[y_in])

            outputs = sess.run(compiled, {y_in: [10]})
            self.assertEqual(outputs[0], 10)
            self.assertAllEqual(outputs[1], [20])
            self.assertAllEqual(outputs[2], [20])
Beispiel #10
0
    def testStatefulGradientAccumulateInvalidUse(self):
        # Same loop as the valid-use test but without `verify_usage=False`;
        # running it is expected to raise FailedPreconditionError.
        with self.session() as sess:
            dtype = np.float32

            def my_net(y):
                def keep_looping(i, _x, _y):
                    # Termination depends on the counter only.
                    return i < 10

                def step(i, x, y):
                    accumulated = (
                        gen_poputil_ops.ipu_stateful_gradient_accumulate(
                            array_ops.ones_like(x), num_mini_batches=5))
                    next_x = x + accumulated
                    # The increment is shaped like the updated x.
                    next_y = y + array_ops.ones_like(next_x)
                    next_i = i + 1
                    return (next_i, next_x, next_y)

                return control_flow_ops.while_loop(keep_looping, step,
                                                   (0, y, y))

            with ops.device('cpu'):
                y_in = array_ops.placeholder(dtype, [1])

            cfg = utils.create_ipu_config()
            utils.configure_ipu_system(cfg)

            with ops.device("/device:IPU:0"):
                compiled = xla.compile(my_net, inputs=[y_in])

            expected_error = "The .*IpuStatefulGradientAccumulate op"
            with self.assertRaisesRegex(errors.FailedPreconditionError,
                                        expected_error):
                sess.run(compiled, {y_in: [10]})
Beispiel #11
0
    def testTrainReplicated(self):
        # Trains an IPUEstimator with 4 replicas whose loss is the
        # cross-replica sum of the features, then checks the number of
        # session runs and the loss values written to the events file.
        if ipu_utils.running_on_ipu_model():
            self.skipTest(
                "Replicated top level graphs are not supported on the "
                "IPU_MODEL target")

        def my_model_fn(features, labels, mode):  # pylint: disable=unused-argument
            self.assertEqual(model_fn_lib.ModeKeys.TRAIN, mode)

            loss = ipu.ops.cross_replica_ops.cross_replica_sum(features,
                                                               name="loss")

            train_op = array_ops.identity(loss)

            return model_fn_lib.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        def my_input_fn():
            dataset = tu.create_dual_increasing_dataset(10,
                                                        data_shape=[1],
                                                        label_shape=[1])
            dataset = dataset.batch(batch_size=1, drop_remainder=True)
            return dataset

        ipu_options = ipu_utils.create_ipu_config()
        ipu_options = ipu_utils.auto_select_ipus(ipu_options, 4)
        config = ipu_run_config.RunConfig(
            ipu_run_config=ipu_run_config.IPURunConfig(
                iterations_per_loop=2, num_replicas=4,
                ipu_options=ipu_options),
            log_step_count_steps=1,
            save_summary_steps=1)

        estimator = ipu_estimator.IPUEstimator(model_fn=my_model_fn,
                                               config=config)

        session_run_counter = _SessionRunCounter()

        num_steps = 6
        estimator.train(input_fn=my_input_fn,
                        steps=num_steps,
                        hooks=[session_run_counter])

        # One session run covers `iterations_per_loop` training steps.
        self.assertEqual(
            session_run_counter.num_session_runs,
            num_steps // config.ipu_run_config.iterations_per_loop)

        model_dir = estimator.model_dir
        events_files = glob.glob(model_dir + "/*tfevents*")
        # Use a unittest assertion rather than a bare `assert`, which is
        # stripped under `python -O` and reports nothing useful on failure.
        self.assertEqual(len(events_files), 1)

        # Collect every summary value whose tag mentions the loss.
        loss_output = [
            v.simple_value
            for e in summary_iterator.summary_iterator(events_files[0])
            for v in e.summary.value
            if "loss" in v.tag
        ]

        # loss is averaged across iterations per loop
        self.assertEqual(loss_output, [14.0, 16.0, 18.0])
Beispiel #12
0
  def testNumUniqueDevicesBelowNumShardsRange(self):
    # The device mapping only names devices 0 and 1 while the run config
    # asks for 4 shards; training is expected to fail with a ValueError.
    def model_fn_with_zero_stages(mode):
      def optimizer_function():
        return None

      return IPUPipelineEstimatorSpec(
          mode,
          computational_stages=[],
          gradient_accumulation_count=1,
          device_mapping=[0, 1, 0],
          optimizer_function=optimizer_function)

    def my_input_fn():
      return dataset_ops.Dataset.from_tensor_slices(([0], [0]))

    ipu_options = ipu_utils.auto_select_ipus(ipu_utils.create_ipu_config(),
                                             num_ipus=4)
    run_config = ipu_run_config.RunConfig(
        ipu_run_config=ipu_run_config.IPURunConfig(
            num_shards=4, iterations_per_loop=1, ipu_options=ipu_options))

    estimator = IPUPipelineEstimator(model_fn=model_fn_with_zero_stages,
                                     config=run_config)

    expected_message = (r"This pipeline requires 2 devices, but "
                        "`IPURunConfig.num_shards` was set to 4")
    with self.assertRaisesRegex(ValueError, expected_message):
      estimator.train(input_fn=my_input_fn, steps=1)
Beispiel #13
0
    def testPrefixPathWithElementwiseInPath(self):
        # 1x1 convolution (ones kernel, 2 filters) added to `z` scaled by the
        # scalar `s`.
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
                z = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
                s = array_ops.placeholder(np.float32, shape=[])

                with variable_scope.variable_scope("vs", use_resource=True):
                    conv = layers.Conv2D(
                        2,
                        1,
                        use_bias=True,
                        kernel_initializer=init_ops.ones_initializer())
                    y = conv(x)
                scaled = z * s
                out = y + scaled

            cfg = utils.create_ipu_config()
            utils.configure_ipu_system(cfg)

            sess.run(variables.global_variables_initializer())

            feed = {
                x: np.reshape(np.arange(32), [1, 4, 4, 2]),
                z: np.reshape(np.arange(32), [1, 4, 4, 2]),
                s: 2.0,
            }
            result = sess.run(out, feed)

            # Confirmed with values on the CPU.
            expected = [[
                [[1., 3.], [9., 11.], [17., 19.], [25., 27.]],
                [[33., 35.], [41., 43.], [49., 51.], [57., 59.]],
                [[65., 67.], [73., 75.], [81., 83.], [89., 91.]],
                [[97., 99.], [105., 107.], [113., 115.], [121., 123.]],
            ]]
            self.assertAllClose(result, expected)
Beispiel #14
0
    def testIpuEventsWithoutPoplarReporting(self):
        # With profiling off but IPU events on, events are still emitted
        # while their compilation/execution reports stay empty.
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                lhs = array_ops.placeholder(np.float32, [2, 2], name="a")
                rhs = array_ops.placeholder(np.float32, [2, 2], name="b")
                total = math_ops.add(lhs, rhs)

            with ops.device('cpu'):
                trace = gen_ipu_ops.ipu_event_trace()

            cfg = utils.create_ipu_config(profiling=False,
                                          enable_ipu_events=True)
            utils.configure_ipu_system(cfg)

            feed = {lhs: [[1., 1.], [2., 3.]], rhs: [[0., 1.], [4., 5.]]}
            sess.run(trace, feed)

            sess.run(total, feed)

            events = utils.extract_all_events(sess.run(trace, feed))
            # compile begin, compile end, execute
            self.assertEqual(len(events), 3)

            for event in events:
                kind = event.type
                if kind == IpuTraceEvent.COMPILE_END:
                    self.assertFalse(event.compile_end.compilation_report)
                if kind == IpuTraceEvent.EXECUTE:
                    self.assertFalse(event.execute.execution_report)

            sess.close()
Beispiel #15
0
    def testSendScalar(self, dtype):
        # Sends a scalar of the given dtype from the IPU to the host and
        # checks dtype, rank and value of what the host receives.
        with self.session() as sess:
            # Attributes shared by the send op and the matching receive op.
            endpoint = dict(tensor_name="test_tensor",
                            send_device="/device:IPU:0",
                            send_device_incarnation=0,
                            recv_device="/device:CPU:0")

            def device_fn(x):
                return gen_sendrecv_ops.ipu_send_to_host(x, **endpoint)

            inputs = array_ops.placeholder(dtype=dtype, shape=())

            with ipu_scope("/device:IPU:0"):
                send_op = ipu_compiler.compile(device_fn, inputs=[inputs])

            with ops.device("/device:CPU:0"):
                recv_op = gen_sendrecv_ops.ipu_recv_at_host(T=dtype,
                                                            **endpoint)

            cfg = utils.create_ipu_config()
            utils.configure_ipu_system(cfg)

            sent, received = sess.run([send_op, recv_op],
                                      feed_dict={inputs: 1})

            self.assertIsNone(sent)  # Send op has no output
            self.assertEqual(dtype, received.dtype)
            self.assertEqual(0, len(received.shape))
            self.assertEqual(1, received)
Beispiel #16
0
    def test_ipu_horovod_strategy(self):
        # Each worker creates `w` initialised to rank + 1 and returns w * w;
        # the SUM and MEAN reductions across workers are then checked.
        num_workers = hvd.size()
        worker_rank = hvd.rank()

        strategy = IPUHorovodStrategy()
        self.assertEqual(strategy.num_replicas_in_sync, num_workers)

        cfg = ipu_utils.auto_select_ipus(ipu_utils.create_ipu_config(),
                                         num_ipus=1)
        ipu_utils.configure_ipu_system(cfg)

        with strategy.scope():

            def per_replica_fn():
                w = variable_scope.get_variable(
                    name="w", initializer=worker_rank + 1.0)
                self.assertEqual("/replica:0/task:0/device:IPU:0", w.device)
                return w * w

            per_replica_val = strategy.experimental_run_v2(per_replica_fn)
            strategy_sum = strategy.reduce(ReduceOp.SUM, per_replica_val)
            strategy_mean = strategy.reduce(ReduceOp.MEAN, per_replica_val)

            with session.Session() as sess:
                sess.run(variables.global_variables_initializer())

                # All workers should have the initial value from the first worker.
                all_variables = sess.run(variables.global_variables())
                self.assertEqual([1.0], all_variables)
                self.assertEqual(1.0 * num_workers, strategy_sum.eval())
                self.assertEqual(1.0, strategy_mean.eval())
Beispiel #17
0
    def testVectorInputOutput(self):
        # [1, 2] is doubled on the IPU, has 2 added in float64 inside the
        # outside-compilation scope, then has 2 added again: [6, 8].
        with self.session() as sess:

            def device_fn(x):
                with ipu_scope("/device:IPU:0"):
                    doubled = x + x
                    with outside_compilation_scope():
                        # Use float64 which is not supported on IPU
                        wide = math_ops.cast(doubled, dtype=dtypes.float64)
                        two = constant_op.constant(2.0,
                                                   dtype=dtypes.float64,
                                                   shape=(2, ))
                        wide = wide + two
                        narrow = math_ops.cast(wide, dtype=dtypes.float32)
                    shifted = narrow + 2.0
                return shifted

            inputs = array_ops.placeholder(dtype=dtypes.float32, shape=(2, ))
            [device_out] = ipu_compiler.compile(device_fn, inputs=[inputs])

            cfg = utils.create_ipu_config()
            utils.configure_ipu_system(cfg)

            result = sess.run(device_out, feed_dict={inputs: [1.0, 2.0]})
            self.assertEqual((2, ), result.shape)
            self.assertAllEqual([6.0, 8.0], result)
Beispiel #18
0
    def testSentTensorIsUsedAfterReceive(self):
        # A tensor sent to the outside-compilation scope must keep its value
        # on the device after the result of that scope has been received.
        with self.session() as sess:

            def device_fn(x):
                with ipu_scope("/device:IPU:0"):
                    squared = x * x  # 4

                    with outside_compilation_scope():
                        received = squared + 1.0  # 5

                    # `squared` is used again after `received` arrives. If
                    # the receive had overwritten it, the product would be
                    # 25 instead of 20.
                    product = squared * received  # 20

                    return product

            inputs = array_ops.placeholder(dtype=dtypes.float32, shape=())
            [out] = ipu_compiler.compile(device_fn, inputs=[inputs])

            cfg = utils.create_ipu_config()
            utils.configure_ipu_system(cfg)

            value = sess.run(out, feed_dict={inputs: 2.0})
            self.assertEqual(20.0, value)
Beispiel #19
0
    def testTwoInputsTwoOutputs(self):
        # Two scalars are squared on the IPU, offset inside the
        # outside-compilation scope, then scaled on the IPU again:
        #   1 -> 1 -> 2 -> 2    and    2 -> 4 -> 6 -> 12.
        with self.session() as sess:

            def device_fn(x1, x2):
                with ipu_scope("/device:IPU:0"):
                    a = x1 * x1
                    b = x2 * x2
                    with outside_compilation_scope():
                        a = a + 1.0
                        b = b + 2.0
                    a = a * 1.0
                    b = b * 2.0
                    return a, b

            input1 = array_ops.placeholder(dtype=dtypes.float32, shape=())
            input2 = array_ops.placeholder(dtype=dtypes.float32, shape=())
            out1, out2 = ipu_compiler.compile(device_fn,
                                              inputs=[input1, input2])

            cfg = utils.set_optimization_options(
                utils.create_ipu_config(), max_send_recv_cluster_size=8)
            utils.configure_ipu_system(cfg)

            feed = {input1: 1.0, input2: 2.0}
            res1, res2 = sess.run([out1, out2], feed_dict=feed)
            self.assertEqual(2.0, res1)
            self.assertEqual(12.0, res2)
Beispiel #20
0
    def testReportEveryNthExecution_Every1(self):
        # With report_every_nth_execution=1, five executions must yield five
        # EXECUTE events and five execution reports.
        with self.session() as sess:
            with ops.device("/device:IPU:0"):
                lhs = array_ops.placeholder(np.float32, [2, 2], name="a")
                rhs = array_ops.placeholder(np.float32, [2, 2], name="b")
                total = math_ops.add(lhs, rhs)

            with ops.device('cpu'):
                trace = gen_ipu_ops.ipu_event_trace()

            cfg = utils.create_ipu_config(
                profiling=True,
                profile_execution=True,
                report_every_nth_execution=1,
                use_poplar_text_report=False,
            )
            utils.configure_ipu_system(cfg)

            feed = {lhs: [[1., 1.], [2., 3.]], rhs: [[0., 1.], [4., 5.]]}
            sess.run(trace, feed)

            # Execute the same graph five times.
            for _ in range(5):
                sess.run(total, feed)

            raw_events = sess.run(trace, feed)
            parsed = tu.ReportJSON(self)
            counts = parsed.parse_events(raw_events)
            self.assertEqual(counts[IpuTraceEvent.EXECUTE], 5)
            self.assertEqual(
                len(parsed.get_execution_reports()), 5,
                "Every execution should have generated a report")
Beispiel #21
0
    def testPipelineIterationsNotMultiple(self):
        # A three-stage Grouped pipeline is built with a pipeline depth of
        # 10; running it is expected to fail because 10 is not a multiple
        # of 3.
        dataset = tu.create_single_increasing_dataset(5, shape=[4, 4, 2])
        dataset = dataset.batch(batch_size=2, drop_remainder=True)

        def dataset_parser(value):
            return {"a": value, "b": (value + 10.) / 2.0}

        dataset = dataset.map(dataset_parser)
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, "__feed1")
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue("__feed1")

        def stage1(c, **kwargs):
            # The infeed dictionary entries arrive as keyword arguments.
            with variable_scope.variable_scope("vs", use_resource=True):
                conv = layers.Conv2D(
                    2,
                    1,
                    use_bias=True,
                    kernel_initializer=init_ops.ones_initializer(),
                    name='conv1')
                y = conv(kwargs["a"])
                return y + kwargs["b"], c

        def stage2(x, c):
            return math_ops.reduce_sum(x) + c

        def stage3(x):
            return x

        def my_net(c):
            return pipelining_ops.pipeline(
                [stage1, stage2, stage3],
                10,
                inputs=[c],
                infeed_queue=infeed_queue,
                outfeed_queue=outfeed_queue,
                pipeline_schedule=pipelining_ops.PipelineSchedule.Grouped)

        with ops.device('cpu'):
            c = array_ops.placeholder(np.float32, shape=[])

        with tu.ipu_session() as sess:

            with ops.device("/device:IPU:0"):
                compiled = ipu_compiler.compile(my_net, inputs=[c])

            ipu_cfg = utils.create_ipu_config(profiling=True,
                                              profile_execution=True)
            ipu_cfg = utils.auto_select_ipus(ipu_cfg, 4)
            utils.configure_ipu_system(ipu_cfg)
            utils.move_variable_initialization_to_cpu()

            sess.run(variables.global_variables_initializer())
            sess.run(infeed_queue.initializer)

            expected_error = (
                'The pipeline depth of the pipeline must be a multiple of 3')
            with self.assertRaisesRegex(errors.FailedPreconditionError,
                                        expected_error):
                sess.run(compiled, {c: 10.01})
Beispiel #22
0
def _gradient_accumulation_loop(test_wrapper,
                                fwd_fn,
                                inputs_fn,
                                input_values,
                                repeat_count,
                                num_batches_to_accumulate,
                                dataset_fn,
                                optimizer,
                                num_iterations=None):
  """Runs `fwd_fn` in a repeat loop under a gradient accumulation optimizer.

  Builds a fresh graph, wraps `optimizer` in
  `GradientAccumulationOptimizerV2` with `num_batches_to_accumulate`, runs
  the model `num_iterations` times on the IPU with data from `dataset_fn()`,
  and returns the dequeued outfeed (the loss enqueued by each iteration).

  When `num_iterations` is None it defaults to
  `repeat_count * num_batches_to_accumulate`.
  """
  if num_iterations is None:
    num_iterations = repeat_count * num_batches_to_accumulate

  graph = ops.Graph()

  with graph.as_default(), test_wrapper.test_session(graph=graph) as sess:
    dataset = dataset_fn()
    inputs = inputs_fn()
    infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())
    outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())

    with variable_scope.variable_scope("ipu", use_resource=True, reuse=False):

      def model(*args):
        loss = fwd_fn(*functional_ops._convert_to_list(args))  # pylint: disable=W0212
        enqueue_op = outfeed_queue.enqueue(loss)
        accumulating_opt = (
            gradient_accumulation_optimizer.GradientAccumulationOptimizerV2(
                optimizer, num_batches_to_accumulate))
        # Carry over the loop inputs only: the trailing arguments are the
        # infeed elements and are dropped from the outputs.
        num_loop_inputs = len(args) - infeed_queue.number_of_tuple_elements
        outputs = list(args[:num_loop_inputs])
        outputs.append(enqueue_op)
        outputs.append(accumulating_opt.minimize(loss))
        return outputs

      def my_net(*args):
        return loops.repeat(num_iterations,
                            model,
                            inputs=args,
                            infeed_queue=infeed_queue)

    with ops.device("/device:IPU:0"):
      loop_ret = ipu_compiler.compile(my_net, inputs=inputs)

    outfeed_op = outfeed_queue.dequeue()

    # Profiling is switched on only when running on the IPU model.
    on_ipu_model = utils.running_on_ipu_model()

    cfg = utils.create_ipu_config(profiling=on_ipu_model,
                                  profile_execution=on_ipu_model)
    cfg = utils.set_ipu_model_options(cfg,
                                      compile_ipu_code=True,
                                      tiles_per_ipu=128)
    cfg = utils.auto_select_ipus(cfg, 1)
    utils.configure_ipu_system(cfg)
    utils.move_variable_initialization_to_cpu()

    sess.run(variables.global_variables_initializer())
    sess.run(infeed_queue.initializer)
    feed = dict(zip(inputs, input_values))
    sess.run(loop_ret, feed_dict=feed)
    return sess.run(outfeed_op)
Beispiel #23
0
    def testTrainWithAutomaticSharding(self):
        # Trains a single Dense layer with a ShardedOptimizer on an
        # IPUEstimator configured for 4 shards with autosharding, then checks
        # that the first logged loss is larger than the last one.
        if ipu_utils.running_on_ipu_model():
            self.skipTest(
                "Replicated top level graphs are not supported on the "
                "IPU_MODEL target")

        def my_model_fn(features, labels, mode):  # pylint: disable=unused-argument
            self.assertEqual(model_fn_lib.ModeKeys.TRAIN, mode)

            with variable_scope.variable_scope("vs", use_resource=True):
                predictions = layers.Dense(units=1)(features)

            loss = losses.mean_squared_error(labels=labels,
                                             predictions=predictions)
            sharded_optimizer_obj = sharded_optimizer.ShardedOptimizer(
                gradient_descent.GradientDescentOptimizer(0.1))
            train_op = sharded_optimizer_obj.minimize(loss)

            return model_fn_lib.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        def my_input_fn():
            dataset = dataset_ops.Dataset.from_tensor_slices(
                _create_regression_dataset(num_samples=1000, num_features=5))
            dataset = dataset.batch(batch_size=2, drop_remainder=True).repeat()
            return dataset

        ipu_options = ipu_utils.create_ipu_config()
        ipu_options = ipu_utils.auto_select_ipus(ipu_options, 4)

        config = ipu_run_config.RunConfig(
            ipu_run_config=ipu_run_config.IPURunConfig(
                iterations_per_loop=2,
                num_shards=4,
                autosharding=True,
                ipu_options=ipu_options),
            log_step_count_steps=1,
            save_summary_steps=1)

        estimator = ipu_estimator.IPUEstimator(model_fn=my_model_fn,
                                               config=config)

        estimator.train(input_fn=my_input_fn, steps=10)

        model_dir = estimator.model_dir
        events_files = glob.glob(model_dir + "/*tfevents*")
        # Use a unittest assertion rather than a bare `assert`, which is
        # stripped under `python -O` and reports nothing useful on failure.
        self.assertEqual(len(events_files), 1)

        # Collect every summary value whose tag mentions the loss.
        loss_output = [
            v.simple_value
            for e in summary_iterator.summary_iterator(events_files[0])
            for v in e.summary.value
            if "loss" in v.tag
        ]

        # assertGreater reports both operands when the check fails.
        self.assertGreater(loss_output[0], loss_output[-1])
Beispiel #24
0
def get_ipu_config(fp_exceptions=True,
                   stochastic_rounding=True,
                   xla_recompute=False,
                   available_memory_proportion=None,
                   disable_graph_outlining=False,
                   num_ipus_required=0,
                   max_cross_replica_sum_buffer_size=0,
                   scheduler_selection='',
                   compile_only=False,
                   partials_type="half"):
    """Assemble and return the IPU options object.

    Args:
        fp_exceptions: Forwarded as the `inv`, `div0`, `oflo` and `nanoo`
            flags of `set_floating_point_behaviour_options`.
        stochastic_rounding: Forwarded as the `esr` flag of the same call.
        xla_recompute: Forwarded as `allow_recompute` to
            `set_recomputation_options`.
        available_memory_proportion: When not None, its string form is set
            as "availableMemoryProportion" on both the convolution and the
            matmul options, together with `partials_type`.
        disable_graph_outlining: Forwarded to `create_ipu_config`.
        num_ipus_required: Forwarded to `auto_select_ipus`.
        max_cross_replica_sum_buffer_size: Forwarded to `create_ipu_config`.
        scheduler_selection: Forwarded to `create_ipu_config`.
        compile_only: When truthy, the connection type is set to
            `DeviceConnectionType.NEVER` with `ipu_version=2` and remote
            buffers enabled.
        partials_type: Value of the "partialsType" option; only applied
            when `available_memory_proportion` is not None.

    Returns:
        The options object after all of the settings above were applied.
    """
    ipu_cfg = utils.create_ipu_config(
        max_report_size=3001819596000,
        merge_infeed_io_copies=True,
        always_rearrange_copies_on_the_host=False,
        selection_order=utils.SelectionOrder.AUTO,
        disable_graph_outlining=disable_graph_outlining,
        max_cross_replica_sum_buffer_size=max_cross_replica_sum_buffer_size,
        scheduler_selection=scheduler_selection)
    ipu_cfg = utils.auto_select_ipus(ipu_cfg, num_ipus_required)
    ipu_cfg = utils.set_matmul_options(ipu_cfg, clear_pass_type=True)

    if available_memory_proportion is not None:
        poplar_opts = {
            "availableMemoryProportion": str(available_memory_proportion),
            "partialsType": partials_type,
        }
        # Hand each setter its own copy of the option dictionary.
        ipu_cfg = utils.set_convolution_options(ipu_cfg, dict(poplar_opts))
        ipu_cfg = utils.set_matmul_options(ipu_cfg, dict(poplar_opts))

    ipu_cfg = utils.set_norm_options(ipu_cfg, use_stable_statistics=True)
    ipu_cfg = utils.set_recomputation_options(ipu_cfg,
                                              allow_recompute=xla_recompute)

    if compile_only:
        ipu_cfg = utils.set_ipu_connection_type(
            ipu_cfg,
            utils.DeviceConnectionType.NEVER,
            ipu_version=2,
            enable_remote_buffers=True)

    # A single switch drives every floating-point exception flag.
    trap = fp_exceptions
    return utils.set_floating_point_behaviour_options(
        ipu_cfg,
        inv=trap,
        div0=trap,
        oflo=trap,
        esr=stochastic_rounding,
        nanoo=trap)
Beispiel #25
0
 def configureIPU(self,
                  serialization_folder=None,
                  offline_compilation=True):
     """Create an IPU configuration and apply it to the system.

     Args:
         serialization_folder: When truthy, passed to
             `utils.set_serialization_options`.
         offline_compilation: When truthy, the connection type is set to
             `utils.DeviceConnectionType.NEVER` (with 1 as the third
             positional argument).
     """
     ipu_cfg = utils.create_ipu_config()
     if offline_compilation:
         never_connect = utils.DeviceConnectionType.NEVER
         ipu_cfg = utils.set_ipu_connection_type(ipu_cfg, never_connect, 1)
     if serialization_folder:
         ipu_cfg = utils.set_serialization_options(ipu_cfg,
                                                   serialization_folder)
     utils.configure_ipu_system(ipu_cfg)
Beispiel #26
0
 def _configureIPU(self, serialization_folder, verification_options=None):
     """Configure the IPU system with serialization and no device connection.

     Args:
         serialization_folder: Passed to `utils.set_serialization_options`.
         verification_options: When truthy, transfer options are set with
             `True` and these verification options are applied.
     """
     never_connect = utils.DeviceConnectionType.NEVER
     ipu_cfg = utils.set_ipu_connection_type(utils.create_ipu_config(),
                                             never_connect, 1)
     ipu_cfg = utils.set_serialization_options(ipu_cfg, serialization_folder)
     if verification_options:
         ipu_cfg = utils.set_transfer_options(ipu_cfg, True)
         ipu_cfg = utils.set_verification_options(ipu_cfg,
                                                  verification_options)
     utils.configure_ipu_system(ipu_cfg)
Beispiel #27
0
def testInput():
    """Run the frozen graph from 'model.pb' on example images and time it.

    Configures the IPU system with profiling enabled, imports the serialized
    graph, evaluates 'InceptionV3/Predictions/Softmax:0' on the images
    returned by `getExamples()`, and prints the predictions together with
    the elapsed time, the number of examples and the throughput.
    """
    import time

    # Create the profiling-enabled configuration first and select the IPU on
    # that same object. Re-creating the configuration after the selection
    # would discard the selection.
    config = utils.create_ipu_config(profiling=True,
                                     use_poplar_text_report=True)
    config = utils.auto_select_ipus(config, 1)
    utils.configure_ipu_system(config)

    # config = utils.set_convolution_options(config, {"partialsType": str('half')})
    # config = utils.set_matmul_options(config, {"partialsType": str('half')})

    gdv = tf.Graph()
    with gdv.as_default():
        g1 = tf.GraphDef()
        # Load model with pywrap instead? https://github.com/graphcore/examples/blob/master/applications/tensorflow/cnns/training/weight_avg.py#L33
        with tf.gfile.GFile('model.pb', 'rb') as fid:
            serialized_graph = fid.read()
            g1.ParseFromString(serialized_graph)
            tf.import_graph_def(g1, name='')

    with tf.Session(graph=gdv) as sess:
        inp_tensor = gdv.get_tensor_by_name('input:0')
        out_tensor = gdv.get_tensor_by_name(
            'InceptionV3/Predictions/Softmax:0')
        image_np = getExamples()
        #image_np = getSyntheticExamples()
        # Print arrays in full, without summarisation.
        np.set_printoptions(threshold=np.inf)

        tic = time.time()

        # This is new and doesn't crash, but doesn't seem to do anything
        # either.
        # NOTE(review): the graph was imported before this scope was
        # entered, so presumably the scope cannot affect where the imported
        # ops are placed - confirm.
        with ipu_scope("/device:IPU:0"):
            proba = sess.run(out_tensor, {inp_tensor: image_np})
            print(proba)

        toc = time.time()
        duration = toc - tic
        num_images = len(image_np)

        print("Total time taken: {0} seconds".format(duration))
        print("Number of examples: {0}".format(num_images))
        print("Throughput: {0} im/s".format(num_images / duration))
Beispiel #28
0
    def testReplicatedEvaluationOnHost(self):
        """Evaluate with four replicas and check the reported metrics.

        The model's loss is the replication index cast to float32, and the
        metrics are produced by `my_metrics_fn` from the features and
        labels. The test is skipped on the IPU_MODEL target.
        """
        if ipu_utils.running_on_ipu_model():
            self.skipTest(
                "Replicated top level graphs are not supported on the "
                "IPU_MODEL target")

        def my_input_fn():
            feature_values = [0, 0, 0, 1, 0, 0, 0, 1]
            label_values = [0, 1, 0, 1, 0, 1, 0, 1]
            dataset = dataset_ops.Dataset.from_tensor_slices(
                (feature_values, label_values))
            return dataset.batch(2, drop_remainder=True)

        def my_metrics_fn(features, labels):
            # recall_at_k is given the labels cast to int64.
            int64_labels = math_ops.cast(labels, np.int64)
            metrics = {}
            metrics["accuracy"] = metrics_impl.accuracy(labels, features)
            metrics["precision"] = metrics_impl.precision(labels, features)
            metrics["recall"] = metrics_impl.recall(labels, features)
            metrics["recall_at_1"] = metrics_impl.recall_at_k(int64_labels,
                                                              features,
                                                              k=1)
            metrics["recall_at_2"] = metrics_impl.recall_at_k(int64_labels,
                                                              features,
                                                              k=2)
            metrics["mse"] = metrics_impl.mean_squared_error(labels, features)
            metrics["rmse"] = metrics_impl.root_mean_squared_error(
                labels, features)
            return metrics

        def my_model_fn(features, labels, mode):
            replica_index = replication_ops.replication_index()
            loss = math_ops.cast(replica_index, np.float32)
            return ipu_estimator.IPUEstimatorSpec(
                mode,
                loss=loss,
                eval_metrics=(my_metrics_fn, [features, labels]))

        ipu_options = ipu_utils.auto_select_ipus(
            ipu_utils.create_ipu_config(), num_ipus=4)
        run_config = ipu_run_config.IPURunConfig(iterations_per_loop=1,
                                                 num_replicas=4,
                                                 ipu_options=ipu_options)
        config = ipu_run_config.RunConfig(ipu_run_config=run_config)

        estimator = ipu_estimator.IPUEstimator(model_fn=my_model_fn,
                                               config=config)
        scores = estimator.evaluate(my_input_fn, steps=1)

        # Checked in order; the first mismatch fails the test.
        expected_scores = [
            ("accuracy", 0.75),
            ("precision", 1.0),
            ("recall", 0.5),
            ("recall_at_1", 0.5),
            ("recall_at_2", 1.0),
            ("mse", 0.25),
            ("rmse", 0.5),
            (model_fn_lib.LOSS_METRIC_KEY, 1.5),
        ]
        for metric_name, expected_value in expected_scores:
            self.assertEqual(expected_value, scores[metric_name])
Beispiel #29
0
    def testResetSeed(self):
        """Check that every dropout result fed out of the device is distinct.

        Two dropouts are applied per loop iteration and sent through the
        outfeed. After EXECS executions, the number of collected rows and
        the number of distinct rows must both equal
        2 * REPLICAS * REPEATS * EXECS.
        """
        # The dataset for feeding the graphs
        ds = dataset_ops.Dataset.from_tensors(
            array_ops.constant(1.0, shape=[SIZE]))
        ds = ds.map(lambda x: [x, x])
        ds = ds.repeat()

        # The host side queues
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
            ds, feed_name="infeed", replication_factor=REPLICAS)
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
            feed_name="outfeed", replication_factor=REPLICAS)

        # The device side
        def body(x1, x2):
            d1 = rand_ops.dropout(x1)
            d2 = rand_ops.dropout(x2)
            return outfeed_queue.enqueue({'d1': d1, 'd2': d2})

        def my_net():
            return loops.repeat(REPEATS, body, [], infeed_queue)

        with scopes.ipu_scope('/device:IPU:0'):
            res = ipu_compiler.compile(my_net, inputs=[])

        # The outfeed dequeue has to happen after the outfeed enqueue
        dequeue_outfeed = outfeed_queue.dequeue()

        # Configure the hardware
        config = utils.create_ipu_config(profiling=True)
        config = utils.auto_select_ipus(config, REPLICAS)
        config = utils.set_floating_point_behaviour_options(config)
        utils.configure_ipu_system(config)

        with session.Session() as sess:
            res_all = set()
            total = 0

            sess.run(infeed_queue.initializer)

            for _ in range(EXECS):
                sess.run(res)
                outfed_result = sess.run(dequeue_outfeed)
                rows = np.array(list(outfed_result.values())).reshape(
                    [-1, SIZE])
                for r in rows:
                    total += 1
                    # Raw bytes make the row hashable; tobytes() replaces
                    # the deprecated tostring() alias.
                    res_all.add(r.tobytes())

            # 2 dropouts per replica * REPLICAS * REPEATS * EXECS
            expected = 2 * REPLICAS * REPEATS * EXECS
            self.assertEqual(total, expected)
            self.assertEqual(len(res_all), expected)
Beispiel #30
0
def generic_train_graph(opts, is_training):
    """Build the training graph, configure the IPU system and open a session.

    Args:
        opts: Mapping read for the keys 'use_synthetic_data', 'replicas',
            'batches_per_step' and 'use_ipu_model'; it is also passed on to
            the dataset, embedding and graph-building helpers.
        is_training: Passed to `id_embedding`.

    Returns:
        A tuple `(graph_ops, uid_embedding, mid_embedding, cat_embedding)`
        where `graph_ops` is a `GraphOps` holding the session, the variable
        initializer, the averaged loss/aux-loss/accuracy ops, the
        placeholders, the infeed queue, the outfeed (None) and the saver.

    Note:
        `seed` is a free name in this function; it is resolved from outside
        the function body.
    """
    float_type = 'float32'
    graph = tf.Graph()
    with graph.as_default():
        placeholders = {
            "learning_rate": tf.compat.v1.placeholder(float_type, shape=[]),
        }
        uid_embedding, mid_embedding, cat_embedding = id_embedding(
            opts, is_training, seed)

        if opts['use_synthetic_data']:
            dataset_train = get_synthetic_dataset(opts)
        else:
            dataset_train = get_dataset_embed(opts, is_training=True)

        infeed_train = ipu_infeed_queue.IPUInfeedQueue(
            dataset_train,
            feed_name='DIN_dataset_infeed_train',
            replication_factor=(opts['replicas']))

        with ipu_scope('/device:IPU:0'):
            def comp_fn():
                def body(total_loss, total_aux_loss, total_accuracy,
                         uids, mids, cats, mid_his, cat_his, mid_mask,
                         target, seqlen):
                    _prob, loss, aux_loss, accuracy, grad_op = graph_builder(
                        opts, uid_embedding, mid_embedding, cat_embedding,
                        placeholders['learning_rate'],
                        uids, mids, cats, mid_his, cat_his, mid_mask,
                        target, seqlen,
                        use_negsampling=False)

                    # The running totals are created under a control
                    # dependency on the gradient op.
                    with tf.control_dependencies([grad_op]):
                        return (total_loss + loss,
                                total_aux_loss + aux_loss,
                                total_accuracy + accuracy)

                # All three accumulators start from the same zero constant.
                zero = tf.constant(0, np.float32)
                return loops.repeat(opts['batches_per_step'], body,
                                    [zero] * 3, infeed_train)

            outputs_train = ipu_compiler.compile(comp_fn, [])
            avg_loss, avg_aux_loss, avg_accuracy = [
                summed / opts['batches_per_step'] for summed in outputs_train
            ]
            outfeed = None

        saver = tf.compat.v1.train.Saver()
        utils.move_variable_initialization_to_cpu()
        init = tf.compat.v1.global_variables_initializer()

    if opts['use_ipu_model']:
        os.environ["TF_POPLAR_FLAGS"] = "--use_ipu_model"

    ipu_options = utils.create_ipu_config()
    ipu_options = utils.set_optimization_options(
        ipu_options, combine_embedding_lookups=True)
    ipu_options = utils.set_recomputation_options(ipu_options,
                                                  allow_recompute=True)
    ipu_options = utils.auto_select_ipus(ipu_options, [opts['replicas']])
    utils.configure_ipu_system(ipu_options)
    if seed is not None:
        utils.reset_ipu_seed(seed)

    ops_train = [avg_loss, avg_aux_loss, avg_accuracy]
    sess = tf.compat.v1.Session(graph=graph)
    graph_ops = GraphOps(sess,
                         init,
                         ops_train,
                         placeholders,
                         infeed_train,
                         outfeed,
                         saver)
    return graph_ops, uid_embedding, mid_embedding, cat_embedding