Example #1
0
def _gradient_accumulation_loop(test_wrapper,
                                fwd_fn,
                                inputs_fn,
                                input_values,
                                repeat_count,
                                num_batches_to_accumulate,
                                dataset_fn,
                                optimizer,
                                num_iterations=None):
  """Run `fwd_fn` on the IPU under gradient accumulation and return the outfeed.

  Builds a fresh graph and session, wraps `optimizer` in
  `GradientAccumulationOptimizerV2` so that gradients are applied once every
  `num_batches_to_accumulate` batches, repeats the training step
  `num_iterations` times (default `repeat_count * num_batches_to_accumulate`),
  and returns the dequeued outfeed results.
  """
  if num_iterations is None:
    num_iterations = repeat_count * num_batches_to_accumulate

  graph = ops.Graph()

  with graph.as_default(), test_wrapper.test_session(graph=graph) as session:
    dataset = dataset_fn()
    placeholders = inputs_fn()
    infeed = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())
    outfeed = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())

    with variable_scope.variable_scope("ipu", use_resource=True, reuse=False):

      def accumulation_step(*args):
        # One training step: forward pass, enqueue the loss, and minimize
        # through the accumulating optimizer wrapper.
        arg_list = functional_ops._convert_to_list(args)  # pylint: disable=W0212
        loss = fwd_fn(*arg_list)
        enqueue = outfeed.enqueue(loss)
        wrapped_opt = gradient_accumulation_optimizer.GradientAccumulationOptimizerV2(
            optimizer, num_batches_to_accumulate)
        # Pass through only the loop-carried values; the trailing args come
        # from the infeed and are re-supplied each iteration.
        n_loop_vars = len(args) - infeed.number_of_tuple_elements
        outs = list(args[:n_loop_vars])
        outs.append(enqueue)
        outs.append(wrapped_opt.minimize(loss))
        return outs

      def my_net(*args):
        return loops.repeat(num_iterations,
                            accumulation_step,
                            inputs=args,
                            infeed_queue=infeed)

    with ops.device("/device:IPU:0"):
      compiled_net = ipu_compiler.compile(my_net, inputs=placeholders)

    dequeue_op = outfeed.dequeue()

    # Execution profiles are only supported on the IPU model, not real HW.
    on_model = utils.running_on_ipu_model()
    cfg = utils.create_ipu_config(profiling=on_model, profile_execution=on_model)
    cfg = utils.set_ipu_model_options(cfg,
                                      compile_ipu_code=True,
                                      tiles_per_ipu=128)
    cfg = utils.auto_select_ipus(cfg, 1)
    utils.configure_ipu_system(cfg)
    utils.move_variable_initialization_to_cpu()

    session.run(variables.global_variables_initializer())
    session.run(infeed.initializer)
    session.run(compiled_net, feed_dict=dict(zip(placeholders, input_values)))
    return session.run(dequeue_op)
Example #2
0
def _make_config(iterations_per_loop=1):
  """Return a RunConfig for a two-IPU pipelined estimator run.

  Targets the IPU model (compiled code, 128 tiles per IPU) and auto-selects
  as many IPUs as there are pipeline shards.
  """
  pipeline_num_ipus = 2

  opts = ipu_utils.create_ipu_config()
  opts = ipu_utils.set_ipu_model_options(opts,
                                         compile_ipu_code=True,
                                         tiles_per_ipu=128)
  opts = ipu_utils.auto_select_ipus(opts, num_ipus=pipeline_num_ipus)

  ipu_cfg = ipu_run_config.IPURunConfig(num_shards=pipeline_num_ipus,
                                        iterations_per_loop=iterations_per_loop,
                                        ipu_options=opts)
  return ipu_run_config.RunConfig(ipu_run_config=ipu_cfg)
Example #3
0
import numpy as np

import tensorflow._api.v1.compat.v1 as tf
from tensorflow.compiler.plugin.poplar.ops import gen_ipu_ops
from tensorflow.python import ipu
from tensorflow.python.ipu.scopes import ipu_scope, ipu_shard
from tensorflow.python.ipu.utils import (create_ipu_config,
                                         set_ipu_model_options,
                                         auto_select_ipus,
                                         configure_ipu_system)

# Number of IPUs this example selects.
NUM_IPUS = 4

# Configure the IPU system: enable profiling with human-readable Poplar
# text reports, compile real IPU code on the model, and grab NUM_IPUS devices.
cfg = create_ipu_config(
    profiling=True,
    use_poplar_text_report=True,
)
cfg = set_ipu_model_options(cfg, compile_ipu_code=True)
cfg = auto_select_ipus(cfg, NUM_IPUS)
configure_ipu_system(cfg)

# Host-side placeholders that feed the sharded graph.
with tf.device("cpu"):
    pa = tf.placeholder(np.float32, [2], name="a")
    # NOTE(review): op name "d" breaks the a/b/c naming pattern — likely a
    # typo for "b" (Example #4 uses "b"); confirm before relying on op names.
    pb = tf.placeholder(np.float32, [2], name="d")
    pc = tf.placeholder(np.float32, [2], name="c")

# Op that retrieves buffered IPU trace events when run.
with tf.device("cpu"):
    report = gen_ipu_ops.ipu_event_trace()


def shard_graph(pa, pb, pc):
    with ipu_shard(0):
        o1 = pa + pb
Example #4
0
import numpy as np

# IPU imports
from tensorflow.compiler.plugin.poplar.ops import gen_ipu_ops
from tensorflow.python.ipu import utils
from tensorflow.python.ipu.scopes import ipu_scope
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # this example uses TF1 graph/session semantics

# Configure argument for targeting the IPU: profiling on with text reports,
# run on the IPU model without compiling real IPU code, one IPU.
cfg = utils.create_ipu_config(profiling=True, use_poplar_text_report=True)
cfg = utils.set_ipu_model_options(cfg, compile_ipu_code=False)
cfg = utils.auto_select_ipus(cfg, 1)
utils.configure_ipu_system(cfg)

# Host-side placeholders feeding the graph.
with tf.device("cpu"):
    pa = tf.placeholder(np.float32, [2], name="a")
    pb = tf.placeholder(np.float32, [2], name="b")
    pc = tf.placeholder(np.float32, [2], name="c")

    # Create a trace event
    report = gen_ipu_ops.ipu_event_trace()


def basic_graph(pa, pb, pc):
    """Return (pa + pb) + (pa + pc) using elementwise tensor addition."""
    left = pa + pb
    right = pa + pc
    return left + right
Example #5
0
    def pipeline_on_ipu(stages,
                        inputs_fn,
                        input_values,
                        repeat_count,
                        gradient_accumulation_count,
                        dataset_fn,
                        optimizer,
                        test_wrapper,
                        expected_max_tile_memory,
                        recomp,
                        schedule,
                        device_mapping=None,
                        batch_serialization_iterations=1):
        """Build, compile and run a pipelined model on the IPU.

        Wires `stages` into `pipelining_ops.pipeline` with an infeed/outfeed
        queue pair, runs the compiled loop once in a fresh graph/session, and
        returns the first dequeued outfeed element. When running on the IPU
        model, also parses the profile and asserts the stage-to-IPU placement
        and the maximum tile memory.

        Args:
          stages: list of callables, one per pipeline stage.
          inputs_fn: returns the placeholders fed to the compiled pipeline.
          input_values: values zipped with those placeholders at run time.
          repeat_count: `repeat_count` forwarded to `pipelining_ops.pipeline`.
          gradient_accumulation_count: mini-batches accumulated per update.
          dataset_fn: returns the dataset backing the infeed queue.
          optimizer: optimizer wrapped in `OptimizerFunctionOutput`.
          test_wrapper: test case providing `test_session`.
          expected_max_tile_memory: asserted against the profile (30% tolerance).
          recomp: if True, enable recomputation in the IPU config.
          schedule: `pipeline_schedule` forwarded to the pipeline op.
          device_mapping: optional stage->IPU mapping; when None and profiling,
            a default mapping is derived below for the assertion.
          batch_serialization_iterations: forwarded to the pipeline op.

        Returns:
          The first element of the dequeued outfeed.
        """
        g = ops.Graph()
        with g.as_default(), test_wrapper.test_session(graph=g) as session:
            dataset = dataset_fn()
            inputs = inputs_fn()
            infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
                dataset, next_feed_id())
            outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())

            with variable_scope.variable_scope("ipu",
                                               use_resource=True,
                                               reuse=False):

                def optimizer_function(loss):
                    return pipelining_ops.OptimizerFunctionOutput(
                        optimizer, loss)

                def my_net(*args):
                    return pipelining_ops.pipeline(
                        stages,
                        gradient_accumulation_count,
                        repeat_count=repeat_count,
                        batch_serialization_iterations=
                        batch_serialization_iterations,
                        inputs=args,
                        optimizer_function=optimizer_function,
                        infeed_queue=infeed_queue,
                        outfeed_queue=outfeed_queue,
                        pipeline_schedule=schedule,
                        device_mapping=device_mapping)

            with ops.device("/device:IPU:0"):
                compiled_model_pipeline = ipu_compiler.compile(my_net,
                                                               inputs=inputs)

            # Execution profiles of code with dynamic control flow are not supported
            # on real HW.
            profiling = utils.running_on_ipu_model()
            cfg = utils.create_ipu_config(profiling=profiling,
                                          profile_execution=profiling)
            cfg = utils.set_ipu_model_options(cfg,
                                              compile_ipu_code=True,
                                              tiles_per_ipu=128)
            # Fall back to 4 IPUs when no explicit mapping is given.
            num_ipus = get_num_ipus(device_mapping) if device_mapping else 4
            cfg = utils.auto_select_ipus(cfg, num_ipus)
            if recomp:
                cfg = utils.set_recomputation_options(cfg,
                                                      allow_recompute=True)
            utils.configure_ipu_system(cfg)
            utils.move_variable_initialization_to_cpu()

            outfeed_op = outfeed_queue.dequeue()
            # Device already configured above, so tell ReportJSON not to.
            report = tu.ReportJSON(test_wrapper,
                                   session,
                                   configure_device=False)

            session.run(variables.global_variables_initializer())
            session.run(infeed_queue.initializer)
            report.reset()
            session.run(compiled_model_pipeline,
                        feed_dict=dict(zip(inputs, input_values)))
            out = session.run(outfeed_op)[0]
            if profiling:
                report.parse_log()
                if not device_mapping:
                    # Default expected mapping: within each group of four
                    # stages, stage offsets 0,1,2,3 go to IPUs 0,1,3,2.
                    device_mapping = [
                        i - (i % 4) + ((i % 4) if (i % 4) < 2 else 5 - (i % 4))
                        for i in range(len(stages))
                    ]
                report.assert_pipeline_stages_on_expected_ipu(device_mapping)
                report.assert_max_tile_memory(expected_max_tile_memory,
                                              tolerance=0.3)
            return out
Example #6
0
    def _sharded_on_ipu(stages, inputs_fn, input_values, repeat_count,
                        num_batches_to_accumulate, dataset_fn, optimizer,
                        test_wrapper, recomp, device_mapping):
        """Run `stages` as a manually sharded (non-pipelined) model on the IPU.

        Places each stage on its own IPU shard via `scopes.ipu_shard`, trains
        with `GradientAccumulationOptimizer` accumulating over
        `num_batches_to_accumulate` batches, runs the compiled loop
        `repeat_count` times, and returns the dequeued outfeed results.
        Serves as the reference for the pipelined variant above.
        """
        g = ops.Graph()
        with g.as_default(), test_wrapper.test_session(graph=g) as session:
            dataset = dataset_fn()
            inputs = inputs_fn()
            infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
                dataset, next_feed_id())
            outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())

            with variable_scope.variable_scope("ipu_sharded",
                                               use_resource=True,
                                               reuse=False):
                # By default place stage i on shard i.
                if device_mapping is None:
                    device_mapping = range(len(stages))

                def pipeline(*args):
                    # Chain the stages, each inside its own shard scope;
                    # the last stage's output is treated as the loss.
                    outputs = args
                    for i, stage in zip(device_mapping, stages):
                        with scopes.ipu_shard(i):
                            outputs = stage(
                                *functional_ops._convert_to_list(outputs))  # pylint: disable=W0212
                    loss = outputs
                    enqueue_op = outfeed_queue.enqueue(loss)
                    opt = gradient_accumulation_optimizer.GradientAccumulationOptimizer(
                        optimizer, num_batches_to_accumulate)
                    # Pass through only the loop-carried args; the trailing
                    # ones are re-supplied by the infeed each iteration.
                    outs = list(args[:len(args) -
                                     infeed_queue.number_of_tuple_elements])
                    outs.append(enqueue_op)
                    outs.append(opt.minimize(loss))
                    return outs

                def my_net(*args):
                    return loops.repeat(num_batches_to_accumulate,
                                        pipeline,
                                        inputs=args,
                                        infeed_queue=infeed_queue)

            with ops.device("/device:IPU:0"):
                compiled_model_pipeline = ipu_compiler.compile(my_net,
                                                               inputs=inputs)

            outfeed_op = outfeed_queue.dequeue()

            # Execution profiles of code with dynamic control flow are not supported on real HW
            profiling = utils.running_on_ipu_model()

            cfg = utils.create_ipu_config(profiling=profiling,
                                          profile_execution=profiling)
            cfg = utils.set_ipu_model_options(cfg,
                                              compile_ipu_code=True,
                                              tiles_per_ipu=128)
            # Fall back to 4 IPUs when no explicit mapping is given.
            num_ipus = get_num_ipus(device_mapping) if device_mapping else 4
            cfg = utils.auto_select_ipus(cfg, num_ipus)
            if recomp:
                cfg = utils.set_recomputation_options(cfg,
                                                      allow_recompute=True)
            utils.configure_ipu_system(cfg)
            utils.move_variable_initialization_to_cpu()

            session.run(variables.global_variables_initializer())
            session.run(infeed_queue.initializer)
            for _ in range(repeat_count):
                session.run(compiled_model_pipeline,
                            feed_dict=dict(zip(inputs, input_values)))
            return session.run(outfeed_op)