Exemple #1
0
    def testTrainReplicated(self):
        """Train a replicated model; check session-run count and summary losses."""
        if ipu_utils.running_on_ipu_model():
            self.skipTest(
                "Replicated top level graphs are not supported on the "
                "IPU_MODEL target")

        def my_model_fn(features, labels, mode):  # pylint: disable=unused-argument
            self.assertEqual(model_fn_lib.ModeKeys.TRAIN, mode)

            # The loss is the feature value summed across all replicas.
            loss = ipu.ops.cross_replica_ops.cross_replica_sum(features,
                                                               name="loss")

            # Nothing to optimize; an identity op stands in for the train op.
            train_op = array_ops.identity(loss)

            return model_fn_lib.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        def my_input_fn():
            dataset = tu.create_dual_increasing_dataset(10,
                                                        data_shape=[1],
                                                        label_shape=[1])
            dataset = dataset.batch(batch_size=1, drop_remainder=True)
            return dataset

        ipu_options = ipu_utils.create_ipu_config()
        ipu_options = ipu_utils.auto_select_ipus(ipu_options, 4)
        config = ipu_run_config.RunConfig(
            ipu_run_config=ipu_run_config.IPURunConfig(
                iterations_per_loop=2, num_replicas=4,
                ipu_options=ipu_options),
            log_step_count_steps=1,
            save_summary_steps=1)

        estimator = ipu_estimator.IPUEstimator(model_fn=my_model_fn,
                                               config=config)

        session_run_counter = _SessionRunCounter()

        num_steps = 6
        estimator.train(input_fn=my_input_fn,
                        steps=num_steps,
                        hooks=[session_run_counter])

        # One session run covers `iterations_per_loop` training steps.
        self.assertEqual(
            session_run_counter.num_session_runs,
            num_steps // config.ipu_run_config.iterations_per_loop)

        model_dir = estimator.model_dir
        events_file = glob.glob(model_dir + "/*tfevents*")
        # assertEqual instead of a bare `assert`: bare asserts are stripped
        # when Python runs with -O, and assertEqual reports a better message.
        self.assertEqual(len(events_file), 1)
        events_file = events_file[0]
        loss_output = []
        for e in summary_iterator.summary_iterator(events_file):
            for v in e.summary.value:
                if "loss" in v.tag:
                    loss_output.append(v.simple_value)

        # loss is averaged across iterations per loop
        self.assertEqual(loss_output, [14.0, 16.0, 18.0])
Exemple #2
0
    def test_all_reduce(self):
        """Check SUM and MEAN all-reduce of the replica index across replicas."""
        if ipu_utils.running_on_ipu_model():
            self.skipTest(
                "Replicated top level graphs are not supported on the "
                "IPU_MODEL target")

        strategy = ipu_strategy.IPUStrategy()

        def build_reducer(reduce_op):
            # Compiled function that all-reduces this replica's own index.
            @def_function.function(experimental_compile=True)
            def reducer():
                ctx = distribution_strategy_context.get_replica_context()
                index = math_ops.cast(replication_ops.replication_index(),
                                      np.float32)
                return ctx.all_reduce(reduce_op, index)

            return reducer

        report = tu.ReportJSON(self, eager_mode=True, replicated=True)
        report.reset()

        with strategy.scope():
            # Two replicas with indices 0 and 1: sum is 1.0, mean is 0.5.
            total = strategy.experimental_run_v2(
                build_reducer(reduce_util.ReduceOp.SUM))
            self.assertEqual(1.0, total.numpy())

            average = strategy.experimental_run_v2(
                build_reducer(reduce_util.ReduceOp.MEAN))
            self.assertEqual(0.5, average.numpy())
Exemple #3
0
    def testPipelineCompare3(self):
        """Compare an embedding-lookup pipeline against the CPU reference."""
        if utils.running_on_ipu_model():
            self.skipTest(
                "Replicated top level graphs are not supported on the "
                "IPU_MODEL target")

        def dataset_fn():
            ds = tu.create_single_increasing_dataset(10, shape=[4])
            ds = ds.batch(batch_size=2, drop_remainder=True)

            def to_features_and_label(value):
                label = math_ops.reduce_mean(value, axis=[1])
                features = math_ops.cast(value, np.int32)
                return features, math_ops.cast(label / 10, np.int32)

            return ds.map(to_features_and_label)

        optimizer = gradient_descent.GradientDescentOptimizer(0.01)
        repeat_count = 2
        gradient_accumulation_count = 20

        def stage1(idx, label):
            # The embedding is the only trainable variable in the pipeline.
            with variable_scope.variable_scope("stage1", use_resource=True):
                embedding = variable_scope.get_variable(
                    "c",
                    shape=[10, 1216],
                    dtype=np.float32,
                    initializer=init_ops.constant_initializer(10.01),
                    trainable=True)
                return embedding_ops.embedding_lookup(embedding, idx), label

        def stage2(x, label):
            # Pass-through stage.
            with variable_scope.variable_scope("stage2", use_resource=True):
                return x, label

        def stage3(x, label):
            # Pass-through stage.
            with variable_scope.variable_scope("stage3", use_resource=True):
                return x, label

        def stage4(x, label):
            with variable_scope.variable_scope("stage4", use_resource=True):
                logits = math_ops.reduce_sum(x, axis=[-1])
                return math_ops.reduce_mean(
                    nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                labels=label))

        pipelining_test_util.PipelineTester.compare_pipeline_to_cpu(
            [stage1, stage2, stage3, stage4],
            lambda: [], [],
            repeat_count,
            gradient_accumulation_count,
            dataset_fn,
            optimizer,
            self,
            13821,
            True,
            schedule=pipelining_ops.PipelineSchedule.Interleaved)
Exemple #4
0
    def testKerasLenet(self):
        """Check that PoplarExecutableRunner reproduces the graph's output.

        Runs a Lenet model once on the IPU to generate the Poplar binaries,
        exports the model weights and the inputs, replays the executable
        offline with PoplarExecutableRunner, and compares its output with
        the reference values from the original session run.
        """
        if utils.running_on_ipu_model():
            self.skipTest(
                "PoplarExecutableRunner only works with physical IPUs")

        with tempfile.TemporaryDirectory() as tmp:
            poplar_binaries_folder = os.path.join(tmp, "poplar")
            model_path = os.path.join(tmp, "model")
            weights_file = os.path.join(tmp, "weights.bin")
            output_path = os.path.join(tmp, "output")
            input_values = np.random.uniform(size=(1, 32, 32, 1))
            # Use os.path.join for consistency with the other paths above
            # (was "%s/input.bin" % tmp).
            input_file = os.path.join(tmp, "input.bin")

            with self.session() as sess:

                self.configureIPU(poplar_binaries_folder, False)
                with ops.device("/device:IPU:0"):
                    out, inp, model = instantiate_lenet()

                utils.move_variable_initialization_to_cpu()
                sess.run(global_variables_initializer())

                utils.export_inputs_to_file([inp], input_file,
                                            {inp: input_values})

                # Run the model once to generate the poplar binaries.
                reference_values = sess.run(out, {inp: input_values})

                # Export the model & weights.
                saved_model.save(model, model_path)

            metadata_file = self.getSingleFileWithExt(poplar_binaries_folder,
                                                      "json")
            executable_file = self.getSingleFileWithExt(
                poplar_binaries_folder, "ipu_bin")

            # Extract the weights into the format the runner expects.
            self.runPythonCommand(
                (("./tensorflow/compiler/plugin/poplar/tools/"
                  "tensorflow_weights_extractor.py -o %s -s %s -m %s") %
                 (weights_file, model_path, metadata_file)).split())

            # Replay the executable offline with the exported artifacts.
            self.runCommand((("./third_party/ipus/tools/PoplarExecutableRunner"
                              " --binaries %s,%s,%s "
                              "--output_folder=%s --strict") % (
                                  executable_file,
                                  weights_file,
                                  input_file,
                                  output_path,
                              )).split())

            output_file = self.getSingleFileWithExt(output_path, "data")
            with open(output_file, 'r') as f:
                runner_values = np.array(json.load(f))
                logging.info("Reference %s\nRunner: %s", reference_values,
                             runner_values)
                self.assertAllClose(reference_values, runner_values)
Exemple #5
0
def _gradient_accumulation_loop(test_wrapper,
                                fwd_fn,
                                inputs_fn,
                                input_values,
                                repeat_count,
                                num_batches_to_accumulate,
                                dataset_fn,
                                optimizer,
                                num_iterations=None):
  """Run `fwd_fn` in an IPU repeat loop with gradient accumulation.

  Builds a fresh graph with an infeed/outfeed pair, wraps `optimizer` in
  GradientAccumulationOptimizerV2 so weights are updated only every
  `num_batches_to_accumulate` batches, compiles and runs the loop, and
  returns the dequeued outfeed contents (the per-batch losses).

  Args:
    test_wrapper: test case providing `test_session`.
    fwd_fn: forward function returning the loss; called with the
      placeholder values followed by the infeed tuple elements.
    inputs_fn: returns the placeholders fed via `feed_dict`.
    input_values: values bound to those placeholders.
    repeat_count: number of accumulation cycles used for the default
      iteration count.
    num_batches_to_accumulate: batches accumulated per weight update.
    dataset_fn: returns the dataset backing the infeed queue.
    optimizer: optimizer to wrap for gradient accumulation.
    num_iterations: total loop iterations; defaults to
      `repeat_count * num_batches_to_accumulate`.

  Returns:
    The result of dequeuing the outfeed queue after the loop has run.
  """
  g = ops.Graph()

  if num_iterations is None:
    num_iterations = repeat_count * num_batches_to_accumulate

  with g.as_default(), test_wrapper.test_session(graph=g) as session:
    dataset = dataset_fn()
    inputs = inputs_fn()
    infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, next_feed_id())
    outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())

    with variable_scope.variable_scope("ipu", use_resource=True, reuse=False):

      def model(*args):
        # One loop-body iteration: compute the loss, enqueue it on the
        # outfeed, and accumulate/apply gradients.
        loss = fwd_fn(*functional_ops._convert_to_list(args))  # pylint: disable=W0212
        enqueue_op = outfeed_queue.enqueue(loss)
        opt = gradient_accumulation_optimizer.GradientAccumulationOptimizerV2(
            optimizer, num_batches_to_accumulate)
        # Thread only the non-infeed args through to the next iteration;
        # the trailing args come from the infeed queue each time.
        outs = list(args[:len(args) - infeed_queue.number_of_tuple_elements])
        outs.append(enqueue_op)
        outs.append(opt.minimize(loss))
        return outs

      def my_net(*args):
        return loops.repeat(num_iterations,
                            model,
                            inputs=args,
                            infeed_queue=infeed_queue)

    with ops.device("/device:IPU:0"):
      loop_ret = ipu_compiler.compile(my_net, inputs=inputs)

    outfeed_op = outfeed_queue.dequeue()

    # Profiling is only enabled when running on the IPU simulator model.
    profiling = utils.running_on_ipu_model()

    cfg = utils.create_ipu_config(profiling=profiling,
                                  profile_execution=profiling)
    cfg = utils.set_ipu_model_options(cfg,
                                      compile_ipu_code=True,
                                      tiles_per_ipu=128)
    cfg = utils.auto_select_ipus(cfg, 1)
    utils.configure_ipu_system(cfg)
    utils.move_variable_initialization_to_cpu()

    session.run(variables.global_variables_initializer())
    session.run(infeed_queue.initializer)
    session.run(loop_ret, feed_dict=dict(zip(inputs, input_values)))
    return session.run(outfeed_op)
Exemple #6
0
    def testTrainWithAutomaticSharding(self):
        """Train a sharded linear model and check that the loss decreases."""
        if ipu_utils.running_on_ipu_model():
            self.skipTest(
                "Replicated top level graphs are not supported on the "
                "IPU_MODEL target")

        def my_model_fn(features, labels, mode):
            self.assertEqual(model_fn_lib.ModeKeys.TRAIN, mode)

            with variable_scope.variable_scope("vs", use_resource=True):
                predictions = layers.Dense(units=1)(features)

            loss = losses.mean_squared_error(labels=labels,
                                             predictions=predictions)
            sharded_optimizer_obj = sharded_optimizer.ShardedOptimizer(
                gradient_descent.GradientDescentOptimizer(0.1))
            train_op = sharded_optimizer_obj.minimize(loss)

            return model_fn_lib.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        def my_input_fn():
            dataset = dataset_ops.Dataset.from_tensor_slices(
                _create_regression_dataset(num_samples=1000, num_features=5))
            dataset = dataset.batch(batch_size=2, drop_remainder=True).repeat()
            return dataset

        ipu_options = ipu_utils.create_ipu_config()
        ipu_options = ipu_utils.auto_select_ipus(ipu_options, 4)

        config = ipu_run_config.RunConfig(
            ipu_run_config=ipu_run_config.IPURunConfig(
                iterations_per_loop=2,
                num_shards=4,
                autosharding=True,
                ipu_options=ipu_options),
            log_step_count_steps=1,
            save_summary_steps=1)

        estimator = ipu_estimator.IPUEstimator(model_fn=my_model_fn,
                                               config=config)

        estimator.train(input_fn=my_input_fn, steps=10)

        model_dir = estimator.model_dir
        events_file = glob.glob(model_dir + "/*tfevents*")
        # assertEqual instead of a bare `assert`: bare asserts are stripped
        # when Python runs with -O, and assertEqual reports a better message.
        self.assertEqual(len(events_file), 1)
        events_file = events_file[0]
        loss_output = []
        for e in summary_iterator.summary_iterator(events_file):
            for v in e.summary.value:
                if "loss" in v.tag:
                    loss_output.append(v.simple_value)

        # assertGreater gives a more informative failure message than
        # assertTrue on a raw comparison.
        self.assertGreater(loss_output[0], loss_output[-1])
Exemple #7
0
    def testReplicatedEvaluationOnHost(self):
        """Evaluate host-computed metrics over data spread across 4 replicas."""
        if ipu_utils.running_on_ipu_model():
            self.skipTest(
                "Replicated top level graphs are not supported on the "
                "IPU_MODEL target")

        def my_input_fn():
            features = [0, 0, 0, 1, 0, 0, 0, 1]
            labels = [0, 1, 0, 1, 0, 1, 0, 1]
            dataset = dataset_ops.Dataset.from_tensor_slices(
                (features, labels))
            return dataset.batch(2, drop_remainder=True)

        def my_metrics_fn(features, labels):
            # recall_at_k requires int64 labels.
            labels64 = math_ops.cast(labels, np.int64)
            metrics = {}
            metrics["accuracy"] = metrics_impl.accuracy(labels, features)
            metrics["precision"] = metrics_impl.precision(labels, features)
            metrics["recall"] = metrics_impl.recall(labels, features)
            metrics["recall_at_1"] = metrics_impl.recall_at_k(labels64,
                                                              features,
                                                              k=1)
            metrics["recall_at_2"] = metrics_impl.recall_at_k(labels64,
                                                              features,
                                                              k=2)
            metrics["mse"] = metrics_impl.mean_squared_error(labels, features)
            metrics["rmse"] = metrics_impl.root_mean_squared_error(
                labels, features)
            return metrics

        def my_model_fn(features, labels, mode):
            # Loss equals the replica index, so the reported mean over four
            # replicas is (0 + 1 + 2 + 3) / 4 = 1.5.
            loss = math_ops.cast(replication_ops.replication_index(),
                                 np.float32)
            return ipu_estimator.IPUEstimatorSpec(
                mode,
                loss=loss,
                eval_metrics=(my_metrics_fn, [features, labels]))

        ipu_options = ipu_utils.create_ipu_config()
        ipu_options = ipu_utils.auto_select_ipus(ipu_options, num_ipus=4)
        config = ipu_run_config.RunConfig(
            ipu_run_config=ipu_run_config.IPURunConfig(
                iterations_per_loop=1, num_replicas=4,
                ipu_options=ipu_options))

        estimator = ipu_estimator.IPUEstimator(model_fn=my_model_fn,
                                               config=config)
        scores = estimator.evaluate(my_input_fn, steps=1)

        expected = {
            "accuracy": 0.75,
            "precision": 1.0,
            "recall": 0.5,
            "recall_at_1": 0.5,
            "recall_at_2": 1.0,
            "mse": 0.25,
            "rmse": 0.5,
        }
        for name, value in expected.items():
            self.assertEqual(value, scores[name])
        self.assertEqual(1.5, scores[model_fn_lib.LOSS_METRIC_KEY])
Exemple #8
0
    def testReplicatedPrediction(self):
        """Predict with 4 replicas; each replica gets one batch of two rows."""
        if ipu_utils.running_on_ipu_model():
            self.skipTest(
                "Replicated top level graphs are not supported on the "
                "IPU_MODEL target")

        def my_input_fn():
            # Two rows per replica; the model returns each batch's maximum.
            features = [[1.0], [3.0],  # IPU0
                        [5.0], [3.0],  # IPU1
                        [7.0], [3.0],  # IPU2
                        [9.0], [3.0]]  # IPU3
            dataset = dataset_ops.Dataset.from_tensor_slices(features)
            return dataset.batch(batch_size=2, drop_remainder=True)

        hook = ipu_session_run_hooks.IPULoggingTensorHook(every_n_iter=1,
                                                          replication_factor=4)

        def my_model_fn(features, mode):
            logging_op = hook.log({"features": features})
            with ops.control_dependencies([logging_op]):
                predictions = math_ops.reduce_max(features)
            return model_fn_lib.EstimatorSpec(mode, predictions=predictions)

        ipu_options = ipu_utils.create_ipu_config()
        ipu_options = ipu_utils.auto_select_ipus(ipu_options, num_ipus=4)
        config = ipu_run_config.RunConfig(
            ipu_run_config=ipu_run_config.IPURunConfig(
                iterations_per_loop=1, num_replicas=4,
                ipu_options=ipu_options))
        estimator = ipu_estimator.IPUEstimator(model_fn=my_model_fn,
                                               config=config)

        # Single examples are yielded one replica output at a time.
        single = estimator.predict(input_fn=my_input_fn,
                                   yield_single_examples=True)
        self.assertEqual(3.0, next(single))
        self.assertEqual(5.0, next(single))

        # Without yield_single_examples, each step yields one array holding
        # all four replica outputs.
        batched = estimator.predict(input_fn=my_input_fn,
                                    yield_single_examples=False,
                                    hooks=[hook])
        np.testing.assert_array_equal([3.0, 5.0, 7.0, 9.0], next(batched))
Exemple #9
0
    def testPipelineCompare7(self):
        """Compare a pipeline where stages 1 and 2 have no backward stage."""
        if utils.running_on_ipu_model():
            self.skipTest(
                "Replicated top level graphs are not supported on the "
                "IPU_MODEL target")

        # Stage 1 and 2 don't have a backward stage.
        def dataset_fn():
            dataset = tu.create_single_increasing_dataset(7, shape=[4, 4, 2])
            dataset = dataset.batch(batch_size=2, drop_remainder=True)

            def dataset_parser(value):
                img = value / 7
                label = value[0][0][0][0]
                return img, label

            return dataset.map(dataset_parser)

        gradient_accumulation_count = 16
        repeat_count = 2
        optimizer = gradient_descent.GradientDescentOptimizer(0.01)

        def stage1(c, img, label):
            # Pass-through: no trainable variables in this stage.
            with variable_scope.variable_scope("stage1", use_resource=True):
                return img, c, label

        def stage2(x, c, label):
            # No trainable variables; print_tensor only adds a dependency.
            with variable_scope.variable_scope("stage2", use_resource=True):
                with ops.control_dependencies([internal_ops.print_tensor(x)]):
                    return x * 20, c, label

        def stage3(x, c, label):
            with variable_scope.variable_scope("stage3", use_resource=True):
                return layers.Dense(
                    2,
                    kernel_initializer=init_ops.constant_initializer(0.5),
                    bias_initializer=init_ops.constant_initializer(0.5))(
                        x), c, label

        def stage4(x, c, label):
            with variable_scope.variable_scope("stage4", use_resource=True):
                return math_ops.reduce_sum(x) + c + label

        def inputs_fn():
            with ops.device('cpu'):
                return [array_ops.placeholder(np.float32, shape=[])]

        # Pass the schedule by keyword for consistency with the other
        # pipeline comparison tests.
        pipelining_test_util.PipelineTester.compare_pipeline_to_cpu(
            [stage1, stage2, stage3, stage4], inputs_fn, [10.01], repeat_count,
            gradient_accumulation_count, dataset_fn, optimizer, self, 14502,
            True, schedule=pipelining_ops.PipelineSchedule.Grouped)
Exemple #10
0
    def testReplicatedEvaluation(self):
        """Evaluate across 4 replicas; metrics aggregate over all replicas."""
        if ipu_utils.running_on_ipu_model():
            self.skipTest(
                "Replicated top level graphs are not supported on the "
                "IPU_MODEL target")

        def my_input_fn():
            # IPU0 mean: 2, max: 3
            # IPU1 mean: 4, max: 5
            features = [[1.0], [3.0],  # IPU0
                        [5.0], [3.0],  # IPU1
                        [1.0], [3.0],  # IPU2
                        [5.0], [3.0]]  # IPU3
            dataset = dataset_ops.Dataset.from_tensor_slices(features)
            return dataset.batch(batch_size=2, drop_remainder=True)

        def my_model_fn(features, mode):
            # Per-replica loss is the batch max; the reported loss is the
            # mean over replicas: (3 + 5 + 3 + 5) / 4 = 4.
            loss = math_ops.reduce_max(features)
            metric_ops = {"feature_mean": metrics_impl.mean(features)}
            return model_fn_lib.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metric_ops)

        ipu_options = ipu_utils.create_ipu_config()
        ipu_options = ipu_utils.auto_select_ipus(ipu_options, num_ipus=4)
        config = ipu_run_config.RunConfig(
            ipu_run_config=ipu_run_config.IPURunConfig(
                iterations_per_loop=1, num_replicas=4,
                ipu_options=ipu_options))

        estimator = ipu_estimator.IPUEstimator(model_fn=my_model_fn,
                                               config=config)
        scores = estimator.evaluate(my_input_fn, steps=1)
        self.assertEqual(3., scores["feature_mean"])
        self.assertEqual(4., scores[model_fn_lib.LOSS_METRIC_KEY])
Exemple #11
0
    def test_optimizer(self):
        """Apply replica-summed gradients from an infeed with a v2 optimizer."""
        if ipu_utils.running_on_ipu_model():
            self.skipTest(
                "Replicated top level graphs are not supported on the "
                "IPU_MODEL target")

        strategy = ipu_strategy.IPUStrategy()

        report = tu.ReportJSON(self, eager_mode=True, replicated=True)
        report.reset()

        with strategy.scope():
            start_value = 2.0
            var = variables.Variable(start_value)
            lr = 0.5
            steps = 3

            grads = [1.0, 2.0]
            dataset = dataset_ops.Dataset.from_tensor_slices((grads))
            dataset = dataset.repeat(steps)
            infeed = ipu_infeed_queue.IPUInfeedQueue(dataset,
                                                     feed_name="feed",
                                                     replication_factor=2)

            sgd = keras.optimizer_v2.gradient_descent.SGD(lr)

            @def_function.function(experimental_compile=True)
            def apply_gradient():
                # Each replica dequeues one element and treats it as the
                # gradient for `var`.
                value = infeed._dequeue()  # pylint: disable=protected-access
                sgd.apply_gradients([(value, var)])

            # The optimizers in v2 will sum the gradients, and not average them.
            summed_gradient = np.sum(grads)
            want = start_value

            infeed.initializer  # pylint: disable=pointless-statement

            for _ in range(steps):
                strategy.experimental_run_v2(apply_gradient)
                want -= lr * summed_gradient
                self.assertEqual(want, var.numpy())
Exemple #12
0
    def testPipelineCompare1(self):
        """Compare a conv/dense four-stage pipeline against the CPU reference."""
        if utils.running_on_ipu_model():
            self.skipTest(
                "Replicated top level graphs are not supported on the "
                "IPU_MODEL target")

        def dataset_fn():
            ds = tu.create_single_increasing_dataset(7, shape=[4, 4, 2])
            ds = ds.batch(batch_size=2, drop_remainder=True)

            def to_img_and_label(value):
                return value / 7, value[0][0][0][0]

            return ds.map(to_img_and_label)

        optimizer = gradient_descent.GradientDescentOptimizer(0.01)
        repeat_count = 2
        gradient_accumulation_count = 20

        def stage1(c, img, label):
            with variable_scope.variable_scope("stage1", use_resource=True):
                conv = layers.Conv2D(
                    2,
                    1,
                    use_bias=True,
                    kernel_initializer=init_ops.constant_initializer(0.5),
                    bias_initializer=init_ops.constant_initializer(0.5),
                    name='conv1')
                return conv(img), c, label

        def stage2(x, c, label):
            with variable_scope.variable_scope("stage2", use_resource=True):
                return x * 20, c, label

        def stage3(x, c, label):
            with variable_scope.variable_scope("stage3", use_resource=True):
                dense = layers.Dense(
                    2,
                    kernel_initializer=init_ops.constant_initializer(0.5),
                    bias_initializer=init_ops.constant_initializer(0.5))
                return dense(x), c, label

        def stage4(x, c, label):
            with variable_scope.variable_scope("stage4", use_resource=True):
                dense = layers.Dense(
                    2,
                    kernel_initializer=init_ops.constant_initializer(0.5),
                    bias_initializer=init_ops.constant_initializer(0.5))
                return math_ops.reduce_sum(dense(x)) + c + label

        def inputs_fn():
            with ops.device('cpu'):
                return [array_ops.placeholder(np.float32, shape=[])]

        pipelining_test_util.PipelineTester.compare_pipeline_to_cpu(
            [stage1, stage2, stage3, stage4],
            inputs_fn, [10.01],
            repeat_count,
            gradient_accumulation_count,
            dataset_fn,
            optimizer,
            self,
            14374,
            True,
            schedule=pipelining_ops.PipelineSchedule.Interleaved)
Exemple #13
0
    def pipeline_on_ipu(stages,
                        inputs_fn,
                        input_values,
                        repeat_count,
                        gradient_accumulation_count,
                        dataset_fn,
                        optimizer,
                        test_wrapper,
                        expected_max_tile_memory,
                        recomp,
                        schedule,
                        device_mapping=None,
                        batch_serialization_iterations=1):
        """Build, compile and run `stages` as a pipeline on the IPU.

        Sets up infeed/outfeed queues, wraps the stages in
        `pipelining_ops.pipeline` with the given schedule and optimizer,
        configures the IPU system, runs the compiled pipeline once, and,
        when profiling is available (IPU model only), asserts the
        stage-to-IPU placement and the maximum tile memory.

        Args:
            stages: list of pipeline stage functions.
            inputs_fn: returns the placeholders fed via `feed_dict`.
            input_values: values bound to those placeholders.
            repeat_count: forwarded to `pipelining_ops.pipeline`.
            gradient_accumulation_count: batches accumulated per update.
            dataset_fn: returns the dataset backing the infeed queue.
            optimizer: optimizer wrapped in an OptimizerFunctionOutput.
            test_wrapper: test case providing `test_session`.
            expected_max_tile_memory: expected max tile memory, checked
                with 30% tolerance when profiling.
            recomp: whether to enable recomputation.
            schedule: a `pipelining_ops.PipelineSchedule` value.
            device_mapping: optional explicit stage-to-IPU mapping; a
                default is derived from the number of stages when omitted.
            batch_serialization_iterations: forwarded to the pipeline op.

        Returns:
            The first element dequeued from the outfeed queue.
        """

        g = ops.Graph()
        with g.as_default(), test_wrapper.test_session(graph=g) as session:
            dataset = dataset_fn()
            inputs = inputs_fn()
            infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
                dataset, next_feed_id())
            outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())

            with variable_scope.variable_scope("ipu",
                                               use_resource=True,
                                               reuse=False):

                def optimizer_function(loss):
                    return pipelining_ops.OptimizerFunctionOutput(
                        optimizer, loss)

                def my_net(*args):
                    return pipelining_ops.pipeline(
                        stages,
                        gradient_accumulation_count,
                        repeat_count=repeat_count,
                        batch_serialization_iterations=
                        batch_serialization_iterations,
                        inputs=args,
                        optimizer_function=optimizer_function,
                        infeed_queue=infeed_queue,
                        outfeed_queue=outfeed_queue,
                        pipeline_schedule=schedule,
                        device_mapping=device_mapping)

            with ops.device("/device:IPU:0"):
                compiled_model_pipeline = ipu_compiler.compile(my_net,
                                                               inputs=inputs)

            # Execution profiles of code with dynamic control flow are not supported
            # on real HW.
            profiling = utils.running_on_ipu_model()
            cfg = utils.create_ipu_config(profiling=profiling,
                                          profile_execution=profiling)
            cfg = utils.set_ipu_model_options(cfg,
                                              compile_ipu_code=True,
                                              tiles_per_ipu=128)
            num_ipus = get_num_ipus(device_mapping) if device_mapping else 4
            cfg = utils.auto_select_ipus(cfg, num_ipus)
            if recomp:
                cfg = utils.set_recomputation_options(cfg,
                                                      allow_recompute=True)
            utils.configure_ipu_system(cfg)
            utils.move_variable_initialization_to_cpu()

            outfeed_op = outfeed_queue.dequeue()
            # configure_device=False: the IPU system was already configured above.
            report = tu.ReportJSON(test_wrapper,
                                   session,
                                   configure_device=False)

            session.run(variables.global_variables_initializer())
            session.run(infeed_queue.initializer)
            report.reset()
            session.run(compiled_model_pipeline,
                        feed_dict=dict(zip(inputs, input_values)))
            out = session.run(outfeed_op)[0]
            if profiling:
                report.parse_log()
                if not device_mapping:
                    # Default mapping: within each group of four stages, the
                    # stages go to IPUs 0, 1, 3, 2 of that group (the
                    # expression yields offsets 0, 1, 3, 2 for i % 4).
                    device_mapping = [
                        i - (i % 4) + ((i % 4) if (i % 4) < 2 else 5 - (i % 4))
                        for i in range(len(stages))
                    ]
                report.assert_pipeline_stages_on_expected_ipu(device_mapping)
                report.assert_max_tile_memory(expected_max_tile_memory,
                                              tolerance=0.3)
            return out
Exemple #14
0
    def _sharded_on_ipu(stages, inputs_fn, input_values, repeat_count,
                        num_batches_to_accumulate, dataset_fn, optimizer,
                        test_wrapper, recomp, device_mapping):
        """Run `stages` manually sharded across IPUs with gradient accumulation.

        Each stage is placed on the IPU shard given by `device_mapping`
        (one shard per stage by default). The stages are chained inside a
        repeat loop of `num_batches_to_accumulate` iterations, the final
        stage's output is treated as the loss and enqueued on the outfeed,
        and gradients are applied through GradientAccumulationOptimizer.
        The whole loop is run `repeat_count` times and the dequeued outfeed
        contents are returned.
        """

        g = ops.Graph()
        with g.as_default(), test_wrapper.test_session(graph=g) as session:
            dataset = dataset_fn()
            inputs = inputs_fn()
            infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
                dataset, next_feed_id())
            outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(next_feed_id())

            with variable_scope.variable_scope("ipu_sharded",
                                               use_resource=True,
                                               reuse=False):
                if device_mapping is None:
                    device_mapping = range(len(stages))

                def pipeline(*args):
                    # Chain the stages, placing each on its mapped shard.
                    outputs = args
                    for i, stage in zip(device_mapping, stages):
                        with scopes.ipu_shard(i):
                            outputs = stage(
                                *functional_ops._convert_to_list(outputs))  # pylint: disable=W0212
                    loss = outputs
                    enqueue_op = outfeed_queue.enqueue(loss)
                    opt = gradient_accumulation_optimizer.GradientAccumulationOptimizer(
                        optimizer, num_batches_to_accumulate)
                    # Thread only the non-infeed args through to the next
                    # iteration; the trailing args come from the infeed.
                    outs = list(args[:len(args) -
                                     infeed_queue.number_of_tuple_elements])
                    outs.append(enqueue_op)
                    outs.append(opt.minimize(loss))
                    return outs

                def my_net(*args):
                    return loops.repeat(num_batches_to_accumulate,
                                        pipeline,
                                        inputs=args,
                                        infeed_queue=infeed_queue)

            with ops.device("/device:IPU:0"):
                compiled_model_pipeline = ipu_compiler.compile(my_net,
                                                               inputs=inputs)

            outfeed_op = outfeed_queue.dequeue()

            # Execution profiles of code with dynamic control flow are not supported on real HW
            profiling = utils.running_on_ipu_model()

            cfg = utils.create_ipu_config(profiling=profiling,
                                          profile_execution=profiling)
            cfg = utils.set_ipu_model_options(cfg,
                                              compile_ipu_code=True,
                                              tiles_per_ipu=128)
            num_ipus = get_num_ipus(device_mapping) if device_mapping else 4
            cfg = utils.auto_select_ipus(cfg, num_ipus)
            if recomp:
                cfg = utils.set_recomputation_options(cfg,
                                                      allow_recompute=True)
            utils.configure_ipu_system(cfg)
            utils.move_variable_initialization_to_cpu()

            session.run(variables.global_variables_initializer())
            session.run(infeed_queue.initializer)
            for _ in range(repeat_count):
                session.run(compiled_model_pipeline,
                            feed_dict=dict(zip(inputs, input_values)))
            return session.run(outfeed_op)