Example #1
def test_manual_poptorch_opts_inference_grad_accum(tmpdir):
    """
    Ensure if the user passes manual poptorch Options
    and grad accumulation is set greater than 1 for inference, we warn and set it to 1.
    """

    model = IPUModel()
    inference_opts = poptorch.Options()
    inference_opts.Training.gradientAccumulation(4)

    training_opts = poptorch.Options()
    training_opts.Training.gradientAccumulation(1)

    trainer = Trainer(default_root_dir=tmpdir,
                      ipus=1,
                      fast_dev_run=True,
                      plugins=IPUPlugin(inference_opts=inference_opts,
                                        training_opts=training_opts))
    with pytest.warns(
            UserWarning,
            match=
            "Inference poptorch.Options should set gradientAccumulation to 1. "
            "Setting gradientAccumulation to 1 for inference options.",
    ):
        trainer.fit(model)
        assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin)
        assert trainer.accelerator.training_type_plugin.inference_opts.Training.gradient_accumulation == 1
Example #2
def test_manual_poptorch_opts_train_grad_accum(tmpdir):
    """
    Ensure if the user passes manual poptorch Options
    and grad accumulation differs from accumulate_grad_batches,
    we warn and set it to accumulate_grad_batches.
    """

    model = IPUModel()
    inference_opts = poptorch.Options()
    inference_opts.Training.gradientAccumulation(1)

    training_opts = poptorch.Options()
    training_opts.Training.gradientAccumulation(2)

    trainer = Trainer(default_root_dir=tmpdir,
                      ipus=1,
                      fast_dev_run=True,
                      accumulate_grad_batches=1,
                      plugins=IPUPlugin(inference_opts=inference_opts,
                                        training_opts=training_opts))
    with pytest.warns(
            UserWarning,
            match=f"Training poptorch.Options set gradientAccumulation to {2}. "
            f"This is different to accumulate_grad_batches which was set to {1}. "
            f"To change gradientAccumulation, please set accumulate_grad_batches in the Trainer. "
            f"Setting poptorch.Options gradientAccumulation to {1}",
    ):
        trainer.fit(model)
        assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin)
        assert trainer.accelerator.training_type_plugin.inference_opts.Training.gradient_accumulation == 1
Example #3
def test_manual_poptorch_opts_ipu_count(tmpdir):
    """
    Ensure if the user passes manual poptorch Options
    and the number of IPUs does not match, we warn and set it for the user.
    """

    manual_ipus = 1
    expected_ipus = 2
    model = IPUModel()
    inference_opts = poptorch.Options()
    inference_opts.replicationFactor(manual_ipus)

    training_opts = poptorch.Options()
    training_opts.replicationFactor(manual_ipus)

    trainer = Trainer(default_root_dir=tmpdir,
                      ipus=expected_ipus,
                      fast_dev_run=True,
                      plugins=IPUPlugin(inference_opts=inference_opts,
                                        training_opts=training_opts))
    with pytest.warns(
            UserWarning,
            match=
            f"Manual poptorch.Options set replicationFactor to {manual_ipus} "
            f"which differs to the ipus={expected_ipus} flag passed to the Trainer. "
            f"Setting to {expected_ipus} in the poptorch.Options."):
        trainer.fit(model)
        assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin)
        assert trainer.accelerator.training_type_plugin.training_opts.replication_factor == 2
        assert trainer.accelerator.training_type_plugin.inference_opts.replication_factor == 2
Example #4
def test_manual_poptorch_opts(tmpdir):
    """Ensure if the user passes manual poptorch Options, we run with the correct object."""
    model = IPUModel()
    inference_opts = poptorch.Options()
    training_opts = poptorch.Options()

    trainer = Trainer(
        default_root_dir=tmpdir,
        accelerator="ipu",
        devices=2,
        fast_dev_run=True,
        strategy=IPUStrategy(inference_opts=inference_opts,
                             training_opts=training_opts),
    )
    trainer.fit(model)

    assert isinstance(trainer.strategy, IPUStrategy)
    assert trainer.strategy.training_opts == training_opts
    assert trainer.strategy.inference_opts == inference_opts

    dataloader = trainer.train_dataloader.loaders
    assert isinstance(dataloader, poptorch.DataLoader)
    assert dataloader.options == training_opts
    assert trainer.num_devices > 1  # testing this only makes sense in a distributed setting
    assert not isinstance(dataloader.sampler, DistributedSampler)
Example #5
def test_manual_poptorch_dataloader(tmpdir):
    model_options = poptorch.Options()

    class IPUTestModel(IPUModel):
        def train_dataloader(self):
            dataloader = super().train_dataloader()
            # save to instance to compare the reference later
            self.poptorch_dataloader = poptorch.DataLoader(model_options, dataloader.dataset, drop_last=True)
            return self.poptorch_dataloader

    model = IPUTestModel()
    other_options = poptorch.Options()
    trainer = Trainer(
        default_root_dir=tmpdir,
        fast_dev_run=True,
        accelerator="ipu",
        devices=2,
        strategy=IPUStrategy(training_opts=other_options),
    )
    trainer.fit(model)

    assert isinstance(trainer.strategy, IPUStrategy)
    assert trainer.strategy.training_opts is other_options
    dataloader = trainer.train_dataloader.loaders
    assert dataloader is model.poptorch_dataloader  # exact object, was not recreated
    # dataloader uses the options in the model, not the strategy
    assert dataloader.options is model_options
    assert dataloader.options is not other_options
    assert dataloader.drop_last  # was kept
Example #6
def test_manual_poptorch_opts_custom(tmpdir):
    """
    Ensure if the user passes manual poptorch Options with custom parameters set,
    we respect them in our poptorch options and the dataloaders.
    """

    model = IPUModel()
    training_opts = poptorch.Options()
    training_opts.deviceIterations(8)
    training_opts.replicationFactor(2)
    training_opts.Training.gradientAccumulation(2)

    inference_opts = poptorch.Options()
    inference_opts.deviceIterations(16)
    inference_opts.replicationFactor(1)
    inference_opts.Training.gradientAccumulation(1)

    class TestCallback(Callback):
        def on_fit_end(self, trainer: Trainer,
                       pl_module: LightningModule) -> None:
            # ensure dataloaders were correctly set up during training.
            plugin = trainer.accelerator.training_type_plugin
            assert isinstance(plugin, IPUPlugin)
            assert plugin.training_opts.replication_factor == 2
            assert plugin.inference_opts.replication_factor == 1

            val_dataloader = trainer.val_dataloaders[0]
            train_dataloader = trainer.train_dataloader
            assert isinstance(train_dataloader, CombinedLoader)
            train_dataloader = train_dataloader.loaders
            assert isinstance(val_dataloader, poptorch.DataLoader)
            assert isinstance(train_dataloader, poptorch.DataLoader)
            assert train_dataloader.options.replication_factor == 2
            assert val_dataloader.options.replication_factor == 1

    plugin = IPUPlugin(inference_opts=inference_opts,
                       training_opts=training_opts)
    # ensure we default to the training options replication factor
    assert plugin.replication_factor == 2
    trainer = Trainer(default_root_dir=tmpdir,
                      fast_dev_run=True,
                      plugins=plugin,
                      callbacks=TestCallback())
    trainer.fit(model)

    plugin = trainer.accelerator.training_type_plugin
    assert isinstance(plugin, IPUPlugin)

    training_opts = plugin.training_opts
    assert training_opts.device_iterations == 8
    assert training_opts.replication_factor == 2
    assert training_opts.Training.gradient_accumulation == 2

    inference_opts = plugin.inference_opts
    assert inference_opts.device_iterations == 16
    assert inference_opts.replication_factor == 1
    assert inference_opts.Training.gradient_accumulation == 1
Example #7
def test_convpadding_3ch_vs_4ch_forward(precision, bias):
    torch.manual_seed(0)
    conv_3ch = torch.nn.Conv2d(3, 32, (3, 3), bias=bias)
    if precision == "half":
        conv_3ch.half()
    conv_4ch = models.PaddedConv(conv_3ch)
    sample_input = torch.rand(4, 3, 32, 32)
    if precision == "half":
        # Tensor.half() is not in-place; rebind the converted tensor
        sample_input = sample_input.half()
        conv_4ch.half()
    ipu_conv_3ch = poptorch.inferenceModel(conv_3ch, poptorch.Options())
    ipu_conv_4ch = poptorch.inferenceModel(conv_4ch, poptorch.Options())
    result3 = ipu_conv_3ch(sample_input)
    result4 = ipu_conv_4ch(sample_input)
    assert torch.allclose(result3, result4, rtol=1e-03, atol=1e-04)
Example #8
def common_opts():
    opts = poptorch.Options()
    opts.Training.accumulationAndReplicationReductionType(poptorch.ReductionType.Mean)
    opts.outputMode(poptorch.OutputMode.All)
    opts.randomSeed(0)
    opts.Training.gradientAccumulation(1)
    return opts
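# outputMode controls how many results the host receives per step. A minimal
# hedged sketch, separate from the example above, pairing OutputMode.All with
# device iterations:
import torch
import poptorch

sketch_opts = poptorch.Options()
sketch_opts.deviceIterations(4)
sketch_opts.outputMode(poptorch.OutputMode.All)
sketch_model = poptorch.inferenceModel(torch.nn.Linear(10, 10), sketch_opts)
# One row per device iteration; OutputMode.All returns all four results,
# whereas the training default ("Final") would return only the last one.
out = sketch_model(torch.ones(4, 10))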
Example #9
def _run_process_test(shape=None,
                      num_tensors=100,
                      batch_size=1,
                      num_workers=0,
                      device_iterations=1,
                      replication_factor=1,
                      num_runs=1):
    shape = shape or [2, 3]

    opts = poptorch.Options()
    opts.deviceIterations(device_iterations)
    opts.replicationFactor(replication_factor)

    data = poptorch.DataLoader(opts,
                               IncrementDataset(shape, num_tensors),
                               batch_size=batch_size,
                               num_workers=num_workers)

    loader = poptorch.AsynchronousDataAccessor(data)
    assert len(loader) == num_tensors // (device_iterations * batch_size *
                                          replication_factor)

    model = poptorch.inferenceModel(DoubleData(), opts)

    for _ in range(0, num_runs):
        for it, d in enumerate(loader):
            out = model(d)

            expected = torch.stack([
                torch.full(shape, i * 2, dtype=torch.float32)
                for i in range(data.combinedBatchSize *
                               it, data.combinedBatchSize * (it + 1))
            ])

            assert torch.equal(expected, out)
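# A hedged sketch of the batch arithmetic behind the length assertion above:
# poptorch.DataLoader sizes each host batch as batch_size * deviceIterations
# * replicationFactor (times gradientAccumulation for training options) and
# exposes the product as combinedBatchSize.
import torch
import poptorch

sketch_opts = poptorch.Options()
sketch_opts.deviceIterations(4)
sketch_opts.replicationFactor(2)
sketch_data = poptorch.DataLoader(sketch_opts,
                                  torch.utils.data.TensorDataset(torch.rand(64, 2, 3)),
                                  batch_size=2)
assert sketch_data.combinedBatchSize == 2 * 4 * 2
assert len(sketch_data) == 64 // sketch_data.combinedBatchSize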
Example #10
def test_set_popart_options():
    class Network(nn.Module):
        def forward(self, x, y):
            return x + y

    # Create our model.
    model = Network()
    opts = poptorch.Options()
    opts.Popart.set("hardwareInstrumentations", set([0, 1]))
    opts.Popart.set("dotChecks", [0, 1])
    opts.Popart.set("engineOptions", {
        "debug.allowOutOfMemory": "true",
        "exchange.streamBufferOverlap": "any"
    })
    opts.Popart.set("customCodelets", [])
    opts.Popart.set("autoRecomputation", 1)
    opts.Popart.set("cachePath", "/tmp")
    opts.Popart.set("enableOutlining", True)
    opts.Popart.set("batchSerializationSettings.factor", 1)
    opts.Popart.set("batchSerializationSettings.concatOnVirtualGraphChange",
                    True)
    opts.Popart.set("batchSerializationSettings.concatOnExecutionPhaseChange",
                    True)
    opts.Popart.set("batchSerializationSettings.concatOnPipelineStageChange",
                    True)
    opts.Popart.set("batchSerializationSettings.transformContext", 0)
    opts.Popart.set("batchSerializationSettings.method", 0)
    opts.Popart.set("batchSerializationSettings.batchSchedule", 1)
    inference_model = poptorch.inferenceModel(model, opts)
    x = torch.ones(2)
    y = torch.zeros(2)

    inference_model(x, y)
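# A hedged aside on "autoRecomputation" above: PopART enum-valued options are
# passed by integer value; using the popart module avoids the magic number
# (assumes popart.RecomputationType is importable in this environment).
import popart

recompute_opts = poptorch.Options()
recompute_opts.Popart.set("autoRecomputation",
                          int(popart.RecomputationType.Standard))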
Example #11
def _run_test(shape=None,
              num_tensors=100,
              batch_size=1,
              num_workers=0,
              device_iterations=1,
              replication_factor=1):
    shape = shape or [2, 3]

    opts = poptorch.Options()
    opts.deviceIterations(device_iterations)
    opts.replicationFactor(replication_factor)

    data = poptorch.DataLoader(opts,
                               IncrementDataset(shape, num_tensors),
                               batch_size=batch_size,
                               num_workers=num_workers)

    assert len(data) == num_tensors // (device_iterations * batch_size *
                                        replication_factor)
    model = poptorch.inferenceModel(CheckOrderModel(), opts)
    for it, d in enumerate(data):
        expected = torch.from_numpy(
            numpy.stack([
                numpy.full(shape, i, dtype=numpy.float32)
                for i in range(data.combinedBatchSize *
                               it, data.combinedBatchSize * (it + 1))
            ]))
        diff = torch.sum(model(d, expected))

    numpy.testing.assert_array_equal(diff.numpy(), [0.])
Example #12
def test_manual_poptorch_opts(tmpdir):
    """Ensure if the user passes manual poptorch Options, we run with the correct object."""
    model = IPUModel()
    inference_opts = poptorch.Options()
    training_opts = poptorch.Options()

    trainer = Trainer(default_root_dir=tmpdir,
                      ipus=1,
                      fast_dev_run=True,
                      plugins=IPUPlugin(inference_opts=inference_opts,
                                        training_opts=training_opts))
    trainer.fit(model)

    assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin)
    assert trainer.accelerator.training_type_plugin.training_opts == training_opts
    assert trainer.accelerator.training_type_plugin.inference_opts == inference_opts
Example #13
def test_input_8bit(dataset, precision):
    """ Test 8-bit input vs usual input precision
    """
    def run_model(dl, eight_bit_io=False):
        class MyModel(torch.nn.Module):
            def forward(self, x):
                return x * 2.0
        cast_op = "half" if precision == "16.16" else "full"
        model = NormalizeInputModel(MyModel(), mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], output_cast=cast_op) if eight_bit_io else MyModel()
        poptorch_model = poptorch.inferenceModel(model, poptorch.Options())
        input_data = next(iter(dl))[0]
        return poptorch_model(input_data)

    class HelperClass:
        def __init__(self):
            pass
    args = HelperClass()
    opts = poptorch.Options()
    args.batch_size = 1
    args.dataloader_worker = 4
    args.data = dataset
    args.model = "resnet18"
    args.precision = precision
    args.eight_bit_io = False
    args.normalization_location = "host"
    dataloader = get_data(args, opts, train=False)
    result_normal = run_model(dataloader, eight_bit_io=False)
    args.eight_bit_io = True
    args.normalization_location = "ipu"
    dataloader8 = get_data(args, opts, train=False)
    result_8bit = run_model(dataloader8, eight_bit_io=True)
    if dataset != "generated":
        assert torch.allclose(result_8bit, result_normal, atol=4e-03, rtol=1e-03)
    assert result_normal.type() == result_8bit.type()
Example #14
def rng_harness(rng_op, stat_funs):
    class Model(torch.nn.Module):
        def __init__(self):
            super(Model, self).__init__()
            self.rng_op = rng_op

        def forward(self):
            return self.rng_op()

    model = Model()

    # Run on CPU
    native_out = model()

    # Run on IPU
    opts = poptorch.Options().randomSeed(8)
    pop_model = poptorch.inferenceModel(model, opts)
    pop_out = pop_model()

    assert native_out.size() == pop_out.size()

    # PRNG depends on HW implementation so we just check
    # that the distribution statistics are consistent
    print("Checking summary statistics for generated random numbers:")
    for ss in stat_funs:
        print("  {} = {}".format(ss.__name__, ss(pop_out)))
        torch.testing.assert_allclose(ss(native_out),
                                      ss(pop_out),
                                      atol=1e-2,
                                      rtol=0.1)
Example #15
def test_uniform_half(t):
    class Model(torch.nn.Module):
        def forward(self, x):
            torch.manual_seed(42)
            x.uniform_(-1, 1)
            return x

    model = Model()
    input_data = torch.ones(3, 5, 1000, dtype=t)

    # Run on IPU and check that the result has the correct dtype
    opts = poptorch.Options().randomSeed(8)
    pop_model = poptorch.inferenceModel(model, opts)
    pop_out = pop_model(input_data)
    assert pop_out.dtype == t

    if t is not torch.half:
        # Run on CPU
        native_out = model(input_data)
        assert native_out.size() == pop_out.size()

    # Test summary stats - promoting half to float to workaround
    # torch limitations with half
    torch.testing.assert_allclose(-1.0,
                                  torch.min(pop_out.float()),
                                  atol=1e-2,
                                  rtol=0.1)

    torch.testing.assert_allclose(1.0,
                                  torch.max(pop_out.float()),
                                  atol=1e-2,
                                  rtol=0.1)
Example #16
def ipu_options(opt: argparse.ArgumentParser, cfg: yacs.config.CfgNode,
                model: Detector):
    """Configurate the IPU options using cfg and opt options.
    Parameters:
        opt: opt object containing options introduced in the command line
        cfg: yacs object containing the config
        model[Detector]: a torch Detector Model
    Returns:
        ipu_opts: Options for the IPU configuration
    """
    batches_per_step = cfg.ipuopts.batches_per_step
    half = cfg.model.half

    ipu_opts = poptorch.Options()
    ipu_opts.deviceIterations(batches_per_step)
    ipu_opts.autoRoundNumIPUs(True)

    if opt.benchmark:
        ipu_opts.Distributed.disable()

    if half:
        ipu_opts.Precision.setPartialsType(torch.float16)
        model.half()

    return ipu_opts
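# A hedged usage sketch for the builder above; opt, cfg and model are assumed
# to come from the surrounding application, as in the function signature.
ipu_opts = ipu_options(opt, cfg, model)
ipu_model = poptorch.inferenceModel(model, ipu_opts)
# With deviceIterations(batches_per_step), each host call consumes
# batches_per_step batches in a single IPU execution.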
Example #17
    def test_webdata_cache(self):
        """Test cache for webdata
        """
        class HelperClass:
            def __init__(self):
                pass
        args = HelperClass()
        args.precision = "16.16"
        args.model = 'resnet50'
        args.device_iterations = 1
        args.replicas = 1
        args.batch_size = 31
        args.dataloader_worker = 8
        args.normalization_location = 'ipu'
        args.eight_bit_io = False
        args.webdataset_percentage_to_use = 100
        args.data = "imagenet"
        args.webdataset_memory_cache_ratio = 0.8
        args.imagenet_data_path = Path(__file__).parent.parent.absolute().joinpath("data").joinpath("cifar10_webdata")

        dataloader = get_data(args, poptorch.Options(), train=False, async_dataloader=True, return_remaining=True)
        total_samples = 0
        for data, label in dataloader:
            total_samples += label.size()[0]
        assert total_samples == 10000
Example #18
def test_pretrained_prediction(precision, model_name):
    ground_truth = [('zebra.jpg', 340), ('jay.jpg', 17), ('polar_bear.jpg', 296), ('banana.jpg', 954),
                    ('hippo.jpg', 344), ('ostrich.jpg', 9), ('ping-pong_ball.jpg', 722), ('pelican.jpg', 144)]

    class HelperClass:
        def __init__(self):
            pass
    args = HelperClass()
    args.model = model_name
    args.data = "imagenet"
    args.norm_type = "batch"
    args.norm_eps = 1e-5
    args.batchnorm_momentum = 0.1
    args.pipeline_splits = []
    args.normalization_location = "host"
    args.precision = precision
    args.efficientnet_expand_ratio = 6
    args.efficientnet_group_dim = 1
    args.num_io_tiles = 0
    model = models.get_model(args, datasets.datasets_info[args.data], pretrained=True)
    model.eval()
    opts = poptorch.Options()
    if precision == "16.16":
        opts.Precision.setPartialsType(torch.float16)
    else:
        opts.Precision.setPartialsType(torch.float32)

    poptorch_model = poptorch.inferenceModel(model, opts)

    input_size = models.model_input_shape(args)[1]
    augment = datasets.get_preprocessing_pipeline(train=False, half_precision=True if precision == "16.16" else False, input_size=input_size)
    for img_name, class_id in ground_truth:
        sample = augment(Image.open(os.path.join(Path(__file__).parent.parent.absolute(), "data/images/", img_name))).view(1, 3, input_size, input_size)
        pred = poptorch_model(sample)
        assert class_id == torch.argmax(pred), f"Prediction for {img_name} was incorrect."
Example #19
def test_trainingBatching():
    torch.manual_seed(4424242)

    # 10 Batches of 10.
    input = torch.randn(10, 10)

    # 10 batches of 1
    label = torch.randint(0, 10, [1])
    label = label.expand([10])
    model = torch.nn.Linear(10, 10)

    # Run on IPU batch size 1 * 10 popart batches.
    opts = poptorch.Options().deviceIterations(10)
    poptorch_model = helpers.trainingModelWithLoss(
        model, options=opts, loss=torch.nn.CrossEntropyLoss())

    # Run all 10 batches as batchsize 10.
    out = model(input)

    # Sanity check we weren't already matching the label.
    assert not torch.equal(torch.argmax(out, dim=1), label)

    for _ in range(0, 1000):
        _, loss = poptorch_model(input, label)

        # Each batch should NOT report its own loss, since by default the
        # training model uses a "Final" output anchor.
        assert len(loss.size()) == 0

    # Run with trained weights.
    out = model(input)

    # Check we are now equal with labels.
    assert torch.equal(torch.argmax(out, dim=1), label)
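# helpers.trainingModelWithLoss is a poptorch test utility. A hedged sketch
# of the equivalent documented pattern: wrap model and loss in one module and
# return both the output and the loss from forward().
import torch
import poptorch

class TrainingModelWithLoss(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.loss = torch.nn.CrossEntropyLoss()

    def forward(self, x, labels):
        out = self.model(x)
        return out, self.loss(out, labels)

wrapped = TrainingModelWithLoss(torch.nn.Linear(10, 10))
sketch_opts = poptorch.Options().deviceIterations(10)
sketch_model = poptorch.trainingModel(
    wrapped, sketch_opts,
    optimizer=torch.optim.SGD(wrapped.parameters(), lr=0.01))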
Example #20
def _run_dataset_test(shape=None,
                      num_tensors=100,
                      batch_size=1,
                      num_workers=0,
                      device_iterations=1,
                      replication_factor=1,
                      host_id=0,
                      num_hosts=1):
    shape = shape or [2, 3]

    opts = poptorch.Options()
    opts.deviceIterations(device_iterations)
    opts.replicationFactor(replication_factor)
    opts.Distributed.configureProcessId(host_id, num_hosts)

    data = poptorch.DataLoader(opts,
                               IncrementDataset(shape, num_tensors),
                               batch_size=batch_size,
                               num_workers=num_workers)
    loader = poptorch.AsynchronousDataAccessor(data)

    offset = host_id * (num_tensors // num_hosts)
    assert len(data) == num_tensors // (device_iterations * batch_size *
                                        replication_factor * num_hosts)
    for it, d in enumerate(loader):
        expected = torch.from_numpy(
            numpy.stack([
                numpy.full(shape, offset + i, dtype=numpy.float32)
                for i in range(data.combinedBatchSize *
                               it, data.combinedBatchSize * (it + 1))
            ]))
        diff = torch.sum(torch.sum(d - expected))

    numpy.testing.assert_array_equal(diff.numpy(), [0.])
Example #21
def setupOptions(args, train=True):
    """
    Setup poptorch options for either training or inference runs.
    """
    opts = poptorch.Options().deviceIterations(args.batches_per_step)

    if args.cache_dir:
        # Separate caches for training/inference to prevent overwriting.
        prefix = args.conv_mode
        suffix = "-train" if train else "-inference"
        cache = args.cache_dir + f"/{prefix}{suffix}"
        opts.enableExecutableCaching(cache)

    if args.profile_dir:
        # Enable profiling if supported
        assert not args.cache_dir, "Profiling is not supported with executable caching"

        engine_opts = {
            "autoReport.all": "true",
            "autoReport.directory": args.profile_dir,
            "profiler.format": "v3"
        }

        os.environ["POPLAR_ENGINE_OPTIONS"] = json.dumps(engine_opts)

        # Use synthetic data when profiling
        data_mode = int(popart.SyntheticDataMode.RandomNormal)
        opts.Popart.set("syntheticDataMode", data_mode)

    return opts
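# A minimal hedged sketch of executable caching in isolation: when the cache
# directory already holds an executable compiled from an identical
# model/options pair, later runs skip recompilation (path is illustrative).
cache_opts = poptorch.Options()
cache_opts.enableExecutableCaching("/tmp/poptorch-cache")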
Example #22
def common_model_opts():
    model_opts = poptorch.Options()
    model_opts.Training.accumulationReductionType(
        poptorch.ReductionType.Mean)
    model_opts.anchorMode(poptorch.AnchorMode.All)
    model_opts.randomSeed(0)
    model_opts.Training.gradientAccumulation(1)
    return model_opts
Example #23
def ipu_options(cfg: yacs.config.CfgNode, model: Detector):
    batches_per_step = cfg.ipuopts.batches_per_step

    ipu_opts = poptorch.Options()
    ipu_opts.deviceIterations(batches_per_step)
    ipu_opts.autoRoundNumIPUs(True)

    return ipu_opts
Example #24
def test_replication_factor(tmpdir):
    """Ensure if the user passes manual poptorch Options with custom parameters set, we set them correctly in the
    dataloaders."""

    plugin = IPUStrategy()
    trainer = Trainer(accelerator="ipu",
                      devices=2,
                      default_root_dir=tmpdir,
                      fast_dev_run=True,
                      strategy=plugin)
    assert isinstance(trainer.accelerator, IPUAccelerator)
    assert trainer.num_devices == 2
    assert trainer.strategy.replication_factor == 2

    model = BoringModel()
    training_opts = poptorch.Options()
    inference_opts = poptorch.Options()
    training_opts.replicationFactor(8)
    inference_opts.replicationFactor(7)
    plugin = IPUStrategy(inference_opts=inference_opts,
                         training_opts=training_opts)

    trainer = Trainer(default_root_dir=tmpdir,
                      accelerator="ipu",
                      devices=1,
                      strategy=plugin)
    trainer.optimizers = model.configure_optimizers()[0]
    plugin.model = model
    model.trainer = trainer
    trainer.state.fn = TrainerFn.FITTING
    trainer.strategy.setup(trainer)

    trainer.state.stage = RunningStage.TRAINING
    assert trainer.strategy.replication_factor == 8
    trainer.state.stage = RunningStage.VALIDATING
    assert trainer.strategy.replication_factor == 7

    for fn, stage in (
        (TrainerFn.VALIDATING, RunningStage.VALIDATING),
        (TrainerFn.TESTING, RunningStage.TESTING),
        (TrainerFn.PREDICTING, RunningStage.PREDICTING),
    ):
        trainer.state.fn = fn
        trainer.state.stage = stage
        trainer.strategy.setup(trainer)
        assert trainer.strategy.replication_factor == 7
Example #25
def test_float16_activations_float32_weights():
    torch.manual_seed(42)

    input = torch.ones(10)

    model = torch.nn.Linear(10, 20)

    # Float 32 act, float 32 weights
    pop_model = poptorch.inferenceModel(model, poptorch.Options())
    pop_out = pop_model(input)

    assert pop_out.dtype == torch.float

    # Float 16 act, float 32 weights
    pop_model = poptorch.inferenceModel(model, poptorch.Options())
    pop_out = pop_model(input.half())
    assert pop_out.dtype == torch.half
Example #26
def test_2x2_parallel_phased_execution_opts(capfd):
    poptorch.setLogLevel(1)  # Force debug logging
    N = 3
    size = 10

    class Model(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.weights = []
            for n in range(N * 6):
                weight = torch.nn.Parameter(torch.rand(size, size),
                                            requires_grad=True)
                self.register_parameter(f"w{n}", weight)
                self.weights.append(weight)

        def forward(self, in0, target=None):
            phase = 0
            weight = iter(self.weights)
            with poptorch.Block("phase0_ipu0"):
                ins = torch.split(in0, size)
            for n in range(N * 3):
                out = []
                for ipu in range(2):
                    x = ins[ipu]
                    with poptorch.Block(f"phase{phase}_ipu{ipu}"):
                        x = torch.matmul(next(weight), x)
                        out.append(F.relu(x))
                ins = out[1], out[0]
                # We want 2 matmuls in the same phase
                if n % 3 != 1:
                    phase += 1
            with poptorch.Block(f"phase{N*2-1}_ipu1"):
                res = ins[0] + ins[1]
                if target is None:
                    return res
                return res, torch.nn.L1Loss(reduction="mean")(res, target)

    input = torch.rand(size * 2, 1)
    target = torch.rand(size, 1)
    model = Model()
    opts = poptorch.Options()
    phases = []
    # Alternate between 0-2 and 1-3
    for n in range(N):
        phases.append([
            poptorch.Stage(f"phase{2*n}_ipu0").ipu(0),
            poptorch.Stage(f"phase{2*n}_ipu1").ipu(2)
        ])
        phases.append([
            poptorch.Stage(f"phase{2*n+1}_ipu0").ipu(1),
            poptorch.Stage(f"phase{2*n+1}_ipu1").ipu(3)
        ])
    opts.setExecutionStrategy(poptorch.ParallelPhasedExecution(*phases))
    poptorch_model = poptorch.trainingModel(model, opts)
    poptorch_model.compile(input, target)

    testlog = LogChecker(capfd)
    testlog.validate_2x2_parallel_phased_execution()
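# A reduced hedged sketch of the strategy API used above, illustrative of the
# shape only: blocks named in forward() are grouped into Stages, each pinned
# to an IPU, and the per-phase stage lists are handed to
# ParallelPhasedExecution.
sketch_opts = poptorch.Options()
sketch_opts.setExecutionStrategy(
    poptorch.ParallelPhasedExecution(
        [poptorch.Stage("phase0_ipu0").ipu(0)],
        [poptorch.Stage("phase1_ipu0").ipu(1)],
    ))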
Example #27
def run_model(dl, eight_bit_io=False):
    class MyModel(torch.nn.Module):
        def forward(self, x):
            return x * 2.0
    cast_op = "half" if precision == "16.16" else "full"
    model = NormalizeInputModel(MyModel(), mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], output_cast=cast_op) if eight_bit_io else MyModel()
    poptorch_model = poptorch.inferenceModel(model, poptorch.Options())
    input_data = next(iter(dl))[0]
    return poptorch_model(input_data)
Example #28
def test_manual_poptorch_opts(tmpdir):
    """Ensure if the user passes manual poptorch Options, we run with the correct object."""
    model = IPUModel()
    inference_opts = poptorch.Options()
    training_opts = poptorch.Options()

    trainer = Trainer(
        default_root_dir=tmpdir,
        ipus=1,
        fast_dev_run=True,
        strategy=IPUStrategy(inference_opts=inference_opts,
                             training_opts=training_opts),
    )
    trainer.fit(model)

    assert isinstance(trainer.strategy, IPUStrategy)
    assert trainer.strategy.training_opts == training_opts
    assert trainer.strategy.inference_opts == inference_opts
Example #29
def test_offline_ipu():
    class Network(nn.Module):
        def forward(self, x, y):
            return x + y

    model = Network()
    # Force-disable the IPU model
    opts = poptorch.Options().useOfflineIpuTarget()
    poptorch.inferenceModel(model, opts)
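# A hedged follow-on sketch, reusing the Network module from the example
# above: with the offline target, the executable can be compiled ahead of
# time without attaching to IPU hardware.
offline_model = poptorch.inferenceModel(Network(),
                                        poptorch.Options().useOfflineIpuTarget())
offline_model.compile(torch.ones(2), torch.zeros(2))  # compile only, no run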
Example #30
    def _create_opts(self, training: bool) -> "poptorch.Options":
        opts = poptorch.Options()
        opts.deviceIterations(self.device_iterations)
        opts.replicationFactor(self.replication_factor)
        gradient_accumulation = self.accumulate_grad_batches if training else 1
        opts.Training.gradientAccumulation(gradient_accumulation)

        if os.environ.get("PL_GLOBAL_SEED"):
            opts.randomSeed(int(os.environ["PL_GLOBAL_SEED"]))
        return opts
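# A minimal hedged sketch of the seeding path above: PL_GLOBAL_SEED is the
# environment variable that PyTorch Lightning's seed_everything() exports.
import os
import poptorch

os.environ.setdefault("PL_GLOBAL_SEED", "42")
seeded_opts = poptorch.Options()
seeded_opts.randomSeed(int(os.environ["PL_GLOBAL_SEED"]))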