Example 1
def test_reuse_workers(DatasetType):
    shape = [2, 3]
    num_tensors = 10

    opts = poptorch.Options()
    data = poptorch.DataLoader(opts,
                               DatasetType(shape, num_tensors),
                               batch_size=1,
                               num_workers=2)
    data_no_reuse = poptorch.DataLoader(opts,
                                        DatasetType(shape, num_tensors),
                                        batch_size=1,
                                        persistent_workers=False,
                                        num_workers=2)

    loader = poptorch.AsynchronousDataAccessor(data)
    loader_no_reuse = poptorch.AsynchronousDataAccessor(data_no_reuse)

    start = None
    # Workers will be created while fetching the first element,
    # so start the timer after the first element is fetched.
    num_tensors_no_reuse = 0
    for _ in loader_no_reuse:
        num_tensors_no_reuse += 1
        if start is None:
            start = time.perf_counter()

    end = time.perf_counter()
    print(f"First epoch no reuse: {end - start} {num_tensors_no_reuse}")

    for _ in range(3):
        start = time.perf_counter()
        for _ in loader_no_reuse:
            num_tensors_no_reuse += 1
        end = time.perf_counter()
        print(f"Other epoch no reuse: {end - start} {num_tensors_no_reuse}")

    start = None
    # Workers will be created while fetching the first element
    # so start the timer after the first element is fetched.
    num_tensors_reuse = 0
    for _ in loader:
        num_tensors_reuse += 1
        if start is None:
            start = time.perf_counter()
    end = time.perf_counter()
    print(f"First epoch: {end - start} {num_tensors_reuse}")

    for _ in range(3):
        start = time.perf_counter()
        for _ in loader:
            num_tensors_reuse += 1
        end = time.perf_counter()
        print(f"Other epoch: {end - start} {num_tensors_reuse}")
Example 2
def _run_dataset_test(shape=None,
                      num_tensors=100,
                      batch_size=1,
                      num_workers=0,
                      device_iterations=1,
                      replication_factor=1,
                      host_id=0,
                      num_hosts=1):
    shape = shape or [2, 3]

    opts = poptorch.Options()
    opts.deviceIterations(device_iterations)
    opts.replicationFactor(replication_factor)
    opts.Distributed.configureProcessId(host_id, num_hosts)

    data = poptorch.DataLoader(opts,
                               IncrementDataset(shape, num_tensors),
                               batch_size=batch_size,
                               num_workers=num_workers)
    loader = poptorch.AsynchronousDataAccessor(data)

    offset = host_id * (num_tensors // num_hosts)
    assert len(data) == num_tensors // (device_iterations * batch_size *
                                        replication_factor * num_hosts)
    for it, d in enumerate(loader):
        expected = torch.from_numpy(
            numpy.stack([
                numpy.full(shape, offset + i, dtype=numpy.float32)
                for i in range(data.combinedBatchSize *
                               it, data.combinedBatchSize * (it + 1))
            ]))
        # The accessor must return exactly the expected values for each batch.
        diff = torch.sum(torch.sum(d - expected))
        numpy.testing.assert_array_equal(diff.numpy(), [0.])
Example 3
def _run_process_test(shape=None,
                      num_tensors=100,
                      batch_size=1,
                      num_workers=0,
                      device_iterations=1,
                      replication_factor=1,
                      num_runs=1):
    shape = shape or [2, 3]

    opts = poptorch.Options()
    opts.deviceIterations(device_iterations)
    opts.replicationFactor(replication_factor)

    data = poptorch.DataLoader(opts,
                               IncrementDataset(shape, num_tensors),
                               batch_size=batch_size,
                               num_workers=num_workers)

    loader = poptorch.AsynchronousDataAccessor(data)
    assert len(loader) == num_tensors // (device_iterations * batch_size *
                                          replication_factor)

    model = poptorch.inferenceModel(DoubleData(), opts)

    for _ in range(0, num_runs):
        for it, d in enumerate(loader):
            out = model(d)

            expected = torch.stack([
                torch.full(shape, i * 2, dtype=torch.float32)
                for i in range(data.combinedBatchSize *
                               it, data.combinedBatchSize * (it + 1))
            ])

            assert torch.equal(expected, out)
Example 4
def setupInference(model, args):
    """
    Setup a training run using the CIFAR-10 training dataset.

    Uses the poptorch.DataLoader so that each training iteration executed on the
    IPU will incorporate:

        * (mini-)batch size
        * device iterations
        * replica factor
        * gradient accumulation factor

    Applying the poptorch.AsynchronousDataAccessor allows loading the dataset on
    a separate thread.  This reduces the host/IPU communication overhead by
    using the time that the IPU is running to load the next batch on the CPU.
    """
    opts = setupOptions(args, train=False)
    inference_model = poptorch.inferenceModel(model, opts)
    dataset = cifar10(args.data_dir, train=False)

    loader = poptorch.DataLoader(opts,
                                 dataset,
                                 batch_size=args.test_batch_size,
                                 shuffle=True,
                                 drop_last=True,
                                 num_workers=8)
    loader = poptorch.AsynchronousDataAccessor(loader)

    return inference_model, loader
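
To make the docstring's list concrete, here is a rough sketch of the combined batch arithmetic (the numbers are hypothetical, not the ones used by setupInference; gradient accumulation only contributes when it is set on training options):

# Illustration only: hypothetical values, not taken from setupInference().
# The DataLoader's combined batch is the product of the factors listed above.
batch_size = 4              # (mini-)batch size per replica per device iteration
device_iterations = 25      # batches consumed by one call into the IPU
replication_factor = 2      # data-parallel model replicas
gradient_accumulation = 1   # > 1 only applies to training options

samples_per_step = (batch_size * device_iterations *
                    replication_factor * gradient_accumulation)
print(samples_per_step)     # 200 samples fetched from the host per step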
Example 5
def test_broken_dataset():
    num_tensors = 100

    opts = poptorch.Options()
    data = poptorch.DataLoader(opts,
                               BrokenDataset(num_tensors),
                               batch_size=1,
                               num_workers=32)

    with pytest.raises(RuntimeError, match="worker thread failed to start"):
        poptorch.AsynchronousDataAccessor(data)
Example 6
def test_iterable_dataset():
    shape = [2, 3]
    num_tensors = 100

    loader = poptorch.AsynchronousDataAccessor(
        IncrementIterableDataset(shape, num_tensors))

    for _, _ in enumerate(loader):
        continue

    # Make sure it works for more than 1 epoch
    for _, _ in enumerate(loader):
        continue
Example 7
def test_len():
    shape = [2, 3]
    num_tensors = 10

    opts = poptorch.Options()
    data = poptorch.DataLoader(opts,
                               IncrementIterableDataset(shape, num_tensors),
                               batch_size=None,
                               drop_last=False,
                               num_workers=1)

    loader = poptorch.AsynchronousDataAccessor(data)
    with pytest.raises(TypeError,
                       match="'IncrementIterableDataset' has no len()"):
        len(loader)
    data = poptorch.DataLoader(opts,
                               IncrementIterableDatasetWithLen(
                                   shape, num_tensors),
                               batch_size=None,
                               drop_last=False,
                               num_workers=1)

    loader = poptorch.AsynchronousDataAccessor(data)
    len(loader)
Example 8
def test_single_epoch():
    shape = [2, 3]
    num_tensors = 100

    opts = poptorch.Options()
    data = poptorch.DataLoader(opts,
                               IncrementDataset(shape, num_tensors),
                               batch_size=1,
                               num_workers=32)

    loader = poptorch.AsynchronousDataAccessor(data)
    assert len(loader) == num_tensors

    for _, _ in enumerate(loader):
        continue
Example 9
def test_interrupt_async_loader():
    """Make sure the worker processes are stopped cleanly even when the end of
    the dataset is not reached."""

    shape = [2, 3]
    num_tensors = 100

    opts = poptorch.Options()
    data = poptorch.DataLoader(opts,
                               IncrementDataset(shape, num_tensors),
                               batch_size=1,
                               num_workers=1)

    loader = poptorch.AsynchronousDataAccessor(data)
    assert len(loader) == num_tensors

    for _, _ in enumerate(loader):
        break
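
The test above relies on the accessor shutting down its worker when it goes out of scope. When deterministic teardown is wanted (for example before creating another accessor in the same process), poptorch.AsynchronousDataAccessor also provides a terminate() method; a minimal sketch, reusing the data DataLoader defined above:

# Sketch: stop the asynchronous worker explicitly instead of relying on
# garbage collection after leaving the epoch early.
loader = poptorch.AsynchronousDataAccessor(data)
try:
    for _, _ in enumerate(loader):
        break  # leave the epoch after the first element
finally:
    loader.terminate()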
Example 10
def test_iterable_dataloader():
    shape = [2, 3]
    num_tensors = 100

    opts = poptorch.Options()
    data = poptorch.DataLoader(opts,
                               IncrementIterableDataset(shape, num_tensors),
                               batch_size=1,
                               num_workers=1)

    loader = poptorch.AsynchronousDataAccessor(data)

    for _, t in enumerate(loader):
        assert t.shape == torch.Size([1, 2, 3])
        continue

    # Make sure it works for more than 1 epoch
    for _, _ in enumerate(loader):
        continue
Example 11
def _run_process_label_test(shape=None,
                            num_tensors=100,
                            batch_size=1,
                            num_workers=0,
                            device_iterations=1,
                            replication_factor=1):
    shape = shape or [2, 3]

    opts = poptorch.Options()
    opts.deviceIterations(device_iterations)
    opts.replicationFactor(replication_factor)

    data = poptorch.DataLoader(opts,
                               IncrementDatasetWithLabels(shape, num_tensors),
                               batch_size=batch_size,
                               num_workers=num_workers)

    loader = poptorch.AsynchronousDataAccessor(data)
    assert len(loader) == num_tensors // (device_iterations * batch_size *
                                          replication_factor)

    model = poptorch.inferenceModel(DoubleDataLabel(), opts)

    total = torch.zeros(shape)
    label_out = torch.zeros(1, dtype=torch.int)
    for _, (data, label) in enumerate(loader):
        out, label = model(data, label)
        total += torch.sum(out, dim=0)
        label_out += torch.sum(label, dim=0)

    actual = 0
    for i in range(0, num_tensors):
        actual += i * 2

    numpy.testing.assert_array_equal(total[0][0].numpy(), [actual])
    numpy.testing.assert_array_equal(label_out[0].item(), [actual])
Example 12
def run_data_loader_example():
    model_batch_size = 2
    # replication_start
    # Create a poptorch.Options instance to override default options
    opts = poptorch.Options()

    # Run a 100 iteration loop on the IPU, fetching a new batch each time
    opts.deviceIterations(100)

    # Duplicate the model over 4 replicas.
    opts.replicationFactor(4)

    training_data = poptorch.DataLoader(opts,
                                        dataset=ExampleDataset(shape=[3, 2],
                                                               length=100000),
                                        batch_size=model_batch_size,
                                        shuffle=True,
                                        drop_last=True)

    model = ExampleModelWithLoss(data_shape=[3, 2], num_classes=2)
    # Wrap the model in a PopTorch training wrapper
    poptorch_model = poptorch.trainingModel(model, options=opts)

    # Run over the training data: each step consumes a combined batch of
    # 800 samples (batch size 2 x 100 device iterations x 4 replicas).
    for batch_number, (data, labels) in enumerate(training_data):
        # Execute the device with a 100 iteration loop of batch size 2 across
        # 4 replicas. "output" and "loss" will be the output and loss of the
        # final batch of each replica (the default AnchorMode).
        output, loss = poptorch_model(data, labels)
        print(f"{labels[-1]}, {output}, {loss}")
    # replication_end
    # gradient_acc_start
    # Create a poptorch.Options instance to override default options
    opts = poptorch.Options()

    # Run a 400 iteration loop on the IPU, fetching a new batch each time
    opts.deviceIterations(400)

    # Accumulate the gradient 8 times before applying it.
    opts.Training.gradientAccumulation(8)

    training_data = poptorch.DataLoader(opts,
                                        dataset=ExampleDataset(shape=[3, 2],
                                                               length=100000),
                                        batch_size=model_batch_size,
                                        shuffle=True,
                                        drop_last=True)

    # Wrap the model in a PopTorch training wrapper
    poptorch_model = poptorch.trainingModel(model, options=opts)

    # Run over the training data: each step consumes a combined batch of
    # 6400 samples (batch size 2 x 400 device iterations x 8 gradient
    # accumulation steps).
    for batch_number, (data, labels) in enumerate(training_data):
        # Execute the device with a 400 iteration loop of batch size 2,
        # accumulating the gradient over 8 batches before each weight update.
        # "output" and "loss" are those of the final batch (the default
        # AnchorMode).
        output, loss = poptorch_model(data, labels)
        print(f"{labels[-1]}, {output}, {loss}")
    # gradient_acc_end

    # Not displayed: just to keep the linter happy
    shape = [3, 2]
    num_tensors = 100
    batch_size = 1
    num_workers = 0
    device_iterations = 1
    replication_factor = 1
    # Example starts here:
    # data_accessor_start
    opts = poptorch.Options()
    opts.deviceIterations(device_iterations)
    opts.replicationFactor(replication_factor)

    data = poptorch.DataLoader(opts,
                               ExampleDataset(shape=shape, length=num_tensors),
                               batch_size=batch_size,
                               num_workers=num_workers)

    loader = poptorch.AsynchronousDataAccessor(data)

    poptorch_model = poptorch.inferenceModel(model, opts)

    for it, (data, _) in enumerate(loader):
        out = poptorch_model(data)
    # data_accessor_end

    # distributed_execution_start
    def process(process_id=0, num_processes=1):
        # Create a poptorch.Options instance to override default options
        opts = poptorch.Options()

        # Run a 400 iteration loop on the IPU, fetching a new batch each time
        opts.deviceIterations(400)

        # Replicate the graph across 2 IPUs in each process.
        opts.replicationFactor(2)

        # Set the id of the current process and the total number of processes.
        opts.Distributed.configureProcessId(process_id, num_processes)

        # Accumulate the gradient 8 times before applying it.
        opts.Training.gradientAccumulation(8)

        # Optional: All the processes must use the same seed if shuffle=True is used for the DataLoader.
        opts.randomSeed(42)

        training_data = poptorch.DataLoader(opts,
                                            dataset=ExampleDataset(
                                                shape=[3, 2], length=100000),
                                            batch_size=model_batch_size,
                                            shuffle=True,
                                            drop_last=True)

        # Wrap the model in a PopTorch training wrapper
        poptorch_model = poptorch.trainingModel(model, options=opts)

        # Run over the training data: each step consumes a combined batch of
        # 12800 samples per process (batch size 2 x 400 device iterations x
        # 2 replicas x 8 gradient accumulation steps); see the arithmetic
        # sketch after this example.
        for batch_number, (data, labels) in enumerate(training_data):
            # Execute the device with a 400 iteration loop of batch size 2
            # across 2 replicas, accumulating the gradient 8 times. "output"
            # and "loss" are those of the final batch of each replica (the
            # default AnchorMode).
            output, loss = poptorch_model(data, labels)
            print(f"{batch_number} {labels[-1]}, {output}, {loss}")
Example 13
def get_data(opts, model_opts, train=True, async_dataloader=False):
    """
    A factory method to create a dataload responsible for sending data
    to the IPU device. This build the appropriate dataset and wraps it in a dataloader.
    """
    # opts.precision is expected to start with "16." or "32."
    if opts.precision[:3] == "16.":
        half_precision = True
    elif opts.precision[:3] == "32.":
        half_precision = False
    transform = get_preprocessing_pipeline(
        train, models.available_models[opts.model]["input_shape"],
        half_precision)
    # Determine the size of the small datasets
    if hasattr(opts, "iterations"):
        dataset_size = opts.batch_size * \
                       model_opts.device_iterations * \
                       model_opts.replication_factor * \
                       model_opts.Training.gradient_accumulation * \
                       opts.iterations

    # Select the right dataset
    if opts.data == "synthetic":
        if hasattr(opts, "iterations"):
            dataset = SynthDataset(
                models.available_models[opts.model]["input_shape"],
                size=dataset_size,
                half_precision=half_precision)
        else:
            dataset = SynthDataset(
                models.available_models[opts.model]["input_shape"],
                half_precision=half_precision)
    elif opts.data == "real":
        data_path = Path(__file__).parent.absolute().joinpath("images")
        if hasattr(opts, "iterations"):
            dataset = SampleDataset(img_dir=data_path,
                                    transform=transform,
                                    size=dataset_size)
        else:
            dataset = SampleDataset(img_dir=data_path, transform=transform)
    elif opts.data == "imagenet":
        if train:
            data_folder = 'train'
        else:
            data_folder = 'validation'
        dataset = torchvision.datasets.ImageFolder(os.path.join(
            opts.imagenet_data_path, data_folder),
                                                   transform=transform)
    elif opts.data == "cifar10":
        data_path = Path(__file__).parent.absolute().joinpath("cifar10")
        dataset = torchvision.datasets.CIFAR10(root=data_path,
                                               train=train,
                                               download=True,
                                               transform=transform)

    num_loader_workers = min(32, multiprocessing.cpu_count())
    dataloader = poptorch.DataLoader(model_opts,
                                     dataset,
                                     batch_size=opts.batch_size,
                                     num_workers=num_loader_workers,
                                     shuffle=train,
                                     drop_last=True)
    if async_dataloader:
        return poptorch.AsynchronousDataAccessor(dataloader,
                                                 load_indefinitely=True)
    else:
        return dataloader
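
The async branch above returns an AsynchronousDataAccessor built with load_indefinitely=True, so the background worker keeps refilling its buffer across epochs; the caller iterates it exactly like the plain dataloader. A short sketch, assuming opts and model_opts as used in this example:

# Sketch: consume the loader returned by get_data() for two epochs; the
# training loop does not need to know whether async loading was enabled.
loader = get_data(opts, model_opts, train=True, async_dataloader=True)
for epoch in range(2):
    for batch in loader:
        pass  # feed `batch` to the training step here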
Example 14
def get_data(opts, model_opts, train=True, async_dataloader=False):
    """
    A factory method to create a dataload responsible for sending data
    to the IPU device. This build the appropriate dataset and wraps it in a dataloader.
    """
    # opts.precision is expected to start with "16." or "32."
    if opts.precision[:3] == "16.":
        half_precision = True
    elif opts.precision[:3] == "32.":
        half_precision = False
    transform = get_preprocessing_pipeline(
        train, models.available_models[opts.model]["input_shape"][-1],
        half_precision, opts.normalization_location == "host")
    # Determine the size of the small datasets
    if hasattr(opts, "iterations"):
        dataset_size = opts.batch_size * \
                       model_opts.device_iterations * \
                       model_opts.replication_factor * \
                       model_opts.Training.gradient_accumulation * \
                       opts.iterations

    # Select the right dataset
    if opts.data in ["synthetic", "generated"]:
        if hasattr(opts, "iterations"):
            dataset = GeneratedDataset(
                models.available_models[opts.model]["input_shape"],
                size=dataset_size,
                half_precision=half_precision)
        else:
            dataset = GeneratedDataset(
                models.available_models[opts.model]["input_shape"],
                half_precision=half_precision)
    elif opts.data == "real":
        data_path = Path(__file__).parent.parent.absolute().joinpath(
            "data").joinpath("images")
        if hasattr(opts, "iterations"):
            dataset = SampleDataset(img_dir=data_path,
                                    transform=transform,
                                    size=dataset_size)
        else:
            dataset = SampleDataset(img_dir=data_path, transform=transform)
    elif opts.data == "imagenet":
        if os.path.exists(
                os.path.join(opts.imagenet_data_path, 'metadata.json')):
            # WebDataset format
            dataset = get_webdataset(opts,
                                     model_opts,
                                     train,
                                     half_precision,
                                     transform=transform)
        else:
            # Original ImageNet format
            data_folder = 'train' if train else 'validation'
            dataset = torchvision.datasets.ImageFolder(os.path.join(
                opts.imagenet_data_path, data_folder),
                                                       transform=transform)
    elif opts.data == "cifar10":
        data_path = Path(__file__).parent.parent.absolute().joinpath(
            "data").joinpath("cifar10")
        dataset = torchvision.datasets.CIFAR10(root=data_path,
                                               train=train,
                                               download=True,
                                               transform=transform)
    # Iterable datasets are handled separately below (rebatched and, if
    # requested, wrapped in an AsynchronousDataAccessor), so only map-style
    # datasets use the Async mode here.
    if async_dataloader and not isinstance(dataset,
                                           torch.utils.data.IterableDataset):
        mode = poptorch.DataLoaderMode.Async
    else:
        mode = poptorch.DataLoaderMode.Sync
    dataloader = poptorch.DataLoader(
        model_opts,
        dataset,
        batch_size=opts.batch_size if
        not (isinstance(dataset, torch.utils.data.IterableDataset)) else None,
        num_workers=opts.dataloader_worker,
        shuffle=train
        and not (isinstance(dataset, torch.utils.data.IterableDataset)),
        drop_last=not (isinstance(dataset, torch.utils.data.IterableDataset)),
        persistent_workers=True,
        auto_distributed_partitioning=not isinstance(
            dataset, torch.utils.data.IterableDataset),
        worker_init_fn=None,
        mode=mode,
        async_options={'load_indefinitely': True})

    if isinstance(dataset, torch.utils.data.IterableDataset):
        global_batch_size = (opts.batch_size * model_opts.device_iterations *
                             model_opts.replication_factor *
                             model_opts.Training.gradient_accumulation)
        if async_dataloader:
            dataloader._accessor = poptorch.AsynchronousDataAccessor(
                DatasetRebatch(dataloader, global_batch_size),
                load_indefinitely=True)
        else:
            dataloader = DatasetRebatch(dataloader, global_batch_size)
    return dataloader
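
Instead of constructing an AsynchronousDataAccessor by hand as in the earlier examples, the mode argument used above asks poptorch.DataLoader to do the wrapping itself. A minimal sketch of that form, assuming a map-style dataset and an opts Options object already exist:

# Sketch: DataLoaderMode.Async makes the DataLoader wrap itself in an
# AsynchronousDataAccessor, with async_options forwarded to the accessor.
async_loader = poptorch.DataLoader(opts,
                                   dataset,
                                   batch_size=4,
                                   shuffle=True,
                                   drop_last=True,
                                   num_workers=8,
                                   mode=poptorch.DataLoaderMode.Async,
                                   async_options={'load_indefinitely': True})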