def __init__(
    self,
    device_iterations: int = 1,
    autoreport: bool = False,
    autoreport_dir: Optional[str] = None,
    parallel_devices: Optional[List[torch.device]] = None,
    cluster_environment: Optional[ClusterEnvironment] = None,
    checkpoint_io: Optional[CheckpointIO] = None,
    training_opts: Optional["poptorch.Options"] = None,
    inference_opts: Optional["poptorch.Options"] = None,
) -> None:
    """Set up the IPU plugin state and, optionally, PopVision auto-reporting.

    Arguments:

        device_iterations: Number of iterations to run on device at once before returning to host.
            This can be used as an optimization to speed up training.
            https://docs.graphcore.ai/projects/poptorch-user-guide/en/0.1.67/batching.html
        autoreport: Enable auto-reporting for IPUs using PopVision
            https://docs.graphcore.ai/projects/graphcore-popvision-user-guide/en/latest/graph/graph.html
        autoreport_dir: Optional directory to store autoReport output. Only used when
            ``autoreport`` is enabled; the directory is created if missing.
        parallel_devices: Forwarded unchanged to the parent constructor.
        cluster_environment: Forwarded unchanged to the parent constructor.
        checkpoint_io: Forwarded unchanged to the parent constructor.
        training_opts: Optional ``poptorch.Options`` to override the default created options for training.
        inference_opts: Optional ``poptorch.Options`` to override the default
            created options for validation/testing and predicting.

    Raises:
        MisconfigurationException: If poptorch is not installed or no IPU hardware is available.
    """
    super().__init__(
        parallel_devices=parallel_devices,
        cluster_environment=cluster_environment,
        checkpoint_io=checkpoint_io,
    )

    # Short-circuits: `poptorch` is only touched when the package is importable.
    ipu_usable = _POPTORCH_AVAILABLE and poptorch.ipuHardwareIsAvailable()
    if not ipu_usable:
        raise MisconfigurationException(
            "The IPU Accelerator requires IPU devices to run. "
            "Learn more or get started with IPUs at https://www.graphcore.ai/getstarted"
        )

    self.device_iterations = device_iterations
    self.autoreport = autoreport
    self.autoreport_dir = autoreport_dir
    self.poptorch_models = {}
    self._original_accumulate_grad_batches = None
    self._training_opts = training_opts
    self._inference_opts = inference_opts

    if not autoreport:
        return

    # Auto-reporting is configured through the POPLAR_ENGINE_OPTIONS
    # environment variable, which is (over)written with a JSON payload.
    engine_options = {"autoReport.all": autoreport}
    if autoreport_dir:
        self._fs = get_filesystem(str(autoreport_dir))
        self._fs.makedirs(autoreport_dir, exist_ok=True)
        engine_options["autoReport.directory"] = autoreport_dir
    os.environ["POPLAR_ENGINE_OPTIONS"] = json.dumps(engine_options)
# Feature-availability flags, evaluated once when this module is imported.
# Each flag is the result of a `_module_available` / `_compare_version` probe
# (helpers defined outside this view), in some cases combined with a further
# runtime check. Statement order matters: later flags reuse earlier ones.

# `not _IS_WINDOWS` short-circuits, so the module probe is skipped on Windows.
_GROUP_AVAILABLE = not _IS_WINDOWS and _module_available(
    'torch.distributed.group')
_HOROVOD_AVAILABLE = _module_available("horovod.torch")
_HYDRA_AVAILABLE = _module_available("hydra")
_HYDRA_EXPERIMENTAL_AVAILABLE = _module_available("hydra.experimental")
# `torch.profiler` is only accessed when `_TORCH_GREATER_EQUAL_1_8_1` is truthy.
_KINETO_AVAILABLE = _TORCH_GREATER_EQUAL_1_8_1 and torch.profiler.kineto_available(
)
# Requires both the `torch.cuda.amp` module and its `autocast` attribute.
_NATIVE_AMP_AVAILABLE = _module_available("torch.cuda.amp") and hasattr(
    torch.cuda.amp, "autocast")
_OMEGACONF_AVAILABLE = _module_available("omegaconf")
_POPTORCH_AVAILABLE = _module_available('poptorch')
# True when at least one supported quantized engine other than 'none' exists.
_TORCH_QUANTIZE_AVAILABLE = bool(
    [eg for eg in torch.backends.quantized.supported_engines if eg != 'none'])
_TORCHTEXT_AVAILABLE = _module_available("torchtext")
_TORCHVISION_AVAILABLE = _module_available('torchvision')
# The two torchmetrics flags apply `operator.lt` / `operator.ge` against "0.3.0".
_TORCHMETRICS_LOWER_THAN_0_3 = _compare_version("torchmetrics", operator.lt,
                                                "0.3.0")
_TORCHMETRICS_GREATER_EQUAL_0_3 = _compare_version("torchmetrics", operator.ge,
                                                   "0.3.0")
_XLA_AVAILABLE = _module_available("torch_xla")

# Deliberately imported after the flags above (hence the E402 suppression).
# NOTE(review): presumably to avoid a circular import with `xla_device` — confirm.
from pytorch_lightning.utilities.xla_device import XLADeviceUtils  # noqa: E402

_TPU_AVAILABLE = XLADeviceUtils.tpu_device_exists()

# `poptorch` is only imported when the probe above found it; without it no
# hardware query is made and IPUs are reported as unavailable.
if _POPTORCH_AVAILABLE:
    import poptorch
    _IPU_AVAILABLE = poptorch.ipuHardwareIsAvailable()
else:
    _IPU_AVAILABLE = False
# Copyright (c) 2020 Graphcore Ltd. All rights reserved. # iterations_start from functools import reduce from operator import mul import sys import torch import poptorch if not poptorch.ipuHardwareIsAvailable(): print("Replicated top level graphs are not supported on the IPU model") sys.exit(0) class ExampleModelWithLoss(torch.nn.Module): def __init__(self, data_shape, num_classes): super().__init__() self.fc = torch.nn.Linear(reduce(mul, data_shape), num_classes) self.loss = torch.nn.CrossEntropyLoss() def forward(self, x, target=None): reshaped = x.reshape([x.shape[0], -1]) fc = self.fc(reshaped) if target is not None: return fc, self.loss(fc, target) return fc class ExampleDataset(torch.utils.data.Dataset):
#!/usr/bin/env python3
# Copyright (c) 2020 Graphcore Ltd. All rights reserved.

import os
import tempfile
import pytest
import poptorch
import torch
import helpers


@pytest.mark.skipif(not poptorch.ipuHardwareIsAvailable(),
                    reason="Hardware IPU needed")
def test_ExecutableCaching(capfd):
    """Check that compiling with executable caching populates the cache dir.

    A trivial model is compiled with ``enableExecutableCaching`` pointing at a
    fresh temporary directory; the debug log must mention that engine caching
    was enabled and the directory must not be empty afterwards.

    Args:
        capfd: pytest fixture capturing output at the file-descriptor level,
            handed to ``helpers.LogChecker``.
    """
    poptorch.setLogLevel(1)  # Force debug logging

    class Model(torch.nn.Module):
        def forward(self, x):
            return x * 6

    with tempfile.TemporaryDirectory() as cache:
        opts = poptorch.Options()
        opts.enableExecutableCaching(cache)
        m = poptorch.inferenceModel(Model(), opts)
        m.compile(torch.rand(2, 3))
        m.destroy()
        log = helpers.LogChecker(capfd)
        log.assert_contains("set enableEngineCaching to value true")
        # Inspect the cache directory itself: a bare os.listdir() would list
        # the current working directory and pass regardless of caching.
        assert os.listdir(cache), "No executable saved in the cache"
        # Second model built with the same options, i.e. the same cache dir.
        n = poptorch.inferenceModel(Model(), opts)
def example():
    """Train a small CNN on MNIST with PopTorch, then report its accuracy.

    The function exits the process with status 0 (after logging a warning)
    when no IPU hardware is available. Otherwise it builds the data loaders,
    defines the network, wraps it in a PopTorch training model and an
    inference model that share the same underlying ``model``, runs one pass
    over the training loader and one pass over the validation loader, and
    prints loss/accuracy figures along the way.
    """
    # pylint: disable=import-outside-toplevel
    import sys
    import poptorch
    if not poptorch.ipuHardwareIsAvailable():
        poptorch.logger.warn("This examples requires IPU hardware to run")
        sys.exit(0)

    # pylint: disable=unused-variable, wrong-import-position, reimported, ungrouped-imports, wrong-import-order, import-outside-toplevel
    # mnist_start
    import torch
    import torch.nn as nn
    import torchvision
    import poptorch

    # Normal pytorch batch size
    training_batch_size = 20
    validation_batch_size = 100

    opts = poptorch.Options()
    # Device "step"
    opts.deviceIterations(20)

    # How many IPUs to replicate over.
    opts.replicationFactor(4)

    opts.randomSeed(42)

    # Load MNIST normally.
    training_data = poptorch.DataLoader(
        opts,
        torchvision.datasets.MNIST('mnist_data/',
                                   train=True,
                                   download=True,
                                   transform=torchvision.transforms.Compose([
                                       torchvision.transforms.ToTensor(),
                                       torchvision.transforms.Normalize(
                                           (0.1307, ), (0.3081, ))
                                   ])),
        batch_size=training_batch_size,
        shuffle=True)

    # Load MNIST normally.
    # NOTE(review): `train=True` loads the training split again, so the
    # "validation" accuracy below is measured on training data — confirm
    # whether `train=False` was intended.
    val_options = poptorch.Options()
    validation_data = poptorch.DataLoader(
        val_options,
        torchvision.datasets.MNIST('mnist_data/',
                                   train=True,
                                   download=True,
                                   transform=torchvision.transforms.Compose([
                                       torchvision.transforms.ToTensor(),
                                       torchvision.transforms.Normalize(
                                           (0.1307, ), (0.3081, ))
                                   ])),
        batch_size=validation_batch_size,
        shuffle=True,
        drop_last=True)

    # A helper block to build convolution-pool-relu blocks.
    class Block(nn.Module):
        def __init__(self, in_channels, num_filters, kernel_size, pool_size):
            super().__init__()
            self.conv = nn.Conv2d(in_channels,
                                  num_filters,
                                  kernel_size=kernel_size)
            self.pool = nn.MaxPool2d(kernel_size=pool_size)
            self.relu = nn.ReLU()

        def forward(self, x):
            x = self.conv(x)
            x = self.pool(x)
            x = self.relu(x)
            return x

    # Define the network using the above blocks.
    class Network(nn.Module):
        def __init__(self):
            super().__init__()
            self.layer1 = Block(1, 10, 5, 2)
            self.layer2 = Block(10, 20, 5, 2)
            self.layer3 = nn.Linear(320, 256)
            self.layer3_act = nn.ReLU()
            self.layer4 = nn.Linear(256, 10)

            self.softmax = nn.LogSoftmax(1)
            self.loss = nn.NLLLoss(reduction="mean")

        def forward(self, x, target=None):
            x = self.layer1(x)
            x = self.layer2(x)
            x = x.view(-1, 320)

            x = self.layer3_act(self.layer3(x))
            x = self.layer4(x)
            x = self.softmax(x)

            # With a target the loss is returned alongside the predictions.
            if target is not None:
                loss = self.loss(x, target)
                return x, loss
            return x

    # Create our model.
    model = Network()

    # Create model for training which will run on IPU.
    training_model = poptorch.trainingModel(model, training_data.options)

    # Same model as above, they will share weights (in 'model') which once training is finished can be copied back.
    inference_model = poptorch.inferenceModel(model, validation_data.options)

    def train():
        for batch_number, (data, labels) in enumerate(training_data):
            output, losses = training_model(data, labels)

            if batch_number % 10 == 0:
                print(f"PoptorchIPU loss at batch: {batch_number} is {losses}")

                # Pick the highest probability.
                _, ind = torch.max(output, 1)
                assert training_data.options.anchor_mode in (
                    poptorch.AnchorMode.All, poptorch.AnchorMode.Final
                ), "Only 'Final' and 'All' AnchorMode supported"
                # If we're using Final: only keep the last labels, no-op if using All
                num_labels = ind.shape[0]
                labels = labels[-num_labels:]
                eq = torch.eq(ind, labels)

                # Count the matches directly. Going through torch.unique and
                # requiring two distinct values reported 0% for a batch in
                # which every prediction was correct.
                num_correct = eq.sum().item()
                acc = (num_correct / num_labels) * 100.0

                print(
                    f"Training accuracy: {acc}% from batch of size {num_labels}"
                )
        print("Done training")

    def test():
        correct = 0
        total = 0
        with torch.no_grad():
            for (data, labels) in validation_data:
                output = inference_model(data)

                # Argmax the probabilities to get the highest.
                _, ind = torch.max(output, 1)

                # Compare it against the ground truth for this batch.
                eq = torch.eq(ind, labels)

                # Count the number which are True.
                correct += eq.sum().item()
                # Use the size of the batch actually received.
                total += labels.shape[0]
        print(f"Validation: of {total} samples we got: "
              f"{(correct / total) * 100.0}% correct")

    # Train on IPU.
    train()

    test()
def disableSmallModel():
    """Return the settings needed to opt out of the small IPU model.

    Returns:
        ``{"POPTORCH_IPU_MODEL": "1"}`` when no IPU hardware is available,
        otherwise an empty dict. How callers apply the mapping is not visible
        here (presumably as environment-variable overrides).
    """
    if poptorch.ipuHardwareIsAvailable():
        # Hardware is present: nothing to override.
        return {}
    # POPTORCH_IPU_MODEL takes precedence over POPTORCH_SMALL_IPU_MODEL
    return {"POPTORCH_IPU_MODEL": "1"}