def test_save_tensors_externally():
    """Check that initializers can be written to an external binary file.

    Two initialized tensors (3 x float32, 2 x float16) are saved externally,
    once through ``popart.GraphTransformer`` and once through
    ``popart.Builder``. Each resulting file is checked for existence, size
    and content.

    The scratch directory is managed by ``tempfile.TemporaryDirectory`` so it
    is removed when the test finishes, whether it passes or fails.
    """
    d1 = np.array([1, -1, 6]).astype(np.float32)
    d2 = np.array([7, 4]).astype(np.float16)

    builder = popart.Builder()
    i1 = builder.addInitializedInputTensor(d1)
    i2 = builder.addInitializedInputTensor(d2)
    # The add node is still created in the graph; its output id is not needed.
    builder.aiOnnx.add([i1, i2])

    def checkFile(file):
        # Check file exists
        assert os.path.exists(file)

        # Check file is of expected size: (3 * 4) + (2 * 2) = 16 bytes
        assert os.path.getsize(file) == 16

        # Read the binary data back in and check the value is as expected.
        # d1 occupies the first 12 bytes, d2 follows at byte offset 12.
        assert np.array_equal(np.fromfile(file, dtype=np.float32, count=3),
                              d1)
        assert np.array_equal(
            np.fromfile(file, dtype=np.float16, count=2, offset=12), d2)

    with tempfile.TemporaryDirectory() as tmpdir:
        # Test GraphTransformer
        tmpfile0 = os.path.join(tmpdir, "model_tensors0.onnx")
        graph_transformer = popart.GraphTransformer(builder.getModelProto())
        graph_transformer.saveInitializersExternally([i1, i2], tmpfile0)
        checkFile(tmpfile0)

        # Test Builder
        tmpfile1 = os.path.join(tmpdir, "model_tensors1.onnx")
        builder.saveInitializersExternally([i1, i2], tmpfile1)
        checkFile(tmpfile1)
def test_convert_all_fixed_point_initializers_to_constants(tmpdir):
    """Fixed-point initializers must no longer be model inputs after the pass.

    Builds two reshape branches, each with a float data input and an int64
    shape initializer, adds the branch outputs together, then runs
    ``convertAllFixedPointInitializersToConstants``. Afterwards only the two
    float data inputs may remain in the model's input list.
    """
    builder = popart.Builder()

    # First branch: float data reshaped by an int64 shape initializer.
    data_a = builder.addInputTensor(popart.TensorInfo("FLOAT", [2, 3]))
    shape_a = builder.addInitializedInputTensor(
        np.array([1, 6], dtype=np.int64))
    reshaped_a = builder.aiOnnx.reshape([data_a, shape_a])

    # Second branch, same pattern with a differently shaped data input.
    data_b = builder.addInputTensor(popart.TensorInfo("FLOAT", [3, 2]))
    shape_b = builder.addInitializedInputTensor(
        np.array([1, 6], dtype=np.int64))
    reshaped_b = builder.aiOnnx.reshape([data_b, shape_b])

    total = builder.aiOnnx.add([reshaped_a, reshaped_b])
    builder.addOutputTensor(total)

    transformer = popart.GraphTransformer(builder.getModelProto())
    transformer.convertAllFixedPointInitializersToConstants()

    # Reload the transformed proto to inspect which inputs are left.
    reloaded = popart.Builder(transformer.getModelProto())
    remaining_inputs = reloaded.getInputTensorIds()

    assert data_a in remaining_inputs
    assert shape_a not in remaining_inputs
    assert data_b in remaining_inputs
    assert shape_b not in remaining_inputs
def graph_builder():
    """Load the ONNX model selected by FLAGS and prepare it for a session.

    The model path is built from ``FLAGS.model_path`` (falling back to
    ``"models"``), ``FLAGS.model_name`` and ``FLAGS.micro_batch_size``.
    Input data is either synthetic float16 noise or loaded through
    ``load_dataset``. The available-memory proportion is set on the first
    output of every ``Conv`` node, and the model is finally converted from
    floats to halfs.

    Returns:
        A 4-tuple ``(model_proto, data, output, output_id)``: the transformed
        model proto, a dict mapping the input tensor id to its data, a dict
        mapping the output tensor id to its ``AnchorReturnType``, and the
        output tensor id itself.
    """
    model_dir = FLAGS.model_path or "models"
    proto = f"{model_dir}/{FLAGS.model_name}/model_{FLAGS.micro_batch_size}.onnx"
    builder = popart.Builder(proto,
                             opsets={
                                 "ai.onnx": 10,
                                 "ai.onnx.ml": 1,
                                 "ai.graphcore": 1
                             })

    input_id = builder.getInputTensorIds()[0]
    if FLAGS.synthetic:
        input_shape = [int(FLAGS.micro_batch_size), 3, 224, 224]
        data = {
            input_id: np.random.normal(0, 1, input_shape).astype(np.float16)
        }
        if FLAGS.batches_per_step > 1:
            # Prepend a batches-per-step axis by repeating the same batch.
            data = {
                k: np.repeat(v[np.newaxis], FLAGS.batches_per_step, 0)
                for k, v in data.items()
            }
    else:
        data = load_dataset([input_id])

    output_id = builder.getOutputTensorIds()[0]
    output = {output_id: popart.AnchorReturnType("ALL")}

    # Get the IDs of the outputs of all convolutions. We need these so we can
    # adjust their memory proportions from the default.
    graph_proto = onnx.load(proto).graph
    conv_output_ids = [
        node.output[0] for node in graph_proto.node if node.op_type == 'Conv'
    ]

    # NOTE(review): this reads FLAGS.batch_size while the rest of the function
    # uses FLAGS.micro_batch_size - confirm which flag is intended.
    memory_proportion = 0.26 if FLAGS.batch_size == 5 else 0.3
    for conv_id in conv_output_ids:
        builder.setAvailableMemoryProportion(conv_id, memory_proportion)

    graph_transformer = popart.GraphTransformer(builder.getModelProto())
    graph_transformer.convertFloatsToHalfs()
    return graph_transformer.getModelProto(), data, output, output_id
def compile_and_run(image_input, image_output, onnx_model):
    """Run one inference of ``onnx_model`` on an image and save the result.

    Args:
        image_input: passed to ``load_image`` to obtain the input data.
        image_output: passed to ``save_image`` as the destination.
        onnx_model: model handed to ``popart.Builder`` and
            ``popart.GraphTransformer``.
    """
    pixels = load_image(image_input)

    # A Builder over the .onnx file is used to look up the first input and
    # first output tensor ids.
    print(onnx_model)
    model_builder = popart.Builder(onnx_model)
    in_id = model_builder.getInputTensorIds()[0]
    out_id = model_builder.getOutputTensorIds()[0]
    print("Input:", in_id, "Output:", out_id)

    transformer = popart.GraphTransformer(onnx_model)
    transformer.convertAllFixedPointInitializersToConstants()

    # Create forward pass session
    model_proto = transformer.getModelProto()
    data_flow = popart.DataFlow(1, {out_id: popart.AnchorReturnType("All")})
    ipu_device = popart.DeviceManager().acquireAvailableDevice(1)
    session = popart.InferenceSession(fnModel=model_proto,
                                      dataFlow=data_flow,
                                      deviceInfo=ipu_device)

    # Compile graph
    print("Compiling...")
    session.prepareDevice()

    # Create buffers to receive results from the execution
    anchor_buffers = session.initAnchorArrays()
    step_io = popart.PyStepIO({in_id: pixels.copy()}, anchor_buffers)

    # Run the inference graph
    session.run(step_io)

    total_execution_cycles(session.getExecutionReport())
    total_tile_sizes(session.getGraphReport())

    result = anchor_buffers[out_id]
    save_image(np.squeeze(result), image_output)
def test_convert_initializers_to_constants(tmpdir):
    """Converting a named initializer removes it from the model inputs.

    Also checks that asking to convert an id that is not an initializer
    raises ``popart.popart_exception`` with the expected message.
    """
    builder = popart.Builder()
    data_in = builder.addInputTensor(popart.TensorInfo("FLOAT", [2, 3]))
    shape_init = builder.addInitializedInputTensor(
        np.array([1, 6], dtype=np.int64))
    reshaped = builder.aiOnnx.reshape([data_in, shape_init])
    builder.addOutputTensor(reshaped)

    transformer = popart.GraphTransformer(builder.getModelProto())
    transformer.convertInitializersToConstants([shape_init])

    # An id that is not among the initializers must be rejected.
    with pytest.raises(popart.popart_exception) as e_info:
        transformer.convertInitializersToConstants(["unknown"])
    message = e_info.value.args[0]
    assert message == "TensorId unknown not in the model initalizers"

    # Reload the transformed proto and inspect the remaining inputs.
    reloaded = popart.Builder(transformer.getModelProto())
    remaining_inputs = reloaded.getInputTensorIds()
    assert data_in in remaining_inputs
    assert shape_init not in remaining_inputs
def saveModel(self, fnModel): print("Writing ONNX model to protobuf file %s" % (fnModel, )) # jump into eval mode, just to write the onnx model. # note that this might do strange things with batch-normalisation (?) self.module.eval() inputDataInfos = [self.inputShapeInfo.get(tid) for tid in self.inNames] inputData = [] containsint64 = False for info in inputDataInfos: shape = info.shape() dt = info.data_type_lcase() if dt == "int32": dt = "int64" # torch labels must be 'long' containsint64 = True inputData.append(torch.from_numpy(np.ones(shape=shape, dtype=dt))) torch.onnx.export(self.module, inputData, fnModel, verbose=False, input_names=self.inNames, output_names=self.outNames) # If the model contains 'long' tensors (e.g. in case of exporting # nllloss), they must be converted to int32 # Note: in models with reshape ops, the 'shape' tensor will be converted # to int32 by the blanket conversion. This leads to a technically invalid # onnx model. So we only convert when we know we definitely have int64 tensors. if containsint64: graph_transformer = popart.GraphTransformer(fnModel) graph_transformer.convertINT64ToINT32() graph_transformer.convertAllFixedPointInitializersToConstants() proto = graph_transformer.getModelProto() popart.Builder(proto).saveModelProto(fnModel) onnx.checker.check_model(fnModel)
def _run_impl(torchWriter, patterns, outputdir, cifarInIndices, device,
              device_hw_id, mode, syntheticData, transformations, epochs,
              printAnchorArrays):
    """Run a CIFAR-10 model in both torch and PopART and compare the results.

    The torch model is exported to ONNX through ``torchWriter``, a PopART
    session is built from it, and for every epoch one step of data is run
    through both frameworks. In "train" mode the updated weights are compared
    with a ``popart.NumericsReport``; in "infer" mode the output tensors are
    compared directly.

    Args:
        torchWriter: object providing ``dataFlow``, ``inputShapeInfo``,
            ``samplesPerBatch``, ``outNames``, ``optimizer`` and the methods
            ``saveModel``, ``train`` and ``infer``.
        patterns: forwarded to the PopART session.
        outputdir: directory where model files are written; also used as the
            session log directory.
        cifarInIndices: maps each input tensor id to the index of its data
            within one item returned by the data loader.
        device: one of "cpu", "ipu_model", "sim" or "hw".
        device_hw_id: when truthy and ``device == "hw"``, the id passed to
            ``acquireDeviceById``; otherwise ``tu.acquire_ipu()`` is used.
        mode: "infer" or "train".
        syntheticData: when ``== True``, enables RandomNormal synthetic data.
        transformations: optional iterable of GraphTransformer pass names
            ("removeUnusedInputs", "prepareNodesForTraining").
        epochs: number of steps to run.
        printAnchorArrays: in train mode, print the anchor arrays after each
            step.

    Returns:
        The anchor arrays created by ``session.initAnchorArrays()``.

    Raises:
        Exception: if ``mode`` is not one of the valid modes.
        RuntimeError: on an unrecognised transformation, or in train mode if
            ``torchWriter.outNames`` does not have exactly one entry.
        TestFailureError: if a compared error is NaN or exceeds the margin.
    """

    # Pick a run id one higher than any "runId<N>_..." file already present.
    runIds = [-1] + [
        int(x.split("runId")[1].split("_")[0])
        for x in os.listdir(outputdir) if "runId" in x
    ]
    baseId = 1 + max(runIds)

    def getFnModel(framework, epoch):
        return os.path.join(
            outputdir,
            "runId%d_%sModel_epoch%s.onnx" % (baseId, framework, epoch))

    def getFnPopArt(epoch):
        return getFnModel("PopArt", epoch)

    def getFnTorch(epoch):
        return getFnModel("Torch", epoch)

    def getFnModel0():
        return os.path.join(outputdir, "runId%d_model0.onnx" % (baseId, ))

    dataFlow = torchWriter.dataFlow
    inputShapeInfo = torchWriter.inputShapeInfo
    validModes = ["infer", "train"]
    if mode not in validModes:
        raise Exception("mode must be one of " + str(validModes))

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # determine what the data directory is
    datadir = "unset"

    dir_path = os.path.dirname(os.path.realpath(__file__))
    path_c10datadir = os.path.join(dir_path, "c10datadir.py")
    if os.path.exists(path_c10datadir):
        # An optional c10datadir.py next to this file overrides the location.
        import c10datadir
        datadir = c10datadir.c10datadir
    else:
        tmpdir = tempfile.gettempdir()
        datadir = os.path.abspath(os.path.join(tmpdir, 'cifar10data'))
    print("Using datadir=%s" % (datadir))

    if (not os.path.exists(datadir)):
        print(
            "Specified datadir %s does not exist. Consider making it here with os.mkdir(datadir)"
            % (datadir, ))

    print("c10driver: getting data from", datadir)
    trainset = datasets.CIFAR10(root=datadir,
                                train=True,
                                download=False,
                                transform=transform)

    fnModel0 = getFnModel0()

    # write ONNX Model to file
    torchWriter.saveModel(fnModel=fnModel0)

    stepLoader = torch.utils.data.DataLoader(
        trainset,
        # the amount of data loaded for each step.
        # note this is not the batch size, it's the "step" size
        # (samples per step)
        batch_size=torchWriter.samplesPerBatch * dataFlow.batchesPerStep(),
        #non-random data loading
        shuffle=False,
        num_workers=0)

    deviceManager = popart.DeviceManager()

    # Create a CPU device
    if device == "cpu":
        device = deviceManager.createCpuDevice()

    # Create an IPU Model device
    elif device == "ipu_model":
        options = {"compileIPUCode": True, 'numIPUs': 1, 'tilesPerIPU': 4}
        device = deviceManager.createIpuModelDevice(options)

    # Create an Simulator
    elif device == "sim":
        options = {"numIpus": 1, "tilesPerIPU": 4}
        device = deviceManager.createSimDevice(options)

    # Get a Hardware Device
    elif device == "hw":
        # Get a hardware device that meets the reqirements,
        # may throw if none are available.
        # Will attach to the device
        if device_hw_id:
            device = deviceManager.acquireDeviceById(device_hw_id)
        else:
            device = tu.acquire_ipu()

    # Enumerate available devices
    print("Enumerating devices")
    print("-------------------------------------")
    for idx, d in enumerate(deviceManager.enumerateDevices()):
        print('{0}. {1}'.format(idx, d))
    print("")

    opts = popart.SessionOptions()
    opts.logDir = outputdir
    if syntheticData == True:
        opts.syntheticDataMode = popart.SyntheticDataMode.RandomNormal

    # Either the model file name, or the transformed proto if any
    # transformations were requested.
    modelProtoX = fnModel0
    if transformations:
        gc = popart.GraphTransformer(fnModel0)
        for transformation in transformations:
            print("Running %s transformation pass" % (transformation, ))
            if transformation == "removeUnusedInputs":
                gc.removeUnusedInputs()

            elif transformation == "prepareNodesForTraining":
                gc.prepareNodesForTraining()

            else:
                raise RuntimeError("Unrecognised transformation %s" %
                                   (transformation, ))

        modelProtoX = gc.getModelProto()

    # Reads ONNX model from file and creates backwards graph,
    # performs Ir optimisations

    if mode == 'infer':
        session = popart.InferenceSession(fnModel=modelProtoX,
                                          inputShapeInfo=inputShapeInfo,
                                          dataFlow=dataFlow,
                                          patterns=patterns,
                                          userOptions=opts,
                                          deviceInfo=device)
    else:
        if len(torchWriter.outNames) != 1:
            raise RuntimeError("Expecting single scalar loss tensor")

        # Append output with an identity loss, to reduce to scalar if
        # necessary
        bder = popart.Builder(modelProtoX)
        loss = bder.aiGraphcore.identityloss(
            [torchWriter.outNames[0]], reduction=popart.ReductionType.Sum)
        session = popart.TrainingSession(fnModel=bder.getModelProto(),
                                         inputShapeInfo=inputShapeInfo,
                                         dataFlow=dataFlow,
                                         loss=loss,
                                         optimizer=torchWriter.optimizer,
                                         patterns=patterns,
                                         userOptions=opts,
                                         deviceInfo=device)

    # get the tensor info for the anchors
    anchorArrays = session.initAnchorArrays()

    allDotPrefixes = [x[0:-4] for x in os.listdir(outputdir) if ".dot" in x]
    print("Will generate graph pdfs for all of:")
    print(allDotPrefixes)
    import subprocess
    # set generateFromDots to True to
    # generate pdf figures of the Ir. It
    # requires the 'dot' program
    generateFromDots = False
    if generateFromDots:
        for name in allDotPrefixes:
            dotfile = os.path.join(outputdir, "%s.dot" % (name, ))
            outputfile = os.path.join(outputdir, "%s.pdf" % (name, ))
            log = subprocess.call(
                ["dot", "-T", "pdf", "-o", outputfile, dotfile])
            print("Exit status on `%s' was: %s" % (name, log))

    print("Setting device to IPU, and preparing it")
    session.prepareDevice()

    if mode == "train":
        print("Writing weights to device")
        session.weightsFromHost()

        print("Writing Optimizer tensors to device, if there are any")

    def addStepDimension(data, batchesPerStep):
        # Split the leading axis into (batchesPerStep, leading // batchesPerStep).
        if batchesPerStep == 1:
            return data
        else:
            dataShape = np.array(np.shape(data))
            dataShape[0] //= batchesPerStep
            dataShape = np.insert(dataShape, 0, batchesPerStep)
            return np.reshape(data, dataShape)

    def reportTensorError(tensorInd, result):
        reportStr = str(tensorInd) + " :\n"
        reportStr += "  |pA - tA|^2 / (|pA||tA| + 1e-8)  = " + str(
            result) + "\n"
        return reportStr

    def getAnchorTensor(tId, anchorArrays):
        assertStr = "Tensor" + tId + " must be specified as an anchor"
        assert (tId in anchorArrays.keys()), assertStr
        return anchorArrays[tId]

    def subsampleBatches(array, refShape):
        arrayShape = np.shape(array)

        # Every Nth batch
        if len(arrayShape) == len(refShape):
            n = arrayShape[0] // refShape[0]
            return array[n - 1::n]

        # Last batch only
        else:
            return array[-1]

    def getTensorError(tA, pA):
        # pA, tA are corresponding tensors from two models
        pA_shape = np.shape(pA)
        tA_shape = np.shape(tA)
        assert (pA_shape == tA_shape), "Arrays must be same shape"

        ss_err = np.sum((np.array(pA) - np.array(tA))**2)
        ss_pA = np.sum(np.array(pA)**2)
        ss_tA = np.sum(np.array(tA)**2)
        return ss_err / (math.sqrt(ss_pA * ss_tA) + 1.0e-8)

    def checkResult(result, margin):
        if np.isnan(result):
            raise TestFailureError(str(result) + " is NaN")
        elif (result > margin):
            raise TestFailureError(
                str(result) + " is greater than " + str(margin))

    margin = 5.0e-7
    numReports = []

    for epoch in range(epochs):  # loop over the dataset multiple times
        print("Epoch is %d" % (epoch, ))
        # A fresh iterator is created on every pass and the loader does not
        # shuffle, so each epoch takes the first step's worth of data.
        stepData = next(iter(stepLoader))

        # Form the input map for one step's worth of data.
        # Note: data from the torch DataLoader has shape:
        #   [stepSize * batchSize, sampleShape]
        # whereas Popart expects input data of the shape:
        #   [stepSize, batchSize, sampleShape]
        # so we reshape the input array before passing to the stepio
        inputs = {}
        for tenId in cifarInIndices.keys():
            inputs[tenId] = \
                addStepDimension(stepData[cifarInIndices[tenId]].numpy(),
                                 session.dataFlow.batchesPerStep())

        if mode == "train":
            # take batchesPerStep passes (1 step), Torch
            torchWriter.train(inputs)

            # take batchesPerStep passes (1 step), PopArt
            pystepio = popart.PyStepIO(inputs, anchorArrays)
            session.run(pystepio)

            if printAnchorArrays:
                print(
                    "\nAnchor arrays (being printed as printAnchorArrays==True):"
                )
                for name in anchorArrays.keys():
                    arr = anchorArrays[name]
                    print("\nAnchored Array Name=", name, " and Size=",
                          arr.size)

                    if (arr.size < 10):
                        print("\nArray (of size < 10) values are")
                        print(arr)

                    if len(arr.shape) > 1:
                        for i, slice0 in enumerate(arr):
                            print("Sum along axis %d is Sum=%.15f" %
                                  (i, slice0.sum()))

                    print("Total Sum is %.15f" % (arr.sum()))

            # write models to file
            fnTorchModel = getFnTorch(epoch)
            fnPopArtModel = getFnPopArt(epoch)
            torchWriter.saveModel(fnTorchModel)
            session.modelToHost(fnPopArtModel)
            print("Writing models to " + fnTorchModel + " and " +
                  fnPopArtModel)

            # Compare parameters from updated Onnx models
            print("Obtaining popart NumericsReport, A: Torch, B: Popart.")
            # Value comparison: 'is' on an int literal tests identity, which
            # only works by accident of small-int caching.
            if epoch == 0:
                nr = popart.NumericsReport(fnModel0, fnTorchModel, fnModel0,
                                           fnPopArtModel)
            else:
                nr = popart.NumericsReport(getFnTorch(epoch - 1),
                                           fnTorchModel,
                                           getFnPopArt(epoch - 1),
                                           fnPopArtModel)

            print(nr.fullReport())
            # One relative error calculated per weight tensor
            for tId, relerror in nr.getRelativeErrors().items():
                checkResult(relerror, margin)

        elif mode == "infer":
            # take batchesPerStep passes (1 step), Torch
            # returns map of outputs for each sample
            # Note: already are of dimension matching the
            # anchors
            torchOutputs = torchWriter.infer(inputs)

            # take batchesPerStep passes (1 step), PopArt
            pystepio = popart.PyStepIO(inputs, anchorArrays)
            session.run(pystepio)

            # Compare torch outputs tensors with popart output from
            # anchor tensor maps
            for nInd, outName in enumerate(torchWriter.outNames):
                # Torch outputs returned for all samples, whereas
                # anchors are returned as specified by the user.
                # Subsample torch outputs to match dimensions
                torchOuput = subsampleBatches(torchOutputs[outName],
                                              np.shape(anchorArrays[outName]))
                result = getTensorError(torchOuput, anchorArrays[outName])
                print(reportTensorError(nInd, result))
                checkResult(result, margin)

    return anchorArrays
import popart

# Script: load the ONNX model named on the command line, convert its
# fixed-point initializers to constants and prepare a PopART session for it
# on a CPU device.

# Exactly one argument is expected: the ONNX model file name.
if len(sys.argv) != 2:
    raise RuntimeError("onnx model file name expected as argument")

model_file = sys.argv[1]

opts = popart.SessionOptions()
opts.logging = {'all': 'TRACE'}

# Only referenced by the commented-out createIpuModelDevice call below.
options = {"compileIPUCode": True, 'numIPUs': 1}

# currently, with both Cpu and IpuModel, we have outstanding tasks
# T6384 and T6405, about the conv planner failing (after ~15 mins with Cpu)
device = popart.DeviceManager().createCpuDevice()
#createIpuModelDevice(options)

# TODO: change to not use builder when T6675 is complete
builder = popart.Builder(model_file)

graph_transformer = popart.GraphTransformer(builder.getModelProto())
graph_transformer.convertAllFixedPointInitializersToConstants()

#specific to the task, this output might need changing
output = builder.getOutputTensorIds()[0]

dataFlow = popart.DataFlow(1, {output: popart.AnchorReturnType("All")})

# The session is built from the transformed proto, not the original file.
s = popart.Session(graph_transformer.getModelProto(),
                   dataFlow=dataFlow,
                   userOptions=opts)

s.setDevice(device)
s.prepareDevice()
def test_manual_serialization():
    """Train one step of a hand-serialised matmul and compare with PyTorch.

    Model:
        X: data input of shape (N, C0)
        W: weight input of shape (C0, C1)

        Y    = matmul(X, W)
        Z    = relu(Y)
        loss = l1Loss(Z)

    The matmul ((N, C0), (C0, C1)) is written out by hand as ``f`` smaller
    matmuls ((N, C0/f), (C0/f, C1)) whose results are accumulated. After a
    single SGD step the updated PopART weights are compared against the same
    step taken by an equivalent torch module.
    """
    # Array dimensions and serialisation factor.
    N = 12
    C0 = 244
    C1 = 286
    f = 4
    assert (C0 % f == 0)
    chunk = C0 // f

    # Constructing the model
    builder = popart.Builder()

    # NOTE: T22702 For some seeds this test fails.
    np.random.seed(0)
    wVals = np.array(npr.randn(C0, C1), dtype=np.float32)
    weight_id = builder.addInitializedInputTensor(wVals)
    data_id = builder.addInputTensor(popart.TensorInfo("FLOAT", [N, C0]))
    axes_id = builder.addInitializedInputTensor(
        np.array([0, 1]).astype(np.int32))

    def int32_initializer(values):
        # Register a small int32 initializer (slice starts / ends).
        return builder.addInitializedInputTensor(
            np.array(values).astype(np.int32))

    accumulated = None
    for part in range(f):
        # Lower and upper index of this slice along the C0 axis.
        lower = part * chunk
        upper = (part + 1) * chunk

        # Take a slice of size (N, C0/f) out of X
        x_starts = int32_initializer([0, lower])
        x_ends = int32_initializer([N, upper])
        x_part = builder.aiOnnx.slice([data_id, x_starts, x_ends, axes_id])

        # Take a slice of size (C0/f, C1) out of W
        w_starts = int32_initializer([lower, 0])
        w_ends = int32_initializer([upper, C1])
        w_part = builder.aiOnnx.slice([weight_id, w_starts, w_ends, axes_id])

        # Multiply the slices together, and accumulate as necessary
        product = builder.aiOnnx.matmul([x_part, w_part])
        if part == 0:
            accumulated = product
        else:
            accumulated = builder.aiOnnx.add([product, accumulated])

    # Finally, the non-linearity
    activated = builder.aiOnnx.relu([accumulated])

    # This boiler-plate is currently necessary with opset-10 slice
    transformer = popart.GraphTransformer(builder.getModelProto())
    transformer.convertAllFixedPointInitializersToConstants()
    builder = popart.Builder(transformer.getModelProto())

    loss_id = builder.aiGraphcore.l1loss([activated], 0.2)

    dataFlow = popart.DataFlow(1, {})
    device = tu.create_test_device()
    userOptions = popart.SessionOptions()

    # To obtain the final dot graph, uncomment this:
    # userOptions.dotChecks = {"Final"};

    patterns = popart.Patterns()

    sgd = popart.SGD({"defaultLearningRate": (0.1, True)})
    session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                     dataFlow=dataFlow,
                                     optimizer=sgd,
                                     loss=loss_id,
                                     patterns=patterns,
                                     userOptions=userOptions,
                                     deviceInfo=device)

    session.prepareDevice()
    session.weightsFromHost()

    inputVals = np.array(npr.randn(N * C0), dtype=np.float32)
    session.run(popart.PyStepIO({data_id: inputVals}, {}))
    session.weightsToHost()

    # Buffer pre-filled with a sentinel value; readWeights overwrites it.
    w0R = np.array(-777.0 * np.ones(C0 * C1), dtype=np.float32)
    session.readWeights(popart.PyWeightsIO({weight_id: w0R}))

    # A pytorch version to confirm numerical correctness:
    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.w0 = torch.nn.Parameter(torch.from_numpy(wVals.copy()))

        def forward(self, x):
            return torch.relu(torch.matmul(x, self.w0))

    net = Net()
    optimizer = optim.SGD(net.parameters(), lr=0.1)

    out = net(torch.from_numpy(inputVals.reshape([N, C0])))
    loss = 0.2 * torch.mean(torch.abs(out))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    torch_weights = net.w0.detach().numpy().flatten()
    initial_weights = wVals.flatten()

    # How far each framework moved from the initial weights, and how far the
    # two results are from each other.
    baseline0 = np.sum(np.abs(torch_weights - initial_weights))
    baseline1 = np.sum(np.abs(w0R - initial_weights))
    error = np.sum(np.abs(torch_weights - w0R))
    assert (error / (baseline0 + baseline1) < 1e-6)
def test_type_cast_INT64ToINT32_clip():
    """
    The model:

                   starts,ends,axes (int64)
                          |
    t0 -----------.------- Slice -- t1 ------.
                  |                           |
    indices ---- Gather ----------- t2 ----- Add - o
    (int64)

    - Gather takes two inputs: the int64 'indices' and the data input 't0'.
      Because one of its inputs is variable, it cannot be evaluated on host
      by the const expression util.
    - The IPU does not support int64, so the int64 tensors of the onnx model
      must be converted to int32.
    - That conversion is only possible if all int64 tensor data lies within
      the range of int32, unless the data is clipped to int32's numeric
      limits.
    - Here the 'starts' tensor of the slice is valid according to the onnx
      spec but out of range for int32. Clipping it is known to be safe in
      this case, as the result is unchanged.
    """
    data = np.array([[-1, -2, -3], [4, 5, 6], [7, 8, 9]]).astype(np.float32)
    indices = np.array([0, 1]).astype(np.int64)
    axis = 0

    slice_axes = np.array([0], dtype=np.int64)
    # Out of range value for int32!
    slice_starts = np.array([-9223372036854775807], dtype=np.int64)
    slice_ends = np.array([2], dtype=np.int64)

    builder = popart.Builder()
    data_id = builder.addInputTensor("FLOAT", data.shape)
    indices_id = builder.addInputTensor("INT64", indices.shape)
    gathered = builder.aiOnnx.gather([data_id, indices_id], axis)

    axes_id = builder.aiOnnx.constant(slice_axes)
    starts_id = builder.aiOnnx.constant(slice_starts)
    ends_id = builder.aiOnnx.constant(slice_ends)
    sliced = builder.aiOnnx.slice([data_id, starts_id, ends_id, axes_id])

    out_id = builder.aiOnnx.add([gathered, sliced])

    # Convert the int64 model to int32, clipping out-of-range values.
    transformer = popart.GraphTransformer(builder.getModelProto())
    transformer.convertINT64ToINT32(clip=True)
    int32_proto = transformer.getModelProto()

    session = popart.InferenceSession(fnModel=int32_proto,
                                      dataFlow=popart.DataFlow(1, [out_id]),
                                      deviceInfo=tu.create_test_device())
    session.prepareDevice()

    anchors = session.initAnchorArrays()
    session.run(popart.PyStepIO({data_id: data, indices_id: indices},
                                anchors))

    # Gather and slice select the same rows, so the sum is twice the gather.
    expected = 2 * np.take(data, indices, axis=axis)
    assert np.allclose(anchors[out_id], expected)
start = time.perf_counter() model(dummy_input) end = time.perf_counter() elapsed = end - start timePerEvent = elapsed / n print("{:.12f}".format(elapsed)) else: # POPART IMPORT graph_transformer = popart.GraphTransformer(onnx_model) anchors = {"tag": popart.AnchorReturnType("ALL")} dataFeed = popart.DataFlow(1, anchors) device = popart.DeviceManager().acquireAvailableDevice(1) session = popart.InferenceSession(graph_transformer.getModelProto(), dataFeed, device) session.prepareDevice() inferenceAnchors = session.initAnchorArrays() inputs = np.random.rand(n, nFeatures, nTracks, 1).astype(np.float32) stepio = popart.PyStepIO({"data": inputs}, inferenceAnchors)
def test_type_cast_BFloatToFloat():
    """Check that convertBFloats16ToFloat32 keeps shapes and values.

    An onnx model with a single ``ConstantOfShape`` node holding six bfloat16
    values is written to ``type_test.onnx``, cast through
    ``popart.GraphTransformer`` and saved as ``type_test_modified.onnx``. The
    reloaded model must pass the onnx checker, keep its [3, 2] input and
    output shapes, and hold float data matching the original values.
    """
    # Create output tensors of the type to cast
    X = helper.make_tensor_value_info('X', TensorProto.BFLOAT16, [3, 2])
    Y = helper.make_tensor_value_info('Y', TensorProto.BFLOAT16, [3, 2])

    # Define the target values as float32
    float_values = np.array([0.1, 0.2, 0.3, 0.4, 0.5,
                             0.6]).astype(np.float32)

    # Reinterpret the float bytes as 16-bit words, so every float is split
    # into two halves.
    halves = np.frombuffer(float_values.tobytes(), dtype=np.uint16)

    # Keep only the second half of each float (every other 16-bit word) and
    # convert back to bytes. These bfloat values are close to the original
    # floats (precision loss of around 5e-3).
    bfloat = halves[1::2].tobytes()

    # Reference data: rebuild float32 values from the truncated halves by
    # putting a zero word in front of each one, so that we can check we get
    # exactly the same "truncated" data from the cast.
    padded = np.frombuffer(bfloat, dtype=np.uint16)
    for position in range(5, -1, -1):
        padded = np.insert(padded, position, 0)
    float_again = np.frombuffer(np.array(padded).tobytes(), dtype=np.float32)

    const_tensor = onnx.helper.make_tensor(
        name='const_tensor',
        data_type=onnx.TensorProto.BFLOAT16,
        dims=[3, 2],
        vals=bfloat,
        raw=True)
    node_def = onnx.helper.make_node('ConstantOfShape',
                                     inputs=['X'],
                                     outputs=['Y'],
                                     value=const_tensor)

    # Create the graph (GraphProto) and the model (ModelProto)
    graph_def = helper.make_graph([node_def], 'test-model', [X], [Y])
    onnx_model = helper.make_model(graph_def)

    # Make sure the opset version is version 9 (by default it would be 11
    # which would crash subsequent function calls)
    onnx_model = version_converter.convert_version(onnx_model, 9)

    # Write the model to disk
    onnx.save_model(onnx_model, "type_test.onnx")

    # Load proto into a graph transformer and apply cast
    transformer = popart.GraphTransformer("type_test.onnx")
    transformer.convertBFloats16ToFloat32()

    # Save the modified proto and load it again as an onnx model
    popart.Builder(
        transformer.getModelProto()).saveModelProto("type_test_modified.onnx")
    modified_onnx_model = onnx.load("type_test_modified.onnx")

    # Make sure the graph is still good
    onnx.checker.check_model(modified_onnx_model)

    # There should be exactly one input and one output
    input_type = modified_onnx_model.graph.input[0].type.tensor_type
    output_type = modified_onnx_model.graph.output[0].type.tensor_type

    # Make sure shapes remain untouched
    for tensor_type in (input_type, output_type):
        assert tensor_type.HasField(
            "shape"), "Modified graph output has no shape attribute"
    for tensor_type in (input_type, output_type):
        for axis, extent in enumerate((3, 2)):
            assert (tensor_type.shape.dim[axis].dim_value == extent
                    ), "Dimensions were not conserved by cast"

    cast_data = modified_onnx_model.graph.node[0].attribute[0].t.float_data

    # Test whether the new tensor has the right size
    assert (len(cast_data) == 6), "Wrong number of Bytes in casted version."

    # Compare against the original floats (loosely) and against the
    # truncated reference values
    assert np.allclose(cast_data, float_values,
                       rtol=1e-2), "Data was not conserved by cast"
    assert np.allclose(cast_data,
                       float_again), "Data was not conserved by cast"
def test_type_cast_DoubleToFloat():
    """Check that convertDoublesToFloats keeps shapes and values.

    An onnx model with a single ``ConstantOfShape`` node holding a [3, 2]
    double tensor is written to ``type_test.onnx``, cast through
    ``popart.GraphTransformer`` and saved as ``type_test_modified.onnx``. The
    reloaded model must pass the onnx checker, keep its input and output
    shapes, and hold float data matching the original values.
    """
    # Create output tensors of the type to cast
    X = helper.make_tensor_value_info('X', TensorProto.DOUBLE, [3, 2])
    Y = helper.make_tensor_value_info('Y', TensorProto.DOUBLE, [3, 2])

    values = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]).astype(np.double)

    const_tensor = onnx.helper.make_tensor(
        name='const_tensor',
        data_type=onnx.TensorProto.DOUBLE,
        dims=values.shape,
        vals=values.flatten().astype(np.double),
        raw=False)
    node_def = onnx.helper.make_node('ConstantOfShape',
                                     inputs=['X'],
                                     outputs=['Y'],
                                     value=const_tensor)

    # Create the graph (GraphProto) and the model (ModelProto)
    graph_def = helper.make_graph([node_def], 'test-model', [X], [Y])
    onnx_model = helper.make_model(graph_def)

    # Make sure the opset version is version 9 (by default it would be 11
    # which would crash subsequent function calls)
    onnx_model = version_converter.convert_version(onnx_model, 9)

    # Write the model to disk
    onnx.save_model(onnx_model, "type_test.onnx")

    # Load proto into a graph transformer and apply cast
    transformer = popart.GraphTransformer("type_test.onnx")
    transformer.convertDoublesToFloats()

    # Save the modified proto and load it again as an onnx model
    popart.Builder(
        transformer.getModelProto()).saveModelProto("type_test_modified.onnx")
    modified_onnx_model = onnx.load("type_test_modified.onnx")

    # Make sure the graph is still good
    onnx.checker.check_model(modified_onnx_model)

    # There should be exactly one input and one output
    input_type = modified_onnx_model.graph.input[0].type.tensor_type
    output_type = modified_onnx_model.graph.output[0].type.tensor_type

    # Make sure shapes remain untouched
    for tensor_type in (input_type, output_type):
        assert tensor_type.HasField(
            "shape"), "Modified graph output has no shape attribute"
    for tensor_type in (input_type, output_type):
        for axis, extent in enumerate((3, 2)):
            assert (tensor_type.shape.dim[axis].dim_value == extent
                    ), "Dimensions were not conserved by cast"

    cast_data = modified_onnx_model.graph.node[0].attribute[0].t.float_data
    source_data = onnx_model.graph.node[0].attribute[0].t.double_data

    # Test whether the new tensor has the right size
    assert (len(cast_data) == len(source_data)
            ), "Wrong number of Bytes in casted version."

    # Compare the cast values with the originals
    assert np.allclose(cast_data,
                       values.flatten()), "Data was not conserved by cast"
def test_type_cast_UINT8ToINT32():
    """Check that convertUINT8ToINT32 keeps shapes and values.

    An onnx model with a single ``ConstantOfShape`` node holding a [3, 2]
    uint8 tensor (stored as raw bytes) is written to ``type_test.onnx``, cast
    through ``popart.GraphTransformer`` and saved as
    ``type_test_modified.onnx``. The reloaded model must pass the onnx
    checker, keep its input and output shapes, and hold int32 data matching
    the original values.
    """
    # Create output tensors of the type to cast
    X = helper.make_tensor_value_info('X', TensorProto.UINT8, [3, 2])
    Y = helper.make_tensor_value_info('Y', TensorProto.UINT8, [3, 2])

    values = np.array([[1, 2], [3, 4], [5, 6]]).astype(np.uint8)

    # We use ConstantOfShape even if it is not following the onnx
    # spec because we need an op that can store several actual values
    # in the graph to make sure the type conversion is successful. Constant
    # would not work here because it has only been available since opset 12.
    # Constant-9 is not compatible with the default opset-version 11
    const_tensor = onnx.helper.make_tensor(
        name='const_tensor',
        data_type=onnx.TensorProto.UINT8,
        dims=values.shape,
        vals=values.flatten().astype(np.uint8).tobytes(),
        raw=True)
    node_def = onnx.helper.make_node('ConstantOfShape',
                                     inputs=['X'],
                                     outputs=['Y'],
                                     value=const_tensor)

    # Create the graph (GraphProto) and the model (ModelProto)
    graph_def = helper.make_graph([node_def], 'test-model', [X], [Y])
    onnx_model = helper.make_model(graph_def)

    # Make sure the opset version is version 9 (by default it would be 11
    # which would crash subsequent function calls)
    onnx_model = version_converter.convert_version(onnx_model, 9)

    # Write the model to disk
    onnx.save_model(onnx_model, "type_test.onnx")

    # Load proto into a graph transformer and apply cast
    transformer = popart.GraphTransformer("type_test.onnx")
    transformer.convertUINT8ToINT32()

    # Save the modified proto and load it again as an onnx model
    popart.Builder(
        transformer.getModelProto()).saveModelProto("type_test_modified.onnx")
    modified_onnx_model = onnx.load("type_test_modified.onnx")

    # Make sure the graph is still good
    onnx.checker.check_model(modified_onnx_model)

    # There should be exactly one input and one output
    input_type = modified_onnx_model.graph.input[0].type.tensor_type
    output_type = modified_onnx_model.graph.output[0].type.tensor_type

    # Make sure shapes remain untouched
    for tensor_type in (input_type, output_type):
        assert tensor_type.HasField(
            "shape"), "Modified graph output has no shape attribute"
    for tensor_type in (input_type, output_type):
        for axis, extent in enumerate((3, 2)):
            assert (tensor_type.shape.dim[axis].dim_value == extent
                    ), "Dimensions were not conserved by cast"

    cast_data = modified_onnx_model.graph.node[0].attribute[0].t.int32_data
    source_bytes = onnx_model.graph.node[0].attribute[0].t.raw_data

    # Test whether the new tensor has the right size: one int32 entry per
    # original uint8 byte
    assert (len(cast_data) == len(source_bytes)
            ), "Wrong number of Bytes in casted version."

    # Compare the cast values with the originals
    assert np.allclose(cast_data,
                       values.flatten()), "Data was not conserved by cast"
start = time.perf_counter() net(dummy_input, dummy_init_hc) end = time.perf_counter() elapsed = end - start timePerEvent = elapsed / n print("{:.12f}".format(elapsed)) else: # IMPORT INTO POPART graph_transformer = popart.GraphTransformer(export_name) inputShapeInfo = popart.InputShapeInfo() inputShapeInfo.add("data", popart.TensorInfo("FLOAT", [n, nTracks, nFeatures])) inputShapeInfo.add("init_hc", popart.TensorInfo("FLOAT", [1, n, nHidden])) anchors = {"tag": popart.AnchorReturnType("ALL")} dataFeed = popart.DataFlow(1, anchors) # device = popart.DeviceManager().createIpuModelDevice({}) device = popart.DeviceManager().acquireAvailableDevice(1) session = popart.InferenceSession( graph_transformer.getModelProto(), dataFeed,
def _convert_floats_to_halfs(proto):
    """Run the floats-to-halfs pass on ``proto`` and return the new proto.

    Args:
        proto: model passed to ``popart.GraphTransformer``.

    Returns:
        The model proto obtained from the transformer after
        ``convertFloatsToHalfs`` has been applied.
    """
    transformer = popart.GraphTransformer(proto)
    transformer.convertFloatsToHalfs()
    half_proto = transformer.getModelProto()
    return half_proto