def create_model(batch_size):
    """Build the ONNX protobuf of a single-layer linear model with popart.

    The graph is assembled directly through ``popart.Builder`` calls (the
    alternative would be loading an exported ONNX protobuf from a file):
    ``output = x @ W + b``, followed by a softmax whose result is fed,
    together with an INT32 label input, into ``popart.NllLoss``.

    Args:
        batch_size: Leading dimension of the FLOAT data input and the length
            of the INT32 label input.

    Returns:
        A tuple ``(proto, x, label, output, loss)`` holding the model proto,
        the data input tensor, the label input tensor, the pre-softmax output
        tensor (registered as a graph output) and the NllLoss object.
    """
    builder = popart.Builder()

    # Seed numpy's global RNG so weight initialization is predictable.
    np.random.seed(0)

    n_inputs = ROWS * COLS
    n_outputs = 10

    # Data input: one flattened sample of ROWS * COLS values per batch row.
    x = builder.addInputTensor(
        popart.TensorInfo("FLOAT", [batch_size, n_inputs]))

    # Weights are drawn before biases; both come from N(0, 1) as float32.
    weight_values = np.random.normal(0, 1, [n_inputs, n_outputs])
    W = builder.addInitializedInputTensor(weight_values.astype(np.float32))
    y = builder.aiOnnx.matmul([x, W])

    bias_values = np.random.normal(0, 1, [n_outputs])
    b = builder.addInitializedInputTensor(bias_values.astype(np.float32))

    output = builder.aiOnnx.add([y, b], "output")
    builder.addOutputTensor(output)

    # The loss consumes softmax probabilities, not the raw output tensor.
    probs = builder.aiOnnx.softmax([output])
    label = builder.addInputTensor(popart.TensorInfo("INT32", [batch_size]))
    loss = popart.NllLoss(probs, label, "nllLossVal")

    return builder.getModelProto(), x, label, output, loss
def loss(prob, label):
    """Create the NllLoss for ``label`` and place it on the matching scope.

    The scope (and any ``ignore_index``) is chosen as follows:

    * ``model.config.task == "SQUAD"``: ``model.squad_scope``, no
      ``ignore_index`` argument is passed.
    * otherwise, if ``'nsp' in label``: ``model.nsp_scope`` with
      ``ignore_index=2``.
    * otherwise: ``model.mlm_scope`` with ``ignore_index=0``.

    Args:
        prob: Probabilities tensor passed as the first NllLoss argument.
        label: Label tensor id; also used to build the loss name
            ``"<label>/loss"`` and tested for the substring ``'nsp'``.

    Returns:
        The ``popart.NllLoss`` with its virtual graph and pipeline stage set
        from the selected scope.
    """
    # Pick the placement scope and the task-specific NllLoss keyword args.
    if model.config.task == "SQUAD":
        scope, extra_kwargs = model.squad_scope, {}
    elif 'nsp' in label:
        scope, extra_kwargs = model.nsp_scope, {"ignore_index": 2}
    else:
        scope, extra_kwargs = model.mlm_scope, {"ignore_index": 0}

    vGraph = scope.virtualGraph
    pStage = scope.pipelineStage

    nllloss = popart.NllLoss(prob, label, f"{label}/loss", **extra_kwargs)
    nllloss.virtualGraph(vGraph)
    nllloss.pipelineStage(pStage)

    logger.debug(f"Placing NllLoss for {label} on ipu {vGraph} stage {pStage}")
    return nllloss
def create_pipelined_model(num_features, num_classes, batch_size):
    """Build a two-layer dense model split across virtual graphs 0 and 1.

    Layer 1 (gemm + relu, ``num_features -> 512``) is built under
    ``builder.virtualGraph(0)``; layer 2 (gemm + relu,
    ``512 -> num_classes``), the softmax and the NllLoss are placed on
    virtual graph 1. Weights and biases are drawn from N(0, 1) as float32.

    Args:
        num_features: Width of each input sample.
        num_classes: Width of the final dense layer.
        batch_size: Leading dimension of the data input and label length.

    Returns:
        A tuple ``(x0, labels, model_proto, anchor_map, loss)``. The anchor
        map has two entries, both with ``AnchorReturnType("ALL")``: ``"loss"``
        and ``popart.reservedGradientPrefix() + x0``.
    """
    builder = popart.Builder()

    def normal_f32(shape):
        # Float32 sample from a standard normal distribution.
        return np.random.normal(0, 1, shape).astype(np.float32)

    def dense_relu(inp, n_in, n_out, vgraph, gemm_name, relu_name):
        # Parameters are registered outside the virtual-graph context and
        # weights are drawn before biases, exactly as in a hand-unrolled
        # layer; only the gemm and relu ops are placed on `vgraph`.
        weights = builder.addInitializedInputTensor(normal_f32([n_in, n_out]))
        biases = builder.addInitializedInputTensor(normal_f32([n_out]))
        with builder.virtualGraph(vgraph):
            pre_act = builder.aiOnnx.gemm([inp, weights, biases],
                                          debugPrefix=gemm_name)
            return builder.aiOnnx.relu([pre_act], debugPrefix=relu_name)

    # Graph inputs: labels first, then data.
    labels = builder.addInputTensor(popart.TensorInfo("INT32", [batch_size]))
    x0 = builder.addInputTensor(
        popart.TensorInfo("FLOAT", [batch_size, num_features]))

    # Dense 1 on virtual graph 0, dense 2 on virtual graph 1.
    x2 = dense_relu(x0, num_features, 512, 0, "gemm_x1", "relu_x2")
    x4 = dense_relu(x2, 512, num_classes, 1, "gemm_x3", "relu_x4")

    # Output probabilities, also on virtual graph 1.
    with builder.virtualGraph(1):
        output_probs = builder.aiOnnx.softmax([x4],
                                              axis=1,
                                              debugPrefix="softmax_output")
    builder.addOutputTensor(output_probs)

    # Loss lives on the same virtual graph as the softmax.
    loss = popart.NllLoss(output_probs, labels, "loss")
    loss.virtualGraph(1)

    # Anchors: the loss plus the tensor named by the gradient prefix + x0.
    art = popart.AnchorReturnType("ALL")
    anchor_map = {
        "loss": art,
        popart.reservedGradientPrefix() + x0: art,
    }

    model_proto = builder.getModelProto()
    return x0, labels, model_proto, anchor_map, loss
def eval_builder(opts):
    """Extend the inference graph with a softmax, labels and an NllLoss.

    Starts from ``infer_builder(opts)``, applies a softmax to the first
    element of its outputs, adds an INT32 label input of shape
    ``[opts.timesteps, opts.batch_size]`` and builds an NllLoss on the two.

    Label data is all zeros when ``opts.use_zero_values`` is truthy;
    otherwise it is ``np.random.uniform(0, 2, ...)`` cast to int32.

    Args:
        opts: Options object; ``timesteps``, ``batch_size`` and
            ``use_zero_values`` are read here, and it is forwarded to
            ``infer_builder``.

    Returns:
        A five-element list: the builder, the input data mapping extended
        with the label data, an anchor mapping for the loss output, a
        one-element list holding the loss, and ``None``.
    """
    # Only the first three of the five values from infer_builder are used.
    builder, data, outputs, _ignored_a, _ignored_b = infer_builder(opts)

    first_output = list(outputs)[0]
    probs = builder.aiOnnx.softmax([first_output])

    label_shape = [opts.timesteps, opts.batch_size]
    label = builder.addInputTensor(popart.TensorInfo("INT32", label_shape))
    loss = popart.NllLoss(probs, label, "nllLossVal")

    label_data = (np.zeros(label_shape, np.int32)
                  if opts.use_zero_values else
                  np.random.uniform(0, 2, label_shape).astype(np.int32))

    feed = {**data, label: label_data}
    anchors = {loss.output(0): popart.AnchorReturnType("ALL")}
    return [builder, feed, anchors, [loss], None]
def convert_model(batch_size: int, protobuf_file: str,
                  output_name: str) -> Tuple[bytes, str, popart.NllLoss]:
    """Load an ONNX protobuf into a popart builder and attach an NllLoss.

    Args:
        batch_size: Length of the INT32 label input that is added.
        protobuf_file: ONNX binary protobuf filename handed to
            ``popart.Builder``.
        output_name: Name of the output tensor the loss is computed on.

    Returns:
        The model proto, the label tensor and the loss, in that order.
    """
    # Builder seeded from the ONNX protobuf file.
    builder = popart.Builder(protobuf_file)

    # One INT32 label per batch entry.
    label = builder.addInputTensor(popart.TensorInfo("INT32", [batch_size]))

    # The proto is taken after the label input has been added.
    proto = builder.getModelProto()

    loss = popart.NllLoss(output_name, label, "nllLossVal")
    return proto, label, loss
def create_model(builder, opts, image, label):
    """Build a ResNet graph with softmax, argmax and a mean-reduced NllLoss.

    Args:
        builder: Builder handed to the ``ResNet`` constructor.
        opts: Options handed to the ``ResNet`` constructor.
        image: Input passed to the ResNet call to produce logits.
        label: Label tensor for the NllLoss.

    Returns:
        A tuple ``(proto, loss, argmax, outputs)`` where ``outputs`` maps the
        argmax tensor and the string ``"loss"`` to
        ``AnchorReturnType("ALL")``.
    """
    resnet = ResNet(opts, builder)
    resnet.train = True
    logits = resnet(image)

    # All graph ops below go through the builder held by the ResNet object.
    graph = resnet.builder
    probs = graph.aiOnnx.softmax([logits])
    argmax = graph.aiOnnx.argmax([probs], axis=1, keepdims=0)

    loss = popart.NllLoss(probs,
                          label,
                          "loss",
                          reduction=popart.ReductionType.Mean)

    outputs = {}
    outputs[argmax] = popart.AnchorReturnType("ALL")
    outputs["loss"] = popart.AnchorReturnType("ALL")

    return graph.getModelProto(), loss, argmax, outputs
def create_model(batch_size):
    """Build the ONNX protobuf of a small convolutional model with popart.

    The graph is assembled directly through ``popart.Builder`` calls (the
    alternative would be loading an exported ONNX protobuf from a file):

    * conv 5x5 (1 -> 20 channels) + relu + 2x2 max-pool with stride 2
    * conv 5x5 (20 -> 50 channels) + relu + 2x2 max-pool with stride 2
    * reshape to ``[batch_size, 50 * 4**2]``
    * matmul + add (-> 500) + relu
    * matmul + add (-> 10), registered as the graph output
    * softmax feeding an NllLoss against an INT32 label input

    All parameters come from ``kaiming_init``.

    Args:
        batch_size: Leading dimension of the image input, the reshape target
            and the label input.

    Returns:
        A tuple ``(proto, input_t, label, output_t, loss)``.
    """
    builder = popart.Builder()

    def conv_relu_pool(inp, in_ch, out_ch):
        # Weights are initialized and registered before biases, then
        # conv -> relu -> max-pool are applied.
        fan_in = in_ch * 5 * 5
        kernel = builder.addInitializedInputTensor(
            kaiming_init([out_ch, in_ch, 5, 5], fan_in))
        bias = builder.addInitializedInputTensor(
            kaiming_init([out_ch], fan_in, 1, 1))
        conv = builder.aiOnnx.conv([inp, kernel, bias],
                                   dilations=[1, 1],
                                   kernel_shape=[5, 5],
                                   strides=[1, 1],
                                   pads=[0, 0, 0, 0])
        activated = builder.aiOnnx.relu([conv])
        (pooled, ) = builder.aiOnnx.maxpool([activated],
                                            num_outputs=1,
                                            kernel_shape=[2, 2],
                                            pads=[0, 0, 0, 0],
                                            strides=[2, 2])
        return pooled

    def affine(inp, n_in, n_out):
        # matmul followed by a bias add; weights are registered first.
        weights = builder.addInitializedInputTensor(
            kaiming_init([n_in, n_out], n_in))
        bias = builder.addInitializedInputTensor(
            kaiming_init([n_out], n_in, 1, 1))
        product = builder.aiOnnx.matmul([inp, weights])
        return builder.aiOnnx.add([product, bias])

    input_t = builder.addInputTensor(
        popart.TensorInfo('FLOAT', [batch_size, 1, ROWS, COLS]))

    # Convolutional feature extractor.
    features = conv_relu_pool(input_t, 1, 20)
    features = conv_relu_pool(features, 20, 50)

    # Flatten to [batch_size, 50 * 4**2] for the dense layers.
    flat_width = 50 * 4**2
    shape = builder.aiOnnx.constant(np.asarray([batch_size, flat_width]))
    flat = builder.aiOnnx.reshape([features, shape])

    # Dense head: hidden layer with relu, then the 10-wide output layer.
    hidden = builder.aiOnnx.relu([affine(flat, flat_width, 500)])
    output_t = affine(hidden, 500, 10)
    builder.addOutputTensor(output_t)

    # The loss consumes softmax probabilities, not the raw output tensor.
    probs = builder.aiOnnx.softmax([output_t])
    label = builder.addInputTensor(popart.TensorInfo('INT32', [batch_size]))
    loss = popart.NllLoss(probs, label, 'nllLossVal')

    return builder.getModelProto(), input_t, label, output_t, loss
def create_model(num_features, num_classes, batch_size, force_recompute=False):
    """Build a four-layer dense model, optionally marking ops for recompute.

    Each layer is ``gemm`` followed by ``relu`` with N(0, 1) float32 weights
    and biases; layer widths are ``num_features -> 512 -> 512 -> 512 ->
    num_classes``. A softmax over axis 1 feeds an NllLoss.

    Args:
        num_features: Width of each input sample.
        num_classes: Width of the final dense layer.
        batch_size: Leading dimension of the data input and label length.
        force_recompute: When truthy,
            ``builder.recomputeOutputInBackwardPass`` is called on the output
            of every gemm and every relu.

    Returns:
        A tuple ``(x0, labels, model_proto, anchor_map, loss)``. The anchor
        map has two entries, both with ``AnchorReturnType("ALL")``: ``"loss"``
        and ``popart.reservedGradientPrefix() + x0``.
    """
    builder = popart.Builder()

    def normal_f32(shape):
        # Float32 sample from a standard normal distribution.
        return np.random.normal(0, 1, shape).astype(np.float32)

    def maybe_recompute(tensor):
        # Flag the tensor for recomputation only when requested.
        if force_recompute:
            builder.recomputeOutputInBackwardPass(tensor)
        return tensor

    # Graph inputs: labels first, then data.
    labels = builder.addInputTensor(popart.TensorInfo("INT32", [batch_size]))
    x0 = builder.addInputTensor(
        popart.TensorInfo("FLOAT", [batch_size, num_features]))

    # (fan_in, fan_out, gemm debug name, relu debug name) for each layer.
    layer_specs = (
        (num_features, 512, "gemm_1", "relu_1"),
        (512, 512, "gemm_2", "relu_2"),
        (512, 512, "gemm_3", "relu_3"),
        (512, num_classes, "gemm_4", "relu_4"),
    )

    out = x0
    for fan_in, fan_out, gemm_name, relu_name in layer_specs:
        # Weights are drawn and registered before biases for each layer.
        weights = builder.addInitializedInputTensor(
            normal_f32([fan_in, fan_out]))
        biases = builder.addInitializedInputTensor(normal_f32([fan_out]))
        out = maybe_recompute(
            builder.aiOnnx.gemm([out, weights, biases],
                                debugPrefix=gemm_name))
        out = maybe_recompute(
            builder.aiOnnx.relu([out], debugPrefix=relu_name))

    # Output probabilities.
    output_probs = builder.aiOnnx.softmax([out],
                                          axis=1,
                                          debugPrefix="softmax_output")
    builder.addOutputTensor(output_probs)

    loss = popart.NllLoss(output_probs, labels, "loss")

    # Anchors: the loss plus the tensor named by the gradient prefix + x0.
    art = popart.AnchorReturnType("ALL")
    anchor_map = {
        "loss": art,
        popart.reservedGradientPrefix() + x0: art,
    }

    model_proto = builder.getModelProto()
    return x0, labels, model_proto, anchor_map, loss
} dataFlow = popart.DataFlow(batchesPerStep, anchors) inputShapeInfo = popart.InputShapeInfo() inputShapeInfo.add("image0", popart.TensorInfo("FLOAT", [batchSize, nInChans, 32, 32])) inputShapeInfo.add("image1", popart.TensorInfo("FLOAT", [batchSize, nInChans, 32, 32])) inputShapeInfo.add("label", popart.TensorInfo("INT32", [batchSize])) inNames = ["image0", "image1"] cifarInIndices = {"image0": 0, "image1": 0, "label": 1} outNames = ["imageSum", "postConv0", "preProbSquared", "probs"] losses = [ popart.NllLoss("probs", "label", "nllLossVal"), popart.L1Loss("preProbSquared", "l1LossVal", 0.01) ] willowOptPatterns = popart.Patterns(popart.PatternsLevel.All) class Module0(torch.nn.Module): def __init__(self): torch.nn.Module.__init__(self) self.conv1 = torchwriter.conv3x3(nInChans, nOutChans) self.conv2 = torchwriter.conv3x3(nOutChans, nOutChans) self.sin = torch.sin self.pad = torch.nn.functional.pad # for softmax dim -1 is correct for [sample][class], # gives class probabilities for each sample.