def popart_result_and_model(config, weight_transposed, is_bwd=False):
    """Run popart model based on config.

    Args:
        config (BertConfig): Popart config.
        weight_transposed: Construct embedding dict transposed.
        is_bwd (bool, optional): Construct training graph if True,
            else inference graph. Defaults to False.

    Returns:
        Tuple: Gathered numpy data, outputs from model, proto, post_proto
    """
    user_options = {}
    popart_model = Bert(config)
    builder = popart_model.builder

    indices_len = config.micro_batch_size * config.sequence_length
    sequence_info = popart.TensorInfo("UINT32", [indices_len])
    indices = builder.addInputTensor(sequence_info)
    data = {
        indices:
        np.random.randint(0, config.vocab_length,
                          (indices_len)).astype(np.uint32)
    }

    # num_splits is a module-level constant in this test file.
    output = popart_model.word_embedding_serialized(indices, num_splits)

    if is_bwd:
        l1_loss = popart_model.builder.aiGraphcore.l1loss(
            [output],
            0.1,
            debugContext="l1LossVal",
            reduction=popart.ReductionType.Sum)
        proto = builder.getModelProto()
        optimizer = popart.ConstSGD(0.01)

        outputs, post_proto = run_py(proto,
                                     data, (output, l1_loss),
                                     loss=l1_loss,
                                     optimizer=optimizer,
                                     user_options=user_options)
    else:
        proto = builder.getModelProto()
        outputs, post_proto = run_py(proto,
                                     data,
                                     output,
                                     user_options=user_options)

    return [data[indices]], outputs, proto, post_proto


def test_detach_error():
    np.random.seed(0)
    Batchsize = 8
    Classes = 32

    dshape = [Batchsize, 2, 4, 4]
    lshape = [Batchsize]
    wshape = [2, 2, 3, 3]

    ip_data = np.random.rand(*dshape).astype(np.float32)
    w1_data = np.random.rand(*wshape).astype(np.float32)
    lb_data = np.random.randint(Classes, size=lshape)

    builder = popart.Builder()

    input_ = builder.addInputTensor(popart.TensorInfo("FLOAT", dshape),
                                    "input_i1")
    lb = builder.addInputTensor(popart.TensorInfo("INT32", lshape))

    w1 = builder.addInitializedInputTensor(w1_data)
    conv1 = builder.aiOnnx.conv([input_, w1],
                                dilations=[1, 1],
                                pads=[1, 1, 1, 1],
                                strides=[1, 1],
                                debugPrefix="conv")
    o = builder.reshape_const(builder.aiOnnx, [conv1], [Batchsize, Classes])
    o = builder.aiGraphcore.detach([o])
    o = builder.aiOnnx.softmax([o], axis=np.size(lshape))
    loss = builder.aiGraphcore.nllloss([o, lb])

    dataFlow = popart.DataFlow(
        1, [o, loss, popart.reservedGradientPrefix() + input_])
    opts = popart.SessionOptions()

    with pytest.raises(popart.popart_exception) as e_info:
        session = popart.TrainingSession(
            fnModel=builder.getModelProto(),
            dataFlow=dataFlow,
            loss=loss,
            optimizer=popart.ConstSGD(LEARNING_RATE, WEIGHT_DECAY),
            userOptions=opts,
            deviceInfo=popart.DeviceManager().createIpuModelDevice({}))

    assert (e_info.value.args[0].startswith(
        f"Anchor tensor `{popart.reservedGradientPrefix() + input_}' not in Ir Tensors."
    ))


def runModel(pipeline, recompute):
    builder = popart.Builder()

    in0 = builder.addInputTensor("FLOAT", dshape)
    in1 = builder.addInputTensor("INT32", lshape)
    w0 = builder.addInitializedInputTensor(w0_data)
    with builder.virtualGraph(0), builder.pipelineStage(0):
        x = builder.aiOnnx.matmul([in0, w0])
    with builder.virtualGraph(1), builder.pipelineStage(1):
        x = builder.aiOnnx.sqrt([x])
    with builder.virtualGraph(0), builder.pipelineStage(2):
        x = builder.aiOnnx.add([w0, x])
        loss = builder.aiGraphcore.nllloss([x, in1])

    opts = popart.SessionOptions()
    opts.virtualGraphMode = popart.VirtualGraphMode.Manual
    opts.enablePipelining = pipeline
    if pipeline:
        opts.enableGradientAccumulation = True
        opts.accumulationFactor = bps
        test_bps = 1
    else:
        test_bps = bps
    if recompute:
        opts.autoRecomputation = popart.RecomputationType.Pipeline

    session = popart.TrainingSession(
        deviceInfo=popart.DeviceManager().createIpuModelDevice(
            {"numIPUs": "2"}),
        dataFlow=popart.DataFlow(test_bps, [loss]),
        fnModel=builder.getModelProto(),
        loss=loss,
        optimizer=popart.ConstSGD(0.1),
        userOptions=opts)

    session.prepareDevice()
    session.weightsFromHost()
    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO({in0: in0_data, in1: in1_data}, anchors)
    session.run(stepio)

    weights = {}
    weights[w0] = np.empty(shape=dshape, dtype=np.float32)
    weightsIo = popart.PyWeightsIO(weights)
    session.weightsToHost()
    session.readWeights(weightsIo)

    return weights[w0]


def create_session(inplacing):
    builder = popart.Builder()

    input_ = builder.addInputTensor(popart.TensorInfo("FLOAT", data.shape),
                                    "data")
    w = builder.addInitializedInputTensor(weight, 'input_weights')
    shape = builder.aiOnnx.constant(np.array(input_size))

    transpose = builder.aiOnnx.transpose([w], [0, 2, 1, 3], "transpose")
    builder.setInplacePreferences(transpose, {"TransposeInplace": +1e8})

    reshape = builder.aiOnnx.reshape([transpose, shape], "reshape")
    builder.setInplacePreferences(reshape, {"ReshapeInplace": +1e6})

    add = builder.aiOnnx.add([input_, reshape], "add")
    builder.setInplacePreferences(add, {"AddRhsInplace": +1e7})

    loss = builder.aiGraphcore.l1loss([add],
                                      0.1,
                                      reduction=popart.ReductionType.Mean)
    builder.addOutputTensor(loss)

    patterns = popart.Patterns(popart.PatternsLevel.Default)
    patterns.InPlace = inplacing

    opts = popart.SessionOptions()
    opts.constantWeights = constantWeights

    session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                     dataFlow=popart.DataFlow(1, [loss]),
                                     deviceInfo=tu.create_test_device(),
                                     userOptions=opts,
                                     patterns=patterns,
                                     loss=loss,
                                     optimizer=popart.ConstSGD(1e-3))

    session.prepareDevice()
    anchorRets = session.initAnchorArrays()
    inputs = {"data": data.copy()}
    stepio = popart.PyStepIO(inputs, anchorRets)
    session.weightsFromHost()
    return session, stepio, anchorRets


def test_batchnorm_train_half_fp32var(op_tester):
    # create test data
    d1 = np.random.rand(1, 3, 2, 2).astype(np.float16) * 100
    scale = np.random.rand(3).astype(np.float16)
    b = np.random.rand(3).astype(np.float16)
    mean = np.random.rand(3).astype(np.float16)
    var = np.random.rand(3).astype(np.float32)
    epsilon = 1e-05
    momentum = 0.1

    builder = popart.Builder()
    i1 = builder.addInputTensor(popart.TensorInfo(d1))
    iScale = builder.addInitializedInputTensor(scale)
    iB = builder.addInitializedInputTensor(b)
    iMean = builder.addInitializedInputTensor(mean)
    iVar = builder.addInitializedInputTensor(var)
    o_y, o_mean, o_var, o_smean, o_svar = builder.aiOnnx.batchnormalization(
        [i1, iScale, iB, iMean, iVar], 5, epsilon, momentum)
    builder.addOutputTensor(o_y)
    lossId = builder.aiGraphcore.identityloss([o_y])
    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o_y: popart.AnchorReturnType("All")})

    device = tu.create_test_device()

    options = popart.SessionOptions()
    options.enableStochasticRounding = False

    session = popart.TrainingSession(fnModel=proto,
                                     loss=lossId,
                                     dataFlow=dataFlow,
                                     deviceInfo=device,
                                     optimizer=popart.ConstSGD(0.01),
                                     userOptions=options)

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    inputs = {i1: d1}
    stepio = popart.PyStepIO(inputs, anchors)
    session.weightsFromHost()
    session.run(stepio)

    stepio = popart.PyStepIO(inputs, anchors)
    session.run(stepio)


def test_mini_resnet_like():
    dirpath = os.path.dirname(os.path.realpath(__file__))
    sys.path.append(dirpath + "/../../graph_util")
    batch_size = 1
    training = True
    norm_type = 'BatchNorm'

    # Get model proto
    proto, ip, op = get_resnet18_proto(batch_size, training, norm_type)

    # Create the onnx session
    opts = popart.SessionOptions()

    session = popart.TrainingSession(
        fnModel=proto,
        dataFlow=popart.DataFlow(1, {op: popart.AnchorReturnType("All")}),
        optimizer=popart.ConstSGD(0.001),
        loss=op,
        deviceInfo=tu.create_test_device(),
        userOptions=opts)

    session.prepareDevice()

    graph_report = session.getGraphReport()
    graph_report = json.loads(graph_report)

    total_mem = sum(graph_report['memory']['byTile']['total'])
    max_mem = max(graph_report['memory']['byTile']['totalIncludingGaps'])
    print(f'total_mem: {total_mem}')
    print(f'max_mem: {max_mem}')

    # Check that the total memory is within 5% of the reference
    ref_total = 67_201_630
    # If it is more than 5% over, it needs investigating
    assert total_mem / ref_total < 1.05
    # If it is more than 5% under, the reference should probably be updated
    assert total_mem / ref_total > 0.95

    # Check that the maximum memory is within 5% of the reference
    ref_max = 134_840
    # If it is more than 5% over, it needs investigating
    assert max_mem / ref_max < 1.05
    # If it is more than 5% under, the reference should probably be updated
    assert max_mem / ref_max > 0.95


def get_session(anchorIds, proto, device, loss, bps=1):
    dfAnchors = {}
    for anchorId in anchorIds:
        dfAnchors.update({anchorId: popart.AnchorReturnType("All")})

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=popart.DataFlow(bps, dfAnchors),
                                     optimizer=popart.ConstSGD(0.1),
                                     loss=loss,
                                     patterns=popart.Patterns(
                                         popart.PatternsLevel.All),
                                     deviceInfo=device)
    session.prepareDevice()
    session.weightsFromHost()
    anchors = session.initAnchorArrays()
    return session, anchors


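# A minimal usage sketch for `get_session` above, not part of the original
# tests: it builds a hypothetical one-matmul model with an identity loss and
# runs a single step. `popart`, `np`, and `tu` are assumed to be imported at
# module level, as in the surrounding tests.
def _example_get_session_usage():
    builder = popart.Builder()
    x = builder.addInputTensor(popart.TensorInfo("FLOAT", [2, 2]))
    w = builder.addInitializedInputTensor(np.ones([2, 2], dtype=np.float32))
    y = builder.aiOnnx.matmul([x, w])
    loss = builder.aiGraphcore.identityloss([y])

    # The helper compiles the model, loads weights and returns ready anchors.
    session, anchors = get_session(anchorIds=[y, loss],
                                   proto=builder.getModelProto(),
                                   device=tu.create_test_device(),
                                   loss=loss)

    stepio = popart.PyStepIO({x: np.ones([2, 2], dtype=np.float32)}, anchors)
    session.run(stepio)

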
def test_stochastic_rounding():
    # create test data
    d1 = np.random.rand(1, 3, 2, 2).astype(np.float16) * 100
    scale = np.random.rand(3).astype(np.float16)
    b = np.random.rand(3).astype(np.float16)
    mean = np.random.rand(3).astype(np.float16)
    var = np.random.rand(3).astype(np.float16)
    epsilon = 1e-05
    momentum = 0.1

    builder = popart.Builder()
    i1 = builder.addInputTensor(popart.TensorInfo(d1))
    iScale = builder.addInitializedInputTensor(scale)
    iB = builder.addInitializedInputTensor(b)
    iMean = builder.addInitializedInputTensor(mean)
    iVar = builder.addInitializedInputTensor(var)
    [o_y, o_mean, o_var, o_smean,
     o_svar] = builder.aiOnnx.batchnormalization(
         [i1, iScale, iB, iMean, iVar], 5, epsilon, momentum)
    loss = builder.aiGraphcore.identityloss([o_y])
    builder.addOutputTensor(o_y)
    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o_y: popart.AnchorReturnType("All")})
    device = tu.create_test_device()

    options = popart.SessionOptions()
    options.enableStochasticRounding = True

    sess = popart.TrainingSession(fnModel=proto,
                                  optimizer=popart.ConstSGD(0.1),
                                  loss=loss,
                                  dataFlow=dataFlow,
                                  deviceInfo=device,
                                  userOptions=options)

    anchors = sess.initAnchorArrays()
    sess.prepareDevice()

    # Confirm that you are able to set the random seed when
    # enableStochasticRounding is true, even though the random seed tensor
    # is not consumed by any op in the Ir
    sess.setRandomSeed(0)


def popart_result_and_model(popart_config, is_bwd=False, momentum=0.0):
    popart_model = Bert(popart_config)

    input_info = popart.TensorInfo(popart_config.popart_dtype, [
        popart_config.micro_batch_size * popart_config.sequence_length,
        popart_config.hidden_size
    ])
    input_tensor = popart_model.builder.addInputTensor(input_info)

    data = {
        input_tensor:
        np.random.normal(0, 0.02,
                         input_info.shape()).astype(popart_config.dtype)
    }

    output = popart_model.feed_forward(input_tensor)

    if is_bwd:
        l1 = popart_model.builder.aiGraphcore.l1loss(
            [output],
            0.1,
            debugContext="l1LossVal",
            reduction=popart.ReductionType.Sum)
        proto = popart_model.builder.getModelProto()

        # lr and num_reps_bwd are module-level constants in this test file.
        if momentum > 0.0:
            optimizer = popart.SGD({
                "defaultLearningRate": (lr, False),
                "defaultMomentum": (momentum, False),
                "defaultWeightDecay": (0.0, False)
            })
        else:
            optimizer = popart.ConstSGD(lr)

        outputs, post_proto = run_py(proto,
                                     data, (output, l1),
                                     loss=l1,
                                     optimizer=optimizer,
                                     num_reps=num_reps_bwd)
    else:
        proto = popart_model.builder.getModelProto()
        outputs, post_proto = run_py(proto, data, output)

    return data[input_tensor], outputs, proto, post_proto


def test_cast_no_grad(npSrcType, builderDstType):
    """Check that CastOp doesn't return a gradient Op when the casted-from
    type is not float/half."""
    np.random.seed(0)
    # Is randomly generated data ok here? Also, the tested range is [0, 10],
    # so no negative inputs are tested.
    inputData = np.random.uniform(0, 10, 10).astype(npSrcType)

    builder = popart.Builder()
    input_ = builder.addInputTensor(popart.TensorInfo(inputData))
    output_ = builder.aiOnnx.cast([input_], builderDstType)
    builder.addOutputTensor(output_)
    lossId = builder.aiGraphcore.identityloss([output_])
    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(
        1, {
            output_: popart.AnchorReturnType("All"),
            popart.reservedGradientPrefix() + input_:
            popart.AnchorReturnType("All"),
        })

    device = tu.create_test_device()
    patterns = popart.Patterns(['PreUniRepl', 'PostNRepl',
                                'SqrtGradOp']).enableRuntimeAsserts(False)

    options = popart.SessionOptions()
    options.enableStochasticRounding = False

    with pytest.raises(popart.popart_exception) as e_info:
        popart.TrainingSession(fnModel=proto,
                               loss=lossId,
                               dataFlow=dataFlow,
                               deviceInfo=device,
                               optimizer=popart.ConstSGD(0.01),
                               patterns=patterns,
                               userOptions=options)

    assert (e_info.value.args[0].startswith(
        f"Anchor tensor `{popart.reservedGradientPrefix() + input_}' not in Ir Tensors."
    ))


def _setup(self):
    ti = popart.TensorInfo("FLOAT", self.input_data.shape)
    builder = popart.Builder()
    self.input = builder.addInputTensor(ti)
    self.input_grad = popart.reservedGradientPrefix() + self.input
    self.output = builder.aiGraphcore.shapeddropout([self.input],
                                                    shape=self.drop_shape,
                                                    ratio=self.drop_ratio)
    self.loss = builder.aiGraphcore.identityloss([self.output])
    builder.addOutputTensor(self.loss)

    dfAnchors = [self.input, self.input_grad, self.output, self.loss]
    dfAnchors = {a: popart.AnchorReturnType("All") for a in dfAnchors}
    df = popart.DataFlow(self.batches_per_step, dfAnchors)

    model_proto = builder.getModelProto()
    all_patterns = popart.Patterns(popart.PatternsLevel.All)
    device = tu.create_test_device(self.replication_factor)

    session_opts = popart.SessionOptions()
    if self.replication_factor > 1:
        session_opts.enableReplicatedGraphs = True
        session_opts.replicatedGraphCount = self.replication_factor

    self.session = popart.TrainingSession(fnModel=model_proto,
                                          dataFlow=df,
                                          patterns=all_patterns,
                                          optimizer=popart.ConstSGD(0.1),
                                          loss=self.loss,
                                          userOptions=session_opts,
                                          deviceInfo=device)

    self.session.prepareDevice()
    self.session.setRandomSeed(self.seed)
    self.anchors = self.session.initAnchorArrays()

    batched_data = np.tile(
        self.input_data,
        [self.batches_per_step * self.replication_factor, 1, 1])
    self.stepio = popart.PyStepIO({self.input: batched_data}, self.anchors)


def test_reducemedian_indices_1_shape_infer(keepdims, axes):
    data = np.random.randn(1, 3, 4, 7, 8).astype(np.float32)
    builder = popart.Builder()
    tensor = builder.addInputTensor(popart.TensorInfo(data))
    out = builder.aiGraphcore.reducemedian(
        [tensor],
        axes=axes,
        keepdims=keepdims,
        debugContext='test_reducemedian_values_{}_{}'.format(axes, keepdims),
    )
    builder.addOutputTensor(out[0])
    builder.addOutputTensor(out[1])
    lossId = builder.aiGraphcore.identityloss([out[0]])
    proto = builder.getModelProto()

    anchors = [out[0], out[1]]
    art = popart.AnchorReturnType("All")
    dataFlow = popart.DataFlow(1, {a: art for a in anchors})
    device = tu.create_test_device()
    options = popart.SessionOptions()
    options.enableStochasticRounding = False

    # store the shapes here to make sure we are checking shapes
    # before the IR is complete (i.e. testing onnx shape inference)
    shapes = []
    for a in anchors:
        shapes.append(tuple(builder.getTensorShape(a)))

    session = popart.TrainingSession(fnModel=proto,
                                     loss=lossId,
                                     dataFlow=dataFlow,
                                     deviceInfo=device,
                                     optimizer=popart.ConstSGD(0.01),
                                     userOptions=options)

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    inputs = {tensor: data}
    stepio = popart.PyStepIO(inputs, anchors)
    session.weightsFromHost()
    session.run(stepio)
    stepio = popart.PyStepIO(inputs, anchors)

    # This tests the shape inference has run
    for a, b in zip([out[0], out[1]], shapes):
        assert anchors[a].shape == b


def run_test():
    proto, data, w, loss = model()

    options = popart.SessionOptions()
    optimizer = popart.ConstSGD(0.1)
    options.enableStochasticRounding = True
    options.enableReplicatedGraphs = True
    options.replicatedGraphCount = 2
    options.engineOptions = {"target.deterministicWorkers": "portable"}

    data = {k: np.repeat(v[np.newaxis], 2, 0) for k, v in data.items()}

    device = tu.create_test_device(2, pattern=popart.SyncPattern.Full)

    w_updated = popart.reservedUpdatedVarPrefix() + w
    dataFlow = popart.DataFlow(
        1, {
            loss: popart.AnchorReturnType("ALL"),
            w_updated: popart.AnchorReturnType("FINAL")
        })

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=dataFlow,
                                     userOptions=options,
                                     loss=loss,
                                     optimizer=optimizer,
                                     deviceInfo=device)

    session.prepareDevice()
    session.weightsFromHost()

    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO(data, anchors)
    session.run(stepio)

    device.detach()

    return anchors[w_updated]


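# A hedged sketch (not from the original file) of how a determinism helper
# like `run_test` above is typically exercised: with stochastic rounding
# enabled and "target.deterministicWorkers" set to "portable", two
# independent runs should produce bit-identical updated weights. Assumes
# `run_test` can be called repeatedly, as each call detaches its device.
def _example_determinism_check():
    w_first = run_test()
    w_second = run_test()
    assert np.array_equal(w_first, w_second)

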
def test_lstm_explicit_recompute():
    d1 = np.array([[[1., 2., 3.], [4., 5., 6.]],
                   [[7., 8., 9.], [10., 11., 12.]]]).astype(np.float32)

    input_size = d1.shape[2]
    hidden_size = 7

    d2 = np.random.rand(1, 4 * hidden_size, input_size).astype(np.float32)
    d3 = np.zeros((1, 4 * hidden_size, hidden_size)).astype(np.float32)

    builder = popart.Builder()
    i1 = builder.addInputTensor(popart.TensorInfo(d1))
    i2 = builder.addInitializedInputTensor(d2)
    i3 = builder.addInitializedInputTensor(d3)
    Y, Y_h, Y_c = builder.aiOnnx.lstm([i1, i2, i3], 3, clip=None)
    builder.recomputeOutputInBackwardPass(set([Y, Y_h, Y_c]))
    out = builder.aiOnnx.relu([Y])
    loss = builder.aiGraphcore.identityloss([out])
    builder.addOutputTensor(loss)

    device = tu.create_test_device()

    opts = popart.SessionOptions()
    opts.explicitRecomputation = True

    session = popart.TrainingSession(
        fnModel=builder.getModelProto(),
        dataFlow=popart.DataFlow(1, {out: popart.AnchorReturnType("All")}),
        deviceInfo=device,
        optimizer=popart.ConstSGD(0.1),
        userOptions=opts,
        loss=loss)

    # Check that there are 2 lstm ops in the IR. This confirms that the lstm
    # is being cloned for recomputation, and that we are actually testing
    # what we intended.
    ir = json.loads(session._serializeIr(popart.IrSerializationFormat.JSON))
    maingraph = ir['maingraph']
    lstms = [i for i in maingraph if i['type'] == 'LSTM']
    assert len(lstms) == 2


def simple_training_session(tmpdir, inputShape, inputArray, BPS, art, GA=1):
    builder = popart.Builder()
    inInfo = popart.TensorInfo("FLOAT", inputShape)
    i1 = builder.addInputTensor(inInfo)
    w1 = builder.addInitializedInputTensor(
        np.zeros(inputShape, dtype=np.float32), "w1")
    o = builder.aiOnnx.add([i1, w1])
    l1 = builder.aiGraphcore.l1loss([o], 0.0)
    proto = builder.getModelProto()

    batchesPerStep = BPS
    dataFlow = popart.DataFlow(batchesPerStep, {o: art})

    opts = popart.SessionOptions()
    opts.accumulationFactor = GA
    opts.enableGradientAccumulation = GA > 1

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=dataFlow,
                                     deviceInfo=tu.create_test_device(),
                                     userOptions=opts,
                                     loss=l1,
                                     optimizer=popart.ConstSGD(0.01))

    session.prepareDevice()
    session.weightsFromHost()
    anchors = session.initAnchorArrays()

    inputs = {
        i1: np.array(inputArray, dtype=np.float32),
    }
    stepio = popart.PyStepIO(inputs, anchors)
    session.run(stepio)

    return anchors[o]


def get_session(expand_op=True):
    builder = popart.Builder()
    i1 = builder.addInputTensor(popart.TensorInfo("FLOAT", in_.shape))
    w_in = builder.addInitializedInputTensor(w)
    mul = builder.aiOnnx.matmul([w_in, i1])
    if expand_op:
        c = builder.aiOnnx.constant(const)
        o = builder.aiOnnx.expand([mul, c])
    else:
        ones = np.ones(shape=const).astype(np.float32)
        ones_in = builder.aiOnnx.constant(ones)
        o = builder.aiOnnx.mul([mul, ones_in])
    builder.addOutputTensor(o)
    loss = builder.aiGraphcore.identityloss([o])

    anchor_returns = [
        o,
        popart.reservedGradientPrefix() + o,
        popart.reservedGradientPrefix() + i1,
        popart.reservedGradientPrefix() + w_in
    ]

    opts = popart.SessionOptions()
    session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                     dataFlow=popart.DataFlow(
                                         1, anchor_returns),
                                     deviceInfo=tu.create_test_device(),
                                     optimizer=popart.ConstSGD(0.1),
                                     loss=loss,
                                     userOptions=opts)
    anchors = session.initAnchorArrays()
    inputs = {i1: in_}
    stepio = popart.PyStepIO(inputs, anchors)

    session.prepareDevice()
    session.weightsFromHost()

    return session, stepio, anchors, anchor_returns


def run_session(proto, loss_id):
    anchor_id = popart.reservedGradientPrefix() + t0

    opts = popart.SessionOptions()
    opts.enableReplicatedGraphs = True
    opts.replicatedGraphCount = repl_factor
    opts.accumulationAndReplicationReductionType = reduction

    session = popart.TrainingSession(
        fnModel=proto,
        deviceInfo=tu.create_test_device(repl_factor),
        dataFlow=popart.DataFlow(1, [anchor_id]),
        loss=loss_id,
        optimizer=popart.ConstSGD(0.1),
        userOptions=opts)

    session.prepareDevice()
    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO({t0: t0_data}, anchors)
    session.run(stepio)

    return anchors[anchor_id]


def prepare(self, init_builder):
    self._builder = _Builder()
    anchorIds = init_builder(self._builder)
    self._builder._check_inputs()
    anchors = _get_anchors(anchorIds, self._builder)

    dataFlow = popart.DataFlow(self.batchesPerStep, anchors)
    proto = self._builder.getModelProto()
    loss = self._get_loss(anchorIds)
    device = self._get_device()
    optimizer = popart.ConstSGD(0.01)

    self._session = self._get_session(fnModel=proto,
                                      dataFlow=dataFlow,
                                      loss=loss,
                                      optimizer=optimizer,
                                      deviceInfo=device,
                                      patterns=self.patterns,
                                      userOptions=self.options)
    self._device_prepared = False


def test_nll_loss_input_with_invalid_input():
    # fix the random seed for this test
    np.random.seed(0)

    ## input data
    Batchsize = 2
    ExtraDim = 4  # e.g. sequence length in language model
    Classes = 3

    dshape = [Batchsize, ExtraDim, Classes]
    lshape = [Batchsize, ExtraDim + 1]  # Doesn't match!

    ip_data = np.random.rand(*dshape).astype(np.float32)
    lb_data = np.random.randint(Classes, size=lshape)

    ###
    # Popart
    ###
    builder = popart.Builder()
    ip = builder.addInitializedInputTensor(ip_data)
    lb = builder.addInputTensor(popart.TensorInfo("INT32", lshape))

    out = builder.aiOnnx.softmax([ip], axis=np.size(lshape))
    nll0 = builder.aiGraphcore.nllloss([out, lb],
                                       popart.ReductionType.NoReduction)

    patterns = popart.PatternsLevel.NoPatterns

    with pytest.raises(popart.popart_exception) as e_info:
        session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                         dataFlow=popart.DataFlow(1, [nll0]),
                                         optimizer=popart.ConstSGD(
                                             LEARNING_RATE, WEIGHT_DECAY),
                                         loss=nll0,
                                         patterns=popart.Patterns(patterns),
                                         deviceInfo=tu.create_test_device())

    assert (e_info.value.args[0].startswith(
        "The label tensor (INT32 [2 5]) must have shape [2 4] to match all but the final dimension of the probabilities tensor (FLOAT [2 4 3])"
    ))


def test_load_onnx_model_from_file(tmpdir):
    # Create a builder, store the model in a file and load it into a
    # different builder.
    builder = popart.Builder()

    shape = popart.TensorInfo("FLOAT", [2])
    i1 = builder.addInputTensor(shape)
    i2 = builder.addInputTensor(shape)
    o = builder.aiOnnx.add([i1, i2])
    builder.addOutputTensor(o)

    filename = tmpdir + "/model.onnx"
    with open(filename, 'wb') as out:
        out.write(builder.getModelProto())

    builder2 = popart.Builder(str(filename))

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})
    optimizer = popart.ConstSGD(0.01)
    proto = builder2.getModelProto()

    session = popart.InferenceSession(fnModel=proto,
                                      dataFlow=dataFlow,
                                      deviceInfo=getDevice())

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    inputs = {
        i1: np.array([1, 2], dtype=np.float32),
        i2: np.array([3, 4], dtype=np.float32)
    }
    stepio = popart.PyStepIO(inputs, anchors)
    session.run(stepio)

    assert (np.array_equal(anchors[o], [4, 6]))


def test_nll_no_underflow():
    dtype = np.float16

    # Input probabilities
    probs_np = np.array(
        [[1., 0., 0., 0., 0.], [0.5, 0.5, 0., 0., 0.],
         [1 / 3.0, 1 / 3.0, 1 / 3.0, 0., 0.], [0.25, 0.25, 0.25, 0.25, 0.],
         [0.2, 0.2, 0.2, 0.2, 0.2]],
        dtype=dtype)
    labels_np = np.array([0, 1, 2, 3, 4], dtype=np.int32)

    builder = popart.Builder()
    probs = builder.addInitializedInputTensor(probs_np, "probs")
    builder.addOutputTensor(builder.aiOnnx.identity([probs]))
    dprobs = popart.reservedGradientPrefix() + probs
    labels = builder.addInputTensor(popart.TensorInfo("INT32", [5]))
    loss = builder.aiGraphcore.nllloss([probs, labels],
                                       popart.ReductionType.Sum,
                                       debugPrefix="nllLossVal")

    anchor_desc = {
        dprobs: popart.AnchorReturnType("ALL"),
        loss: popart.AnchorReturnType("ALL")
    }
    dataFlow = popart.DataFlow(1, anchor_desc)

    session = popart.TrainingSession(
        fnModel=builder.getModelProto(),
        loss=loss,
        deviceInfo=popart.DeviceManager().createIpuModelDevice({}),
        optimizer=popart.ConstSGD(0.00001),
        dataFlow=dataFlow)

    session.prepareDevice()
    session.weightsFromHost()
    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO({labels: labels_np}, anchors)
    session.run(stepio)

    assert not np.isnan(anchors[loss]).any()
    assert not np.isnan(anchors[dprobs]).any()


def test_randomuniform_repeatable_replica(dtypes):
    seed = 8
    replication_factor = 2
    builder = popart.Builder()
    out = builder.aiOnnx.randomuniform(shape=[10, 1], dtype=dtypes[1])
    loss = builder.aiGraphcore.identityloss([out])
    builder.addOutputTensor(loss)

    opts = popart.SessionOptions()
    opts.enableReplicatedGraphs = True
    opts.replicatedGraphCount = replication_factor
    device = tu.create_test_device(replication_factor)

    session = popart.TrainingSession(
        fnModel=builder.getModelProto(),
        dataFlow=popart.DataFlow(1, {out: popart.AnchorReturnType("All")}),
        patterns=popart.Patterns(popart.PatternsLevel.All),
        deviceInfo=device,
        userOptions=opts,
        optimizer=popart.ConstSGD(0.1),
        loss=loss)

    session.prepareDevice()
    session.setRandomSeed(seed)
    session.weightsFromHost()
    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO({}, anchors)
    session.run(stepio)

    # Each replica should draw different random numbers from the same seed.
    o = anchors[out]
    for ai, bi in itertools.combinations(range(replication_factor), 2):
        print(f'Checking o[{ai}] is not equal to o[{bi}]')
        a = o[ai]
        b = o[bi]
        assert not np.allclose(a, b)


def init_session(proto, loss, dataFlow, userOpts, device, training=True):
    # Create a session to compile and execute the graph
    if training:
        session = popart.TrainingSession(fnModel=proto,
                                         loss=loss,
                                         deviceInfo=device,
                                         optimizer=popart.ConstSGD(0.001),
                                         dataFlow=dataFlow,
                                         userOptions=userOpts)
    else:
        session = popart.InferenceSession(fnModel=proto,
                                          deviceInfo=device,
                                          dataFlow=dataFlow,
                                          userOptions=userOpts)

    print("Compiling the {} graph.".format(
        "training" if training else "validation"))
    session.prepareDevice()
    session.setRandomSeed(1)

    # Create buffers to receive results from the execution
    anchors = session.initAnchorArrays()

    return Session(session, anchors)


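# A hedged usage sketch for `init_session` above, not part of the original
# file. It assumes `Session` is a lightweight holder (e.g. a namedtuple with
# fields (session, anchors)) defined elsewhere in this file, and builds a
# throwaway one-op model just to show the call pattern.
def _example_init_session_usage():
    builder = popart.Builder()
    x = builder.addInputTensor(popart.TensorInfo("FLOAT", [2]))
    o = builder.aiOnnx.relu([x])
    loss = builder.aiGraphcore.identityloss([o])
    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    training = init_session(builder.getModelProto(),
                            loss,
                            dataFlow,
                            popart.SessionOptions(),
                            tu.create_test_device(),
                            training=True)

    # Weights must be loaded before running a training session.
    training.session.weightsFromHost()
    stepio = popart.PyStepIO({x: np.zeros([2], dtype=np.float32)},
                             training.anchors)
    training.session.run(stepio)

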
def test_set_weights_from_host():
    # Run the first builder
    builder = popart.Builder()
    shape = popart.TensorInfo("FLOAT", [2])
    i1 = builder.addInputTensor(shape)
    data = np.array([1, 2], dtype=np.float32)
    i2 = builder.addInitializedInputTensor(data)
    o = builder.aiOnnx.add([i1, i2])
    loss = builder.aiGraphcore.identityloss([o])

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})
    optimizer = popart.ConstSGD(0.01)

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=dataFlow,
                                     loss=loss,
                                     optimizer=optimizer,
                                     deviceInfo=getDevice())

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    inputs = {i1: np.array([1, 2], dtype=np.float32)}
    stepio = popart.PyStepIO(inputs, anchors)

    with pytest.raises(popart.popart_exception) as e_info:
        session.run(stepio)

    assert (e_info.value.args[0].startswith(
        "Must call weightsFromHost before run as the"))


def test_no_prepare_device():
    popart.getLogger().setLevel("TRACE")

    # Check that `session.modelToHost` can be called when using a
    # model with a constant node, without throwing an exception
    builder = popart.Builder()
    i1 = builder.addInputTensor(popart.TensorInfo("FLOAT", [2, 2]))
    i2 = builder.addInputTensor(popart.TensorInfo("FLOAT", [2, 2]))
    c = builder.aiOnnx.constant(np.array([[1, 2], [3, 4]], dtype=np.float32))
    o1 = builder.aiOnnx.add([i1, i2])
    o2 = builder.aiOnnx.add([o1, c])
    loss = builder.aiGraphcore.identityloss([o2])

    proto = builder.getModelProto()

    anchors = {o2: popart.AnchorReturnType("All")}
    dataFlow = popart.DataFlow(1, anchors)
    optimizer = popart.ConstSGD(0.01)

    opts = popart.SessionOptions()
    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=dataFlow,
                                     userOptions=opts,
                                     loss=loss,
                                     optimizer=optimizer,
                                     deviceInfo=tu.create_test_device())

    # No session.prepareDevice()
    with pytest.raises(popart.popart_exception) as e_info:
        session.modelToHost('session_proto.onnx')

    assert (e_info.value.args[0].startswith(
        "Devicex::prepare() must be called before Devicex::weightsToHost"))


def session(skip_execution=False, include_patterns=True, momentum=False):
    proto, data, x = model()

    # Required patterns
    patterns = [
        "MatMulOp", "MatMulLhsGradOp", "MatMulRhsGradOp", "OpToIdentity",
        "PreUniRepl", "PostNRepl", "InPlace"
    ]
    if include_patterns:
        patterns += ["InplaceWorkaroundPattern"]

    optimizer = popart.ConstSGD(0.1)
    if momentum:
        optimizer = popart.SGD({
            "defaultLearningRate": (0.1, True),
            "defaultMomentum": (0.9, True)
        })

    return run_py(proto,
                  data=data,
                  outputs=x,
                  loss=popart.L1Loss(x, 'loss', 0.1),
                  optimizer=optimizer,
                  patterns=popart.Patterns(patterns),
                  user_options={"enableOutlining": False},
                  skip_execution=skip_execution)


def get_replicated_dropout_session(replication_factor=4,
                                   dsize=10,
                                   num_layers=1,
                                   ratio=0.3,
                                   batches_per_step=1,
                                   seed=0):
    builder = popart.Builder()
    ip = builder.addInputTensor(popart.TensorInfo("FLOAT", [dsize]))
    d__ip = popart.reservedGradientPrefix() + ip
    out = ip
    for layer in range(num_layers):
        [out] = builder.aiOnnx.dropout([out], num_outputs=1, ratio=ratio)
    loss = builder.aiGraphcore.identityloss([out])

    builder.addOutputTensor(loss)

    device = tu.create_test_device(replication_factor)

    dfAnchors = [out, ip, d__ip]
    dfAnchors = {i: popart.AnchorReturnType("All") for i in dfAnchors}

    opts = popart.SessionOptions()
    opts.enableReplicatedGraphs = True
    opts.replicatedGraphCount = replication_factor

    session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                     dataFlow=popart.DataFlow(
                                         batches_per_step, dfAnchors),
                                     optimizer=popart.ConstSGD(0.1),
                                     loss=loss,
                                     userOptions=opts,
                                     deviceInfo=device)

    session.prepareDevice()
    session.setRandomSeed(seed)
    session.weightsFromHost()
    anchors = session.initAnchorArrays()

    return session, ip, out, d__ip, anchors


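# A short sketch (not from the original file) of how the helper above might
# be exercised: feed every replica the same input and check that each draws
# a different dropout mask from the same base seed, mirroring the replica
# check in the randomuniform test. `np`, `popart`, and `itertools` are
# assumed to be imported at module level; dsize=100 keeps the probability of
# two masks coinciding by chance negligible.
def _example_replicated_dropout_usage():
    replication_factor = 4
    dsize = 100
    session, ip, out, d__ip, anchors = get_replicated_dropout_session(
        replication_factor=replication_factor, dsize=dsize)

    # Same row for every replica, so any output difference comes from the
    # dropout masks rather than the data.
    row = np.random.rand(dsize).astype(np.float32) + 1.0
    data = np.tile(row, (replication_factor, 1))
    session.run(popart.PyStepIO({ip: data}, anchors))

    for ai, bi in itertools.combinations(range(replication_factor), 2):
        assert not np.allclose(anchors[out][ai], anchors[out][bi])

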
def getAnchors(extraReduction):
    builder = popart.Builder()
    ip = builder.addInitializedInputTensor(ip_data)
    lb = builder.addInputTensor("INT32", lshape)

    sm = builder.aiOnnx.softmax([ip], axis=np.size(lshape))
    if extraReduction:
        nll = builder.aiGraphcore.nllloss(
            [sm, lb], reduction=popart.ReductionType.NoReduction)
        loss = builder.aiOnnx.reducesum([nll])
    else:
        loss = builder.aiGraphcore.nllloss(
            [sm, lb], reduction=popart.ReductionType.Sum)

    anchors = [popart.reservedGradientPrefix() + ip]
    # Always test 'loss' too, except for when we want to test with
    # the SoftmaxGradDirect pattern, which requires 'loss' to be
    # anchored
    if 'SoftmaxGradDirect' not in patternsList or 'NlllWithSoftmaxGradDirect' in patternsList:
        anchors.append(loss)

    session = popart.TrainingSession(
        fnModel=builder.getModelProto(),
        loss=loss,
        dataFlow=popart.DataFlow(1, anchors),
        optimizer=popart.ConstSGD(0.1),
        deviceInfo=tu.create_test_device(),
        patterns=popart.Patterns(patternsList).enableRuntimeAsserts(False))

    session.prepareDevice()
    session.weightsFromHost()
    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO({lb: lb_data.astype(np.int32)}, anchors)
    session.run(stepio)

    return anchors


def popart_result_and_model(popart_config, is_bwd=False):
    builder = popart.Builder()
    popart_model = Bert(popart_config, builder=builder)

    input_info = popart.TensorInfo(popart_config.popart_dtype, [
        popart_config.batch_size * popart_config.sequence_length,
        popart_config.hidden_size
    ])
    input_tensor = builder.addInputTensor(input_info)
    data = {
        input_tensor:
        np.random.normal(0, 0.02,
                         input_info.shape()).astype(popart_config.dtype)
    }

    output = popart_model.feed_forward(input_tensor)
    proto = builder.getModelProto()

    if is_bwd:
        l1_lambda = 0.1
        l1 = builder.aiGraphcore.l1loss([output],
                                        l1_lambda,
                                        debugPrefix="l1LossVal",
                                        reduction=popart.ReductionType.Sum)
        proto = builder.getModelProto()
        optimizer = popart.ConstSGD(0.01)

        outputs, post_proto = run_py(proto,
                                     data, (output, l1),
                                     loss=l1,
                                     optimizer=optimizer)
    else:
        outputs, post_proto = run_py(proto, data, output)

    return data[input_tensor], outputs, proto, post_proto


def test_valid_recompute_options():
    builder = popart.Builder()

    i1 = builder.addInputTensor(popart.TensorInfo("FLOAT", [1]))
    r1 = builder.aiOnnx.relu([i1])
    o = builder.aiOnnx.relu([r1])

    # specify manual recomputation
    builder.recomputeOutputInBackwardPass(r1)

    # specify auto recomputation as well
    opts = popart.SessionOptions()
    opts.autoRecomputation = popart.RecomputationType.Standard

    with pytest.raises(popart.popart_exception) as e_info:
        session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                         dataFlow=popart.DataFlow(1, [o]),
                                         optimizer=popart.ConstSGD(0.001),
                                         loss=o,
                                         patterns=popart.Patterns([]),
                                         userOptions=opts,
                                         deviceInfo=tu.create_test_device())

    # Note: "recomputaion" (sic) matches the spelling of the error message
    # raised by the library, so it is left unchanged here.
    assert (e_info.value.args[0] ==
            "A mixture of auto and manual recomputaion is not supported")