def test_loss_inputs_untouched():
    """Build and prepare a training session whose loss is an identity of a
    matmul output, checking that preparation succeeds with all-zero inputs.

    NOTE(review): relies on module-level `np` and `PopartTestSession` —
    only `popart` is imported locally here.
    """
    import popart
    height = 32
    batchesPerStep = 5
    samplesPerBatch = 48
    # NOTE(review): equal to samplesPerBatch and otherwise unused here —
    # presumably kept to mirror sibling tests that vary the micro-batch size.
    samplesPerMicroBatch = samplesPerBatch
    # Full step shape: [batchesPerStep, samplesPerBatch, height, height].
    stepDataShape = [batchesPerStep, samplesPerBatch, height, height]
    input_data = np.zeros(stepDataShape).astype(np.float32)
    weights_data = np.zeros([height, height]).astype(np.float32)

    def init_builder(builder):
        # Graph: x = matmul(i0, w0); loss = identityloss(x).
        i0 = builder.addInputTensor(input_data)
        w0 = builder.addInitializedInputTensor(weights_data)
        x = builder.aiOnnx.matmul([i0, w0])
        loss = builder.aiGraphcore.identityloss([x])
        builder.addOutputTensor(x)
        builder.setLoss(loss)
        # No extra anchors requested.
        return []

    session = PopartTestSession()
    session.mode = 'train'
    session.patterns = popart.Patterns(popart.PatternsLevel.Default)
    session.prepare(init_builder)
def run_test(groupingEnabled, verify):
    """Train one step of add(matmul(a,b), matmul(c,d)) and return the output.

    Anchors the gradient of every input so `verify(session)` can inspect the
    compiled graph/report. `groupingEnabled` drives the
    `enableGroupedMatmuls` session option under test.

    NOTE(review): A/B/C/D (shapes) and A_data..D_data come from the
    enclosing scope, as does `tu`.
    """
    builder = popart.Builder()
    a = builder.addInputTensor(popart.TensorInfo("FLOAT", A), "A")
    b = builder.addInputTensor(popart.TensorInfo("FLOAT", B), "B")
    c = builder.addInputTensor(popart.TensorInfo("FLOAT", C), "C")
    d = builder.addInputTensor(popart.TensorInfo("FLOAT", D), "D")
    r1 = builder.aiOnnx.matmul([a, b], "MATMUL_A")
    r2 = builder.aiOnnx.matmul([c, d], "MATMUL_B")
    o = builder.aiOnnx.add([r1, r2], "END")
    loss = builder.aiGraphcore.identityloss([o])
    proto = builder.getModelProto()
    # Anchor the forward output plus the gradient of each of the 4 inputs.
    dataFlow = popart.DataFlow(
        1, {
            o: popart.AnchorReturnType("All"),
            popart.reservedGradientPrefix() + a: popart.AnchorReturnType("All"),
            popart.reservedGradientPrefix() + b: popart.AnchorReturnType("All"),
            popart.reservedGradientPrefix() + c: popart.AnchorReturnType("All"),
            popart.reservedGradientPrefix() + d: popart.AnchorReturnType("All")
        })
    opts = popart.SessionOptions()
    opts.reportOptions = {"showExecutionSteps": "true"}
    # Outlining disabled so matmul grouping (the feature under test) stays
    # visible to `verify`.
    opts.enableOutlining = False
    opts.enableGroupedMatmuls = groupingEnabled
    opts.dotOpNames = True
    pat = popart.Patterns(popart.PatternsLevel.Default)
    session = popart.TrainingSession(
        fnModel=proto,
        dataFlow=dataFlow,
        userOptions=opts,
        patterns=pat,
        loss=loss,
        optimizer=popart.ConstSGD(0.01),
        deviceInfo=tu.create_test_device(opts={"compileIPUCode": False}))

    session.prepareDevice()
    anchors = session.initAnchorArrays()

    inputs = {a: A_data, b: B_data, c: C_data, d: D_data}
    stepio = popart.PyStepIO(inputs, anchors)

    session.run(stepio)

    verify(session)

    return anchors[o]
def test_outline_dropout_pattern_one(custom_ops):
    '''
    Tests that the OutlineDropoutPattern successfully outlines all 3 dropouts
    (fwd, bwd) into a single subgraph

    Expected IR Graph (excluding adds etc)

    fwd...
    x = add(data0, weight0)
    0_seed = seedModify(seed, 0)
    x = call_0(x, 0_seed)
    1_seed = seedModify(seed, 1)
    x = call_0(x, 1_seed)
    2_seed = seedModify(seed, 2)
    x = call_0(x, 2_seed)

    bwd...
    x = call_0(x, 0_seed)
    x = call_0(x, 1_seed)
    x = call_0(x, 2_seed)

    where call_0(x, seed) = dropout(x, seed)
    '''
    input_data = np.random.rand(2, 2).astype(np.float32)

    builder = popart.Builder()
    d0 = builder.addInputTensor(popart.TensorInfo('FLOAT', input_data.shape),
                                'data0')
    w0 = builder.addInitializedInputTensor(input_data, 'weight0')
    x = builder.aiOnnx.add([d0, w0])
    # Three identical dropout ops — all should be outlined into one subgraph.
    x = builder.aiOnnx.dropout([x], 1)[0]
    x = builder.aiOnnx.dropout([x], 1)[0]
    x = builder.aiOnnx.dropout([x], 1)[0]

    # skip_execution: we only need the IR, not a device run.
    # outlineThreshold=-1 forces outlining of even tiny subgraphs.
    session = run_py(builder.getModelProto(),
                     data={d0: input_data},
                     outputs=x,
                     loss=popart.L1Loss(x, 'loss', 0.1),
                     optimizer=popart.ConstSGD(0.1),
                     patterns=popart.Patterns(
                         ["OutlineDropoutPattern", "PostNRepl"]),
                     user_options={"outlineThreshold": -1},
                     skip_execution=True)

    ir = json.loads(session._serializeIr(popart.IrSerializationFormat.JSON))

    # There should only be a main graph and 1 subgraph containing dropout
    assert len(ir.keys()) == 2

    ops = [o["type"] for o in ir["_subgraph(0)"]]
    assert "Dropout" in ops

    ops = [o["type"] for o in ir["maingraph"]]
    # Should only be 1 seed modify per dropout
    # (3 fwd + 3 bwd dropout sites -> 6 SeedModify ops in total).
    assert len(list(filter(lambda op: op == "SeedModify", ops))) == 6
    # The bwd and fwd should be outlined together
    assert len(list(filter(lambda op: op == "Call", ops))) == 6
def bert_session_patterns(args):
    """Build the popart.Patterns for a BERT session from parsed CLI args.

    Several branches may toggle the same pattern ("TiedGatherPattern"):
    a later enablePattern call overrides an earlier one, so the order of
    the checks below matters.
    """
    patterns = popart.Patterns()
    if args.task != "SQUAD":
        patterns.enablePattern("DisableAttnDropoutBwdPattern", False)

    if args.execution_mode == ExecutionMode.PHASED:
        # Phased execution disables the tied-gather optimisations.
        patterns.enablePattern("TiedGatherPattern", False)
        patterns.enablePattern("SparseAccumulatePattern", False)

    # Pipelined execution with per-layer recompute checkpoints and more than
    # one layer on some IPU.
    if args.execution_mode == ExecutionMode.PIPELINE and args.recompute_checkpoint_every_layer and any(
            map(lambda l: l > 1, args.layers_per_ipu)):
        patterns.enablePattern("AccumulatePriorityPattern", True)

    # Pretraining without gradient accumulation cannot use the tied-gather
    # optimisation for the word-embedding weight.
    if args.task == "PRETRAINING" and args.execution_mode != ExecutionMode.PHASED and args.gradient_accumulation_factor <= 1 and not args.inference:
        patterns.enablePattern("TiedGatherPattern", False)
        logger.warning(
            "Running Pretraining without Gradient Accumulation will disable optimisations "
            "for the Word Embedding weight. This will increase memory usage. "
            "Consider enabling Gradient Accumulation.")

    if args.optimizer == "SGD" and args.optimizer_state_offchip and args.execution_mode != ExecutionMode.PHASED:
        patterns.enablePattern("TiedGatherPattern", False)
        logger.warning(
            "Remote Optimizer State with SGD/SGD+M is not a recommended configuration"
        )

    return patterns
def run_test(matmul_serialization_mode, matmul_serialization_factor, verify):
    """Train one step of a serialized matmul and return the updated rhs weight.

    `verify(session, factor)` inspects the compiled graph afterwards.

    NOTE(review): lhs_data/rhs_data and getBaseOptions/tu come from the
    enclosing scope.
    """
    builder = popart.Builder()

    lhs = builder.addInitializedInputTensor(lhs_data, "lhs")
    rhs = builder.addInitializedInputTensor(rhs_data, "rhs")

    o = builder.aiOnnx.matmul([lhs, rhs])

    # Serialization mode/factor are the parameters under test.
    builder.setSerializeMatMul({o},
                               matmul_serialization_mode,
                               matmul_serialization_factor,
                               keep_precision=True)

    loss = builder.aiGraphcore.l1loss([o], 0.1)

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(
        1,
        {
            o: popart.AnchorReturnType("All"),
            rhs: popart.AnchorReturnType("Final"),
            popart.reservedGradientPrefix() + lhs: popart.AnchorReturnType("All"),
            #popart.reservedGradientPrefix() + rhs: popart.AnchorReturnType("All"), << T11469
        })

    opts = getBaseOptions()

    pat = popart.Patterns(
        ['MatMulOp', 'MatMulRhsGradOp', 'MatMulLhsGradOp', 'OpToIdentity'])
    # Mandatory-pattern runtime checks off: only the listed patterns run.
    pat.enableRuntimeAsserts(False)

    session = popart.TrainingSession(
        fnModel=proto,
        dataFlow=dataFlow,
        userOptions=opts,
        loss=loss,
        optimizer=popart.ConstSGD(0.01),
        patterns=pat,
        deviceInfo=tu.create_test_device(opts={"compileIPUCode": False}))

    session.prepareDevice()
    session.weightsFromHost()

    anchors = session.initAnchorArrays()

    inputs = {lhs: lhs_data}
    stepio = popart.PyStepIO(inputs, anchors)

    session.run(stepio)

    # Pull updated weights back so anchors[rhs] reflects the trained value.
    session.weightsToHost()
    verify(session, matmul_serialization_factor)

    return anchors[rhs]
def _run_comparison_test(data, result, proto, expected_activations,
                         lstm_op_pattern):
    """Run `proto` through an InferenceSession and compare against `result`.

    If `expected_activations` is given, first check the single LSTM node in
    the model carries exactly those activation strings (case-insensitive).
    `lstm_op_pattern` toggles the 'LSTMOp' pattern on top of the defaults.
    """
    model = onnx.load_from_string(proto)

    if expected_activations:
        # The model must contain exactly one LSTM node ...
        lstms = [i for i in model.graph.node if i.op_type == 'LSTM']
        assert len(lstms) == 1
        # ... with exactly one 'activations' attribute.
        activations = [
            i for i in lstms[0].attribute if i.name == 'activations'
        ]
        assert len(activations) == 1
        activations = activations[0].strings
        assert len(activations) == len(expected_activations)
        # ONNX stores activation names as bytes; compare case-insensitively.
        for expected, actual in zip(expected_activations, activations):
            assert expected == actual.decode('utf-8').lower()

    # Single-input single-output model assumed here.
    outId = model.graph.output[0].name
    inId = model.graph.input[0].name

    dataFlow = popart.DataFlow(1, {outId: popart.AnchorReturnType("All")})
    patterns = popart.Patterns(popart.PatternsLevel.Default)
    patterns.enablePattern('LSTMOp', lstm_op_pattern)
    session = popart.InferenceSession(fnModel=proto,
                                      dataFlow=dataFlow,
                                      deviceInfo=tu.create_test_device(),
                                      patterns=patterns)

    session.prepareDevice()
    anchors = session.initAnchorArrays()

    stepio = popart.PyStepIO({inId: data}, anchors)
    session.run(stepio)

    assert np.allclose(anchors[outId], result)
def test_patterns_str():
    """Patterns built from a list of names enables exactly those patterns."""
    import popart
    requested = popart.Patterns(["PostNRepl", "InPlace"])
    # Both requested patterns come back enabled ...
    assert requested.PostNRepl == True
    assert requested.InPlace == True
    # ... while a pattern that was not requested stays disabled.
    assert requested.SoftMaxGradDirect == False
def test_matmul_serialization_invalid_factor(tmpdir):
    """A serialization factor that does not divide the output-channel dim
    must raise at session construction.

    Here factor 3 is requested for an output dim of 4.
    """
    lhs_shape = [2, 2]
    rhs_shape = [2, 4]
    lhs_data = np.random.rand(*lhs_shape).astype(np.float32)
    rhs_data = np.random.rand(*rhs_shape).astype(np.float32)

    builder = popart.Builder()

    lhs = builder.addInputTensor(popart.TensorInfo("FLOAT", lhs_shape), "lhs")
    rhs = builder.addInputTensor(popart.TensorInfo("FLOAT", rhs_shape), "rhs")

    o = builder.aiOnnx.matmul([lhs, rhs])
    # Invalid: 3 does not divide output dim 4.
    builder.setSerializeMatMul({o}, "output_channels", 3)

    builder.addOutputTensor(o)

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    opts = getBaseOptions()

    pat = popart.Patterns(['MatMulOp', 'MatMulRhsGradOp', 'MatMulLhsGradOp'])

    with pytest.raises(popart.popart_exception) as e_info:
        session = popart.InferenceSession(
            fnModel=proto,
            dataFlow=dataFlow,
            userOptions=opts,
            patterns=pat,
            deviceInfo=tu.create_test_device(opts={"compileIPUCode": False}))

    # "multple" matches the library's own (misspelled) message text —
    # do not correct it here or the startswith check breaks.
    assert (e_info.value.args[0].startswith(
        "Invalid serialisation factor 3 for output channels dim 4. output_channels dim should be a multple of the serialisation factor"
    ))
def session(train=False, skip_execution=False, include_patterns=True,
            splits=1, outline=False, optim="Sgd"):
    """Build and run (via run_py) a session for the tied-gather pattern tests.

    train=False runs inference with constant weights; optim selects SGD+M or
    Lamb (Adam). include_patterns toggles the two TiedGather* patterns under
    test. Returns whatever run_py returns.

    NOTE(review): `model` and `run_py` come from the enclosing scope.
    """
    proto, data, x, loss = model(splits=splits)

    patterns = popart.Patterns()
    patterns.enablePattern("TiedGatherPattern", include_patterns)
    patterns.enablePattern("TiedGatherAccumulatePattern", include_patterns)

    user_options = {
        "enableOutlining": outline,
        "enableGradientAccumulation": True,
        "accumulationFactor": 2,
        "accumulationAndReplicationReductionType": popart.ReductionType.Mean,
        "meanAccumulationAndReplicationReductionStrategy": popart.MeanReductionStrategy.Running
    }

    if optim == "Lamb":
        optimizer = popart.Adam({
            "defaultLearningRate": (0.1, False),
            "defaultWeightDecay": (0.1, True),
            "defaultBeta1": (0.1, True),
            "defaultBeta2": (0.1, True),
            "lossScaling": (20, True),
        }, mode=popart.AdamMode.LambNoBias)  # NoBias to increase the error of incorrect gradients
        # Lamb variant also exercises off-chip, sharded optimizer state with
        # 2 replicas.
        user_options["optimizerStateTensorLocationSettings"] = popart.TensorLocationSettings(
            popart.TensorLocation(
                popart.TensorStorage.OffChip,
                popart.ReplicatedTensorSharding.On),
            0, 0)
        user_options["enableReplicatedGraphs"] = True
        user_options["replicatedGraphCount"] = 2
        ipus = 2
    else:
        optimizer = popart.SGD({
            "defaultLearningRate": (0.1, True),
            "defaultMomentum": (0.9, True),
            "defaultDampening": (0, True),  # 0 dampening to increase the error of incorrect gradients
            "lossScaling": (20, True)})
        ipus = 1
    # NOTE(review): `ipus` is computed but never passed on — presumably
    # run_py derives the device size elsewhere; confirm before removing.

    if train:
        return run_py(
            proto,
            data=data,
            outputs=x,
            loss=loss,
            optimizer=optimizer,
            patterns=patterns,
            user_options=user_options,
            skip_execution=skip_execution)
    else:
        # Inference path: frozen (non-constant=False) weights, no optimizer.
        return run_py(
            proto,
            data=data,
            outputs=x,
            patterns=patterns,
            user_options={
                "enableOutlining": outline,
                "constantWeights": False
            },
            skip_execution=skip_execution)
def test_enable_pattern():
    """enablePattern flips the enabled state of a non-mandatory pattern."""
    patterns = popart.Patterns()

    # A non-mandatory pattern (mandatory ones cannot be disabled).
    pattern = "PadSum"

    enabled = patterns.isPatternEnabled(pattern)
    patterns.enablePattern(pattern, not enabled)

    # The original assertion `not enabled == isPatternEnabled(...)` parsed as
    # `not (enabled == ...)` because `not` binds looser than `==`; it only
    # passed because both sides are bools. State the intent explicitly:
    assert patterns.isPatternEnabled(pattern) == (not enabled)
def test_patterns_enum():
    """Patterns can be selected via the PreAliasPatternType enum, and
    individual patterns can be switched on by attribute assignment."""
    import popart
    pats = popart.Patterns([popart.PreAliasPatternType.PostNRepl])
    # Enable a second pattern through its attribute.
    pats.InPlace = True
    # The enum-selected and attribute-enabled patterns are on ...
    assert pats.PostNRepl == True
    assert pats.InPlace == True
    # ... and an unselected pattern stays off.
    assert pats.SoftMaxGradDirect == False
def run_test(groupingEnabled, verify):
    """Run inference on a chain of matmuls (with a transposed operand) and
    return the output; `verify(session)` inspects the compiled graph.

    `groupingEnabled` drives the `enableGroupedMatmuls` option under test.
    NOTE(review): *_shape and *_data values come from the enclosing scope.
    """
    builder = popart.Builder()

    lhs = builder.addInputTensor(popart.TensorInfo("FLOAT", lhs_shape), "lhs")
    rhs = builder.addInputTensor(popart.TensorInfo("FLOAT", rhs_shape), "rhs")
    lhs_2 = builder.addInputTensor(popart.TensorInfo("FLOAT", lhs_2_shape),
                                   "lhs_2")
    rhs_2 = builder.addInputTensor(popart.TensorInfo("FLOAT", rhs_2_shape),
                                   "rhs_2")

    r1 = builder.aiOnnx.matmul([lhs, rhs])

    r2 = builder.aiOnnx.matmul([lhs_2, rhs_2])
    # Swap the last two axes of r2 before the final matmul.
    r2_t = builder.aiOnnx.transpose([r2],
                                    perm=[0, 2, 1],
                                    debugPrefix="rhs.transpose")

    o = builder.aiOnnx.matmul([r1, r2_t])
    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    opts = popart.SessionOptions()
    opts.reportOptions = {"showExecutionSteps": "true"}
    # Outlining disabled so matmul grouping stays visible to `verify`.
    opts.enableOutlining = False
    opts.enableGroupedMatmuls = groupingEnabled

    pat = popart.Patterns(popart.PatternsLevel.Default)

    session = popart.InferenceSession(
        fnModel=proto,
        dataFlow=dataFlow,
        userOptions=opts,
        patterns=pat,
        deviceInfo=tu.create_test_device(opts={"compileIPUCode": False}))

    session.prepareDevice()

    anchors = session.initAnchorArrays()

    inputs = {
        lhs: lhs_data,
        rhs: rhs_data,
        lhs_2: lhs_2_data,
        rhs_2: rhs_2_data
    }
    stepio = popart.PyStepIO(inputs, anchors)

    session.run(stepio)

    verify(session)

    return anchors[o]
def run(model_file_name, enableOutlining):
    """Train one step of a 3-layer matmul chain, anchoring input/activation
    gradients, then save the model to tmpdir/model_file_name.

    NOTE(review): `subgraphCopyingStrategy` and `tmpdir` come from the
    enclosing scope.
    """
    dsize = 10
    # NOTE(review): `ratio` and `d__ip` below are unused in this function —
    # possibly leftovers from a dropout variant of the test; confirm before
    # removing.
    ratio = 0.5
    builder = popart.Builder()
    ip = builder.addInputTensor(popart.TensorInfo("FLOAT", [dsize, dsize]))
    d__ip = popart.reservedGradientPrefix() + ip

    def add_layer(in_id):
        # One layer = matmul with a fresh all-ones weight.
        w = builder.addInitializedInputTensor(
            np.ones([dsize, dsize], np.float32))
        matmul_id = builder.aiOnnx.matmul([in_id, w])
        return matmul_id

    m1 = add_layer(ip)
    m2 = add_layer(m1)
    m3 = add_layer(m2)

    # Anchor the gradient of the input and every intermediate activation.
    anchorIds = []
    for i in (ip, m1, m2, m3):
        anchorIds.append(popart.reservedGradientPrefix() + i)

    out = builder.aiGraphcore.identityloss([m3])
    builder.addOutputTensor(out)

    device = tu.create_test_device()

    dfAnchors = {}
    for anchorId in anchorIds:
        dfAnchors.update({anchorId: popart.AnchorReturnType("All")})

    opts = popart.SessionOptions()
    opts.enableOutlining = enableOutlining
    opts.separateCallOpPdfs = False
    # The copying strategy is the parameter under test.
    opts.subgraphCopyingStrategy = subgraphCopyingStrategy

    proto = builder.getModelProto()

    session = popart.TrainingSession(
        fnModel=proto,
        dataFlow=popart.DataFlow(1, dfAnchors),
        optimizer=popart.ConstSGD(0.1),
        loss=out,
        patterns=popart.Patterns(popart.PatternsLevel.All),
        userOptions=opts,
        deviceInfo=device)

    session.prepareDevice()
    session.weightsFromHost()
    anchors = session.initAnchorArrays()

    ip_data = np.ones((dsize, dsize), dtype=np.float32)
    stepio = popart.PyStepIO({ip: ip_data}, anchors)

    session.run(stepio)

    session.modelToHost(str(tmpdir / model_file_name))
def run(opt_dict, enable_outlining, model_file_name):
    """Train a 4-layer gemm chain for `steps` iterations, switching the
    optimizer from `opt_dict` at the scheduled step indices, then save the
    model to tmpdir/model_file_name.

    opt_dict maps step index -> optimizer; index 0 must be present (it is
    the initial optimizer).
    NOTE(review): `dtype`, `steps` and `tmpdir` come from the enclosing
    scope; `d__ip` below is unused here.
    """
    np.random.seed(1878)
    dsize = 10
    builder = popart.Builder()
    ip = builder.addInputTensor(
        popart.TensorInfo("FLOAT" if dtype == np.float32 else "FLOAT16",
                          [dsize, dsize]))
    d__ip = popart.reservedGradientPrefix() + ip

    def add_layer(in_id, name):
        # One layer = gemm(in, w, b) with random weight and bias.
        w = builder.addInitializedInputTensor(
            np.random.rand(dsize, dsize).astype(dtype), "w_" + name)
        b = builder.addInitializedInputTensor(
            np.random.rand(dsize).astype(dtype), "b_" + name)
        matmul_id = builder.aiOnnx.gemm([in_id, w, b], 1, 1, False, False)
        return matmul_id

    m1 = add_layer(ip, "0")
    m2 = add_layer(m1, "1")
    m3 = add_layer(m2, "2")
    m4 = add_layer(m3, "3")

    out = builder.aiGraphcore.identityloss([m4])
    builder.addOutputTensor(out)

    device = tu.create_test_device()

    # No anchors: only the final saved weights matter for this test.
    anchors = {}

    opts = popart.SessionOptions()
    opts.enableOutliningCopyCostPruning = False
    # Force outlining of everything when enabled.
    opts.outlineThreshold = -np.inf
    opts.enableOutlining = enable_outlining

    proto = builder.getModelProto()

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=popart.DataFlow(1, anchors),
                                     optimizer=opt_dict[0],
                                     loss=out,
                                     patterns=popart.Patterns(
                                         popart.PatternsLevel.All),
                                     userOptions=opts,
                                     deviceInfo=device)

    session.prepareDevice()
    session.weightsFromHost()

    for i in range(steps):
        # Swap in a new optimizer at the scheduled steps.
        if i in opt_dict:
            session.updateOptimizerFromHost(opt_dict[i])
        ip_data = np.ones((dsize, dsize), dtype=dtype)
        stepio = popart.PyStepIO({ip: ip_data}, anchors)
        session.run(stepio)

    session.modelToHost(str(tmpdir / model_file_name))
def run_lstm_popart(onnx_file_name, inputs):
    """Train one step of an LSTM ONNX model in popart and return its anchors.

    `inputs` is (X, initial_h, initial_c). The gradient anchors for the
    PyTorch-named LSTM weights are renamed at the end to the ONNX LSTM
    input names (W/R/WB/RB) so callers can compare against an ONNX
    reference run.
    NOTE(review): `get_popart_fname` comes from the enclosing scope.
    """
    # generate a popart session
    builder = popart.Builder(onnx_file_name)
    loss = builder.aiGraphcore.identityloss(['out'])
    outputs = builder.getOutputTensorIds()
    # Anchor every model output plus the gradients of output, inputs and
    # all LSTM parameters.
    anchors = outputs + [
        popart.reservedGradientPrefix() + 'out',
        popart.reservedGradientPrefix() + 'X',
        popart.reservedGradientPrefix() + 'initial_h',
        popart.reservedGradientPrefix() + 'initial_c',
        popart.reservedGradientPrefix() + 'lstm.weight_ih_l0',
        popart.reservedGradientPrefix() + 'lstm.weight_hh_l0',
        popart.reservedGradientPrefix() + 'lstm.bias_ih_l0',
        popart.reservedGradientPrefix() + 'lstm.bias_hh_l0'
    ]
    dataFlow = popart.DataFlow(1, anchors)
    optimizer = popart.ConstSGD(0.1)
    device = tu.create_test_device(1)
    print('Creating session')
    # NOTE(review): relies on enableRuntimeAsserts returning the Patterns
    # object so it can be passed inline.
    s = popart.TrainingSession(fnModel=builder.getModelProto(),
                               dataFlow=dataFlow,
                               optimizer=optimizer,
                               loss=loss,
                               patterns=popart.Patterns([
                                   'PreUniRepl', 'OpToReshape'
                               ]).enableRuntimeAsserts(False),
                               deviceInfo=device)
    print('setting device')
    anchor_map = s.initAnchorArrays()
    s.prepareDevice()

    # run the popart session
    input_map = {
        'X': inputs[0],
        'initial_h': inputs[1],
        'initial_c': inputs[2]
    }
    stepio = popart.PyStepIO(input_map, anchor_map)
    s.weightsFromHost()
    s.run(stepio)
    s.modelToHost(get_popart_fname(onnx_file_name))

    # Rename the torch-style gradient anchors to the ONNX LSTM input names.
    anchor_map[popart.reservedGradientPrefix() + 'W'] = anchor_map.pop(
        popart.reservedGradientPrefix() + 'lstm.weight_ih_l0')
    anchor_map[popart.reservedGradientPrefix() + 'R'] = anchor_map.pop(
        popart.reservedGradientPrefix() + 'lstm.weight_hh_l0')
    anchor_map[popart.reservedGradientPrefix() + 'WB'] = anchor_map.pop(
        popart.reservedGradientPrefix() + 'lstm.bias_ih_l0')
    anchor_map[popart.reservedGradientPrefix() + 'RB'] = anchor_map.pop(
        popart.reservedGradientPrefix() + 'lstm.bias_hh_l0')
    return anchor_map
def test_enable_inplace():
    """enablePattern toggles the InPlace pattern off and back on."""
    pats = popart.Patterns()
    # Drive the toggle through both states and check each one sticks.
    for state in (False, True):
        pats.enablePattern("InPlace", state)
        assert pats.InPlace == state
def run_test(aliaszerocopy):
    """Train one step under phased execution with aliasZeroCopy on/off.

    Returns (output anchor, saved ONNX proto, total tile memory) so the
    caller can compare the two settings.
    NOTE(review): `model` and `tmpdir` come from the enclosing scope.
    """
    proto, data, x, loss = model()

    options = popart.SessionOptions()
    patterns = popart.Patterns()

    optimizer = popart.SGD({
        "defaultLearningRate": (0.1, True),
        "defaultMomentum": (0.9, True),
        "defaultDampening": (0, True)
    })

    # Force aggressive outlining so alias-zero-copy has subgraphs to act on.
    options.enableOutlining = True
    options.outlineThreshold = -np.inf
    options.enableOutliningCopyCostPruning = False
    options.autoRecomputation = popart.RecomputationType.Standard
    options.virtualGraphMode = popart.VirtualGraphMode.ExecutionPhases
    options.explicitRecomputation = True
    # The option under test.
    options.aliasZeroCopy = aliaszerocopy
    options.executionPhaseSettings.phases = 5
    # NOTE(review): request_ipus is unused — the device size is passed
    # directly to create_test_device below; confirm before removing.
    request_ipus = 2
    device = tu.create_test_device(2, pattern=popart.SyncPattern.Full)

    dataFlow = popart.DataFlow(1, {x: popart.AnchorReturnType("ALL")})

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=dataFlow,
                                     userOptions=options,
                                     loss=loss,
                                     optimizer=optimizer,
                                     patterns=patterns,
                                     deviceInfo=device)

    session.prepareDevice()

    session.weightsFromHost()

    anchors = session.initAnchorArrays()

    stepio = popart.PyStepIO(data, anchors)

    session.run(stepio)

    file_path = str(tmpdir / f"aliaszerocopy_model_test.onnx")
    session.modelToHost(file_path)
    post_proto = onnx.load(file_path)

    device.detach()

    graph_report = json.loads(session.getGraphReport())
    max_tile_memory = max(graph_report["memory"]["byTile"]["total"])
    total_memory = np.sum(graph_report["memory"]["byTile"]["total"])
    return anchors[x], post_proto, total_memory
def setPatterns(self, patterns, enableRuntimeAsserts=None):
    """Set the session patterns.

    `patterns` may be a Patterns instance, a list of pattern-name strings,
    or a PatternsLevel; anything that is not already a Patterns instance is
    wrapped in one. When enableRuntimeAsserts is set, an error is raised if
    not all mandatory patterns are enabled.
    """
    self.patterns = (patterns if isinstance(patterns, popart.Patterns) else
                     popart.Patterns(patterns))

    if enableRuntimeAsserts is not None:
        self.patterns.enableRuntimeAsserts(enableRuntimeAsserts)
def test_patterns_default():
    """Default-constructed Patterns enables the standard pattern set."""
    import popart
    defaults = popart.Patterns()
    # All of the default-on patterns report enabled.
    for name in ("PreUniRepl", "PostNRepl", "SoftMaxGradDirect",
                 "OpToIdentity", "SubtractArg1GradOp", "InPlace"):
        assert getattr(defaults, name) == True
    # Smoke-check the string representation.
    print(str(defaults))
def test_patterns_none():
    """PatternsLevel.NoPatterns leaves every pattern disabled."""
    import popart
    none_level = popart.Patterns(popart.PatternsLevel.NoPatterns)
    # Every pattern that defaults on elsewhere must be off here.
    for name in ("PreUniRepl", "PostNRepl", "SoftMaxGradDirect",
                 "OpToIdentity", "SubtractArg1GradOp", "InPlace"):
        assert getattr(none_level, name) == False
    # Smoke-check the string representation.
    print(str(none_level))
def run_test(outlining):
    """Train one step with gradient accumulation + replication, optionally
    pipelined, and return (session, output, saved proto, total memory).

    `outlining` is the option under test.
    NOTE(review): `model`, `pipeline` and `tmpdir` come from the enclosing
    scope.
    """
    proto, data, x, loss = model()

    options = popart.SessionOptions()
    patterns = popart.Patterns()

    optimizer = popart.SGD({
        "defaultLearningRate": (0.1, True),
    })

    options.enableOutlining = outlining
    options.outlineThreshold = 10.0
    options.enableGradientAccumulation = True
    options.accumulationFactor = 4
    options.enableReplicatedGraphs = True
    options.replicatedGraphCount = 2
    options.virtualGraphMode = popart.VirtualGraphMode.Manual
    if pipeline:
        options.enablePipelining = True
        options.autoRecomputation = popart.RecomputationType.Pipeline

    # 2 replicas x 2 virtual graphs -> 4 IPUs.
    device = tu.create_test_device(4)

    dataFlow = popart.DataFlow(1, {x: popart.AnchorReturnType("ALL")})

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=dataFlow,
                                     userOptions=options,
                                     loss=loss,
                                     optimizer=optimizer,
                                     patterns=patterns,
                                     deviceInfo=device)

    session.prepareDevice()

    session.weightsFromHost()

    anchors = session.initAnchorArrays()

    stepio = popart.PyStepIO(data, anchors)

    session.run(stepio)

    file_path = str(tmpdir / f"outlining_execution_context_model.onnx")
    session.modelToHost(file_path)
    post_proto = onnx.load(file_path)

    device.detach()

    graph_report = json.loads(session.getGraphReport())
    max_tile_memory = max(graph_report["memory"]["byTile"]["total"])
    total_memory = np.sum(graph_report["memory"]["byTile"]["total"])
    return session, anchors[x], post_proto, total_memory
def test_patterns_all():
    """PatternsLevel.All enables the full pattern set (incl. SplitConvBias)."""
    import popart
    everything = popart.Patterns(popart.PatternsLevel.All)
    # Every checked pattern — including SplitConvBias, which is not part of
    # the default set — reports enabled.
    for name in ("PreUniRepl", "PostNRepl", "SoftMaxGradDirect",
                 "SplitConvBias", "OpToIdentity", "SubtractArg1GradOp",
                 "InPlace"):
        assert getattr(everything, name) == True
    # Smoke-check the string representation.
    print(str(everything))