def getInstrumentedSession(instrumentation):
    """Build, prepare and run a replicated InferenceSession with hardware
    cycle-counter instrumentation enabled, returning the session.

    Relies on enclosing-scope names: `useIOTiles`, `builder`, `act`, `d0`.

    Args:
        instrumentation: value assigned to
            SessionOptions.hardwareInstrumentations.
    """
    opts = popart.SessionOptions()
    opts.instrumentWithHardwareCycleCounter = True
    opts.hardwareInstrumentations = instrumentation
    opts.replicatedGraphCount = 2
    opts.virtualGraphMode = popart.VirtualGraphMode.Manual
    opts.enableReplicatedGraphs = True
    if useIOTiles is True:
        opts.numIOTiles = 32
        # Trying to use less than all the tiles throw an error like
        # popart_core.poplar_exception: Trying to access tile 72 on IPU
        # 0 but the virtual graph only covers the following tiles on
        # that IPU: 0-63
        # The error happens in a call to poplar made by gcl::perIPUTiles.
        device = tu.create_test_device(numIpus=4,
                                       tilesPerIPU=tu.USE_ALL_TILES)
    else:
        device = tu.create_test_device(numIpus=4, tilesPerIPU=4)
    session = popart.InferenceSession(fnModel=builder.getModelProto(),
                                      dataFlow=popart.DataFlow(20, [act]),
                                      userOptions=opts,
                                      deviceInfo=device)
    session.prepareDevice()
    # Random host data for the single input `d0`; anchors act as outputs.
    stepio = popart.PyStepIO({d0: np.random.rand(40).astype(np.float32)},
                             session.initAnchorArrays())
    session.run(stepio)
    return session
def return_options(anchorDict):
    """Construct SessionOptions and a test device from a test-config dict.

    Expected keys: "Pipelining" (bool), "AccumulationFactor" (int),
    "ReplicationFactor" (int).

    Returns:
        A (opts, device) pair, or (None, None) when replication is
        requested but not enough IPUs are available.
    """
    opts = popart.SessionOptions()

    if anchorDict["Pipelining"]:
        opts.enablePipelining = True

    if anchorDict["AccumulationFactor"] > 1:
        opts.enableGradientAccumulation = True
        opts.accumulationFactor = anchorDict["AccumulationFactor"]

    # Pipelining splits the model over two virtual graphs per replica;
    # without it each replica needs a single IPU.
    if anchorDict["Pipelining"]:
        opts.virtualGraphMode = popart.VirtualGraphMode.Auto
        ipus = 2 * anchorDict["ReplicationFactor"]
    else:
        ipus = anchorDict["ReplicationFactor"]

    if anchorDict["ReplicationFactor"] > 1:
        opts.replicatedGraphCount = anchorDict["ReplicationFactor"]
        opts.enableReplicatedGraphs = True
        # Replication genuinely requires physical IPUs; bail out cleanly
        # when they are not present so callers can skip the test.
        if not tu.ipu_available(ipus):
            print("No IPUS available for test options.")
            return None, None

    device = tu.create_test_device(numIpus=ipus)
    print("device: ", device)
    return opts, device
def get_dropout_session(dsize=100,
                        ratio=0.2,
                        bps=1,
                        use_ipu=False,
                        num_layers=1,
                        seed=0):
    """Build a chain of `num_layers` dropout ops ending in an identityloss
    and return a ready-to-run training session.

    Note: `use_ipu` is currently unused here; device selection is
    delegated to tu.create_test_device().

    Returns:
        (session, input_id, output_id, input_grad_id, anchors)
    """
    builder = popart.Builder()
    ip = builder.addInputTensor(popart.TensorInfo("FLOAT", [dsize]))
    d__ip = popart.reservedGradientPrefix() + ip
    out = ip
    # Chain dropouts: each layer consumes the previous layer's output.
    # Loop index is unused, hence `_`.
    for _ in range(num_layers):
        [out] = builder.aiOnnx.dropout([out], num_outputs=1, ratio=ratio)
    loss = builder.aiGraphcore.identityloss([out])
    builder.addOutputTensor(loss)

    device = tu.create_test_device()

    session, anchors = get_session(anchorIds=[out, ip, d__ip],
                                   proto=builder.getModelProto(),
                                   device=device,
                                   loss=loss,
                                   bps=bps,
                                   seed=seed)

    return session, ip, out, d__ip, anchors
def test_valid_recompute_options():
    """Mixing manual recompute annotations with auto-recomputation must be
    rejected when the TrainingSession is constructed."""
    builder = popart.Builder()

    i1 = builder.addInputTensor(popart.TensorInfo("FLOAT", [1]))
    r1 = builder.aiOnnx.relu([i1])
    o = builder.aiOnnx.relu([r1])

    # specify manual recomputation
    builder.recomputeOutputInBackwardPass(r1)

    # specify auto recomputation as well
    opts = popart.SessionOptions()
    opts.autoRecomputation = popart.RecomputationType.Standard

    with pytest.raises(popart.popart_exception) as e_info:
        session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                         dataFlow=popart.DataFlow(1, [o]),
                                         optimizer=popart.ConstSGD(0.001),
                                         loss=o,
                                         patterns=popart.Patterns(
                                             []).enableRuntimeAsserts(False),
                                         userOptions=opts,
                                         deviceInfo=tu.create_test_device())
    # NOTE: "recomputaion" matches the (misspelled) message popart emits;
    # do not "fix" the spelling here.
    assert (e_info.value.args[0] ==
            "A mixture of auto and manual recomputaion is not supported")
def test_dropout_training7():
    """Two dropout ops fed the same input must draw independent random
    masks, so their anchored outputs should differ."""
    dsize = 100
    ratio = 0.2
    builder = popart.Builder()
    ip = builder.addInputTensor(popart.TensorInfo("FLOAT", [dsize]))
    d__ip = popart.reservedGradientPrefix() + ip
    [d1] = builder.aiOnnx.dropout([ip], num_outputs=1, ratio=ratio)
    [d2] = builder.aiOnnx.dropout([ip], num_outputs=1, ratio=ratio)
    out = builder.aiOnnx.add([d1, d2])
    out = builder.aiGraphcore.identityloss([out])
    builder.addOutputTensor(out)

    if tu.ipu_available():
        device = tu.create_test_device()
    else:
        pytest.skip("Test needs to run on IPU, but none are available")

    session, anchors = get_session(anchorIds=[d1, d2],
                                   proto=builder.getModelProto(),
                                   device=device,
                                   loss=out)

    # Same data for each batch
    ip_data = np.random.random_sample(dsize).astype(np.float32)
    stepio = popart.PyStepIO({ip: ip_data}, anchors)

    session.run(stepio)

    # Distinct masks => the two dropout outputs must not be identical.
    assert (np.array_equal(anchors[d1], anchors[d2]) is not True)
def test_stream_tensors_to_multiple_ipus():
    """ Streaming an input to Ops on multiple IPUs throws an error

    09/07/2019 Since D12445 this test no longer raises an exception. By
    default, stream tensors are now replicated by streaming to a single
    IPU, then copied across to the other IPUs where they are needed.
    Leaving this test in to verify that this remains the case
    """
    builder, op0_out, op1_out, op2_out, op3_out, anchor_map = get_simple_linear_model(
        streamInputToOp1AndOp2=True)

    opts = popart.SessionOptions()
    opts.enablePipelining = True
    opts.virtualGraphMode = popart.VirtualGraphMode.Manual

    # op0 on IPU 0, the rest on IPU 1: the shared stream input therefore
    # feeds ops on two different IPUs.
    builder.virtualGraph(op0_out, 0)
    builder.virtualGraph(op1_out, 1)
    builder.virtualGraph(op2_out, 1)
    builder.virtualGraph(op3_out, 1)

    # Constructing the session must not raise (see docstring).
    session = popart.InferenceSession(fnModel=builder.getModelProto(),
                                      dataFlow=popart.DataFlow(10, anchor_map),
                                      userOptions=opts,
                                      deviceInfo=tu.create_test_device(
                                          numIpus=2, tilesPerIpu=20))
def test_sharding_multi_source():
    """ Branched sharding does not merge IPU Copies with pipelining

    e.g.  Op0 -> Op2
                  ^
          Op1 ----'

    where the vGraph split is IPU0 : {Op0}, IPU1 : {Op1}, IPU2 : {Op2}
    """
    builder = popart.Builder()
    shape_d = [10]
    shape_l = []
    d0 = builder.addInputTensor(popart.TensorInfo("FLOAT", shape_d))
    d1 = builder.addInputTensor(popart.TensorInfo("FLOAT", shape_d))
    l0 = builder.addInputTensor(popart.TensorInfo("INT32", shape_l))

    op0_out = builder.aiOnnx.sin([d0], "s0")
    op1_out = builder.aiOnnx.exp([d1], "r0")
    op2_out = builder.aiOnnx.mul([op0_out, op1_out], "m0")
    nll = builder.aiGraphcore.nllloss([op2_out, l0])

    opts = popart.SessionOptions()
    opts.enablePipelining = True
    opts.virtualGraphMode = popart.VirtualGraphMode.Manual

    # Branched placement: the two producers of op2 sit on different IPUs.
    builder.virtualGraph(op0_out, 0)
    builder.virtualGraph(op1_out, 1)
    builder.virtualGraph(op2_out, 2)
    builder.virtualGraph(nll, 2)

    session = popart.InferenceSession(fnModel=builder.getModelProto(),
                                      dataFlow=popart.DataFlow(10, [op2_out]),
                                      userOptions=opts,
                                      deviceInfo=tu.create_test_device(
                                          numIpus=3, tilesPerIpu=20))
def test_replicated_allreduce():
    """All-reduce across two replicas of the same data doubles the values."""
    data = np.array(range(10), dtype=np.float32)
    num_replicas = 2

    builder = popart.Builder()
    t = builder.addInitializedInputTensor(data, "input")
    o = builder.aiGraphcore.replicatedallreduce([t])
    builder.addOutputTensor(o)

    opts = popart.SessionOptions()
    opts.enableReplicatedGraphs = True
    opts.replicatedGraphCount = num_replicas

    session = popart.InferenceSession(
        fnModel=builder.getModelProto(),
        dataFlow=popart.DataFlow(1, {o: popart.AnchorReturnType("All")}),
        userOptions=opts,
        deviceInfo=tu.create_test_device(numIpus=2))

    session.prepareDevice()
    anchors = session.initAnchorArrays()
    session.run(popart.PyStepIO({}, anchors))

    # Both replicas start from identical initializer data, so the reduced
    # result on every replica is exactly twice the input.
    expected = 2.0 * np.array(range(10), dtype=np.float32)
    for replica in range(num_replicas):
        assert np.allclose(anchors[o][replica], expected)
def test_summary_report_with_cpu_device(tmpdir):
    """Requesting an execution report on a device without profiling
    support must raise a poplar exception."""
    builder = popart.Builder()

    i1 = builder.addInputTensor(popart.TensorInfo("FLOAT", [1, 2, 32, 32]))
    i2 = builder.addInputTensor(popart.TensorInfo("FLOAT", [1, 2, 32, 32]))
    o = builder.aiOnnx.add([i1, i2])
    builder.addOutputTensor(o)

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    session = popart.InferenceSession(fnModel=proto,
                                      dataFlow=dataFlow,
                                      deviceInfo=tu.create_test_device())

    session.initAnchorArrays()

    session.prepareDevice()

    with pytest.raises(popart.poplar_exception) as e_info:
        session.getExecutionReport()

    assert (e_info.value.args[0].endswith(
        "Profiling is disabled for current device type."))
def getUpdatedWeights(decomposeGradSum):
    """Run one training step and return the updated weight tensor `w0`.

    Relies on enclosing-scope names: `builder`, `actIn`, `doSharding`,
    `numLayers`, `w0`, `in0`, `shape`.

    Args:
        decomposeGradSum: value for SessionOptions.decomposeGradSum, the
            option under test; results for both settings are compared by
            the caller.
    """
    opts = popart.SessionOptions()
    opts.decomposeGradSum = decomposeGradSum

    loss = builder.aiGraphcore.identityloss([actIn])
    if doSharding:
        # One IPU per layer; the loss lives on the last one.
        builder.virtualGraph(loss, numLayers - 1)
        opts.virtualGraphMode = popart.VirtualGraphMode.Manual
        numIpus = numLayers
    else:
        numIpus = 1

    device = tu.create_test_device(numIpus=numIpus)
    session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                     dataFlow=popart.DataFlow(1, [w0]),
                                     deviceInfo=device,
                                     optimizer=popart.ConstSGD(0.1),
                                     loss=loss,
                                     userOptions=opts)

    anchors = session.initAnchorArrays()

    np.random.seed(1)  # ensure same input vals between sessions
    inputs = {in0: np.random.rand(*shape).astype('float32')}
    stepio = popart.PyStepIO(inputs, anchors)

    session.prepareDevice()
    session.weightsFromHost()

    session.run(stepio)
    return anchors[w0]
def getAnchors(extraReduction):
    """Build a softmax+nllloss model, train one step, return the anchors.

    Relies on enclosing-scope names: `ip_data`, `lshape`, `patternsList`,
    `lb_data`.

    Args:
        extraReduction: if truthy, use an unreduced nllloss followed by an
            explicit reducesum; otherwise use nllloss's built-in Sum
            reduction. Callers compare the two variants for equivalence.
    """
    builder = popart.Builder()
    ip = builder.addInitializedInputTensor(ip_data)
    lb = builder.addInputTensor("INT32", lshape)

    sm = builder.aiOnnx.softmax([ip], axis=np.size(lshape))
    if extraReduction:
        nll = builder.aiGraphcore.nllloss(
            [sm, lb], reduction=popart.ReductionType.NoReduction)
        loss = builder.aiOnnx.reducesum([nll])
    else:
        loss = builder.aiGraphcore.nllloss(
            [sm, lb], reduction=popart.ReductionType.Sum)

    anchors = [popart.reservedGradientPrefix() + ip]
    # Always test 'loss' too, except for when we want to test with
    # the SoftmaxGradDirect pattern, which requires 'loss' to be
    # anchored
    if 'SoftmaxGradDirect' not in patternsList or 'NlllWithSoftmaxGradDirect' in patternsList:
        anchors.append(loss)

    session = popart.TrainingSession(
        fnModel=builder.getModelProto(),
        loss=loss,
        dataFlow=popart.DataFlow(1, anchors),
        optimizer=popart.ConstSGD(0.1),
        deviceInfo=tu.create_test_device(),
        patterns=popart.Patterns(patternsList))

    session.prepareDevice()
    session.weightsFromHost()
    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO({lb: lb_data.astype(np.int32)}, anchors)
    session.run(stepio)
    return anchors
def getAnchors(extraReduction):
    """Build an l1loss model, train one step, and return the anchors.

    Relies on the enclosing-scope name `ip_data`.

    Args:
        extraReduction: if truthy, use an unreduced l1loss followed by an
            explicit reducesum; otherwise use l1loss's built-in Sum
            reduction. Callers compare the two variants for equivalence.
    """
    builder = popart.Builder()
    ip = builder.addInitializedInputTensor(ip_data)

    if extraReduction:
        l1 = builder.aiGraphcore.l1loss(
            [ip], 0.1, reduction=popart.ReductionType.NoReduction)
        loss = builder.aiOnnx.reducesum([l1])
    else:
        loss = builder.aiGraphcore.l1loss(
            [ip], 0.1, reduction=popart.ReductionType.Sum)

    # Anchor both the loss and the gradient of the input.
    anchors = [loss, popart.reservedGradientPrefix() + ip]

    session = popart.TrainingSession(
        fnModel=builder.getModelProto(),
        loss=loss,
        dataFlow=popart.DataFlow(1, anchors),
        optimizer=popart.ConstSGD(0.1),
        deviceInfo=tu.create_test_device(),
        patterns=popart.Patterns(
            popart.PatternsLevel.NoPatterns).enableRuntimeAsserts(False))

    session.prepareDevice()
    session.weightsFromHost()
    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO({}, anchors)
    session.run(stepio)
    return anchors
def test_implicit_recompute_op_scheduled_pre_loss_no():
    """ Regression test for T36828. Confirm that compilation completes
    without an exception being thrown.

    It is possible that the MulGrad op that produces Gradient___t3 is
    scheduled early (e.g. at index 0 in the schedule). If this happens,
    all ops after it in the schedule are classified as 'post loss'.

    The matmul operation is recomputed in the backwards pass. The implicit
    recomputation setting forbids that an op to be recomputed is a
    'post loss' op.
    """
    builder = popart.Builder()
    t0 = builder.addInputTensor("FLOAT", [2, 2])
    t1 = builder.addInitializedInputTensor(
        np.random.rand(2, 2).astype(np.float32))
    t2 = builder.aiOnnx.matmul([t0, t1])
    t3 = builder.aiGraphcore.l1loss([t2], 0.1)

    const = np.array([4]).astype(np.float32)
    t5 = builder.aiOnnx.constant(const)
    t6 = builder.aiOnnx.mul([t3, t5])

    # Mark the matmul for recomputation in the backwards pass.
    builder.recomputeOutputInBackwardPass(t2)

    session = popart.TrainingSession(deviceInfo=tu.create_test_device(),
                                     fnModel=builder.getModelProto(),
                                     dataFlow=popart.DataFlow(1, []),
                                     loss=t6,
                                     optimizer=popart.SGD(
                                         {"lossScaling": (2.0, False)}))
    # The test passes if this compiles without throwing.
    session.prepareDevice()
def run_test():
    """Train the model from `model()` for one step and return the anchored
    values of `x`.

    Also inspects the serialized IR to assert that at most two Dropout ops
    remain in the main graph.
    """
    proto, data, x, loss = model()

    options = popart.SessionOptions()
    dataFlow = popart.DataFlow(1, {x: popart.AnchorReturnType("ALL")})

    with tu.create_test_device() as device:
        session = popart.TrainingSession(fnModel=proto,
                                         dataFlow=dataFlow,
                                         deviceInfo=device,
                                         userOptions=options,
                                         loss=loss,
                                         optimizer=popart.ConstSGD(0.01))

        session.prepareDevice()
        session.weightsFromHost()
        anchors = session.initAnchorArrays()
        stepio = popart.PyStepIO(data, anchors)
        session.run(stepio)

        # Inspect the compiled IR for the surviving Dropout ops.
        ir = json.loads(
            session._serializeIr(popart.IrSerializationFormat.JSON))
        dropouts = [op for op in ir['maingraph'] if op['type'] == 'Dropout']
        assert (len(dropouts) <= 2)

        device.detach()

        return anchors[x]
def test_matmul_serialization_invalid_factor(tmpdir):
    """A matmul serialisation factor that does not divide the
    output-channels dimension must be rejected at session construction."""
    lhs_shape = [2, 2]
    rhs_shape = [2, 4]
    lhs_data = np.random.rand(*lhs_shape).astype(np.float32)
    rhs_data = np.random.rand(*rhs_shape).astype(np.float32)

    builder = popart.Builder()

    lhs = builder.addInputTensor(popart.TensorInfo("FLOAT", lhs_shape), "lhs")
    rhs = builder.addInputTensor(popart.TensorInfo("FLOAT", rhs_shape), "rhs")

    o = builder.aiOnnx.matmul([lhs, rhs])
    # 3 does not divide the output-channels dim (4) -> invalid.
    builder.setSerializeMatMul({o}, "output_channels", 3)

    builder.addOutputTensor(o)

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    opts = getBaseOptions()

    pat = popart.Patterns(['MatMulOp', 'MatMulRhsGradOp', 'MatMulLhsGradOp'])

    with pytest.raises(popart.popart_exception) as e_info:
        session = popart.InferenceSession(
            fnModel=proto,
            dataFlow=dataFlow,
            userOptions=opts,
            patterns=pat,
            deviceInfo=tu.create_test_device(opts={"compileIPUCode": False}))

    # NOTE: "multple" matches the (misspelled) message popart emits.
    assert (e_info.value.args[0].startswith(
        "Invalid serialisation factor 3 for output channels dim 4. output_channels dim should be a multple of the serialisation factor"
    ))
def test_abort_conditional():
    """Conditional abort: with a zero input the abort condition is not
    met, so the session runs to completion without raising."""
    builder = popart.Builder()
    tensor = builder.addInitializedInputTensor(
        np.array([0], dtype=np.float32), "input")
    absolute = builder.aiOnnx.abs([tensor])
    # abs(0) == 0, so the abort never fires.
    builder.aiGraphcore.abort([absolute])
    builder.addOutputTensor(absolute)

    session = popart.InferenceSession(
        fnModel=builder.getModelProto(),
        dataFlow=popart.DataFlow(1,
                                 {absolute: popart.AnchorReturnType("All")}),
        userOptions=popart.SessionOptions(),
        deviceInfo=tu.create_test_device())

    session.prepareDevice()
    anchors = session.initAnchorArrays()
    session.run(popart.PyStepIO({}, anchors))
def test_abort_unconditional():
    """An abort op with no condition tensor must always fire, surfacing as
    a poplar runtime error when the session is run."""
    input_data = np.array(range(10), dtype=np.float32)

    builder = popart.Builder()

    t = builder.addInitializedInputTensor(input_data, "input")
    o = builder.aiOnnx.abs([t])
    # Empty input list: the abort is unconditional.
    builder.aiGraphcore.abort([])
    builder.addOutputTensor(o)

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    opts = popart.SessionOptions()

    device = tu.create_test_device()
    session = popart.InferenceSession(fnModel=proto,
                                      dataFlow=dataFlow,
                                      userOptions=opts,
                                      deviceInfo=device)

    session.prepareDevice()

    anchors = session.initAnchorArrays()
    inputs = {}
    stepio = popart.PyStepIO(inputs, anchors)

    with pytest.raises(popart.poplar_runtime_error) as e_info:
        session.run(stepio)
    assert (e_info.value.args[0].startswith("Abort Program"))
def test_basic_mapping(tmpdir):
    """Two separate input tensors should be laid out on disjoint sets of
    tiles."""
    builder = popart.Builder()

    shape = popart.TensorInfo("FLOAT", [512])

    i1 = builder.addInputTensor(shape)
    i2 = builder.addInputTensor(shape)

    o = builder.aiOnnx.add([i1, i2])

    builder.addOutputTensor(o)

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    session = popart.InferenceSession(
        fnModel=proto,
        dataFlow=dataFlow,
        deviceInfo=tu.create_test_device(opts={"compileIPUCode": False}))

    anchors = session.initAnchorArrays()

    session.prepareDevice()

    m = session.getTensorTileMap()

    # get tile mappings of i1 and i2 as a Dict[Tile, Intervals],
    # keeping only tiles that actually hold intervals of the tensor
    i1_map = {t: i for t, i in enumerate(m[i1]) if i}
    i2_map = {t: i for t, i in enumerate(m[i2]) if i}

    # i1 and i2 maps should not share any tiles
    assert set(i1_map.keys()).isdisjoint(set(i2_map.keys()))
def test_virtual_graph():
    """Manually place every op on virtual graph 1 and check a two-IPU
    session prepares successfully."""
    popart.getLogger().setLevel("TRACE")

    builder = popart.Builder()
    scalar_info = popart.TensorInfo("FLOAT", [1])
    in_a = builder.addInputTensor(scalar_info)
    in_b = builder.addInputTensor(scalar_info)
    in_c = builder.addInputTensor(scalar_info)
    in_d = builder.addInputTensor(scalar_info)

    sum_ab = builder.aiOnnx.add([in_a, in_b])
    sum_cd = builder.aiOnnx.add([in_c, in_d])
    total = builder.aiOnnx.add([sum_ab, sum_cd])
    builder.addOutputTensor(total)

    # Everything lives on virtual graph 1.
    for out in (sum_ab, sum_cd, total):
        builder.virtualGraph(out, 1)

    opts = popart.SessionOptions()
    opts.virtualGraphMode = popart.VirtualGraphMode.Manual

    session = popart.InferenceSession(
        fnModel=builder.getModelProto(),
        dataFlow=popart.DataFlow(1, {total: popart.AnchorReturnType("All")}),
        userOptions=opts,
        deviceInfo=tu.create_test_device(numIpus=2))
    session.prepareDevice()
def run_ir(ir: pir.Ir, y: pir.Tensor): ir_ = ir._pb_ir # Internal ir y_d2h = pir.d2h_stream(y.shape, y.dtype, name="y_stream") ops.host_store(y_d2h, y) y_id = y_d2h.tensor_id() dataFlow = popart.DataFlow( batchesPerStep=1, anchorTensors={y_id: popart.AnchorReturnType("All")}) ir_.setDataFlow(dataFlow) opts = ir_.getSessionOptions() opts.useHostCopyOps = True opts.enableExplicitMainLoops = True opts.aliasZeroCopy = True opts.explicitRecomputation = True ir_.updateVertices() session = popart.InferenceSession.fromIr( ir=ir_, deviceInfo=tu.create_test_device()) session.prepareDevice() # Create buffers for anchors anchors = session.initAnchorArrays() # Run the model stepio = popart.PyStepIO(inputs={}, outputs=anchors) session.weightsFromHost() session.run(stepio) y_ = anchors[y_id] return y_
def test_engine_options_passed_to_engine(tmpdir):
    """An unknown engine option should be forwarded to the poplar engine
    and rejected there when the device is prepared."""
    popart.getLogger().setLevel("DEBUG")

    builder = popart.Builder()

    shape = popart.TensorInfo("FLOAT", [1, 2, 32, 32])

    i1 = builder.addInputTensor(shape)
    i2 = builder.addInputTensor(shape)
    o = builder.aiOnnx.add([i1, i2])
    builder.addOutputTensor(o)

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    opts = popart.SessionOptions()
    # 'option' is not a valid poplar engine option.
    opts.engineOptions = {'option': 'value'}

    session = popart.InferenceSession(fnModel=proto,
                                      dataFlow=dataFlow,
                                      userOptions=opts,
                                      deviceInfo=tu.create_test_device())

    session.initAnchorArrays()

    with pytest.raises(popart.poplar_exception) as e_info:
        session.prepareDevice()

    assert (e_info.value.args[0].endswith("Unrecognised option 'option'"))
def test_gru(op_tester):
    """Compare popart's GRU op against the GRU_Helper numpy reference."""
    d1 = np.random.randint(0, 20, size=(2, 2, 3)).astype(np.float32)
    input_size = d1.shape[2]  # (2,2,3)
    hidden_size = 7

    # Weight tensors fed as the W and R inputs of the ONNX GRU op.
    d2 = np.random.rand(1, 3 * hidden_size, input_size).astype(np.float32)
    d3 = np.random.rand(1, 3 * hidden_size, hidden_size).astype(np.float32)

    def init_builder(builder):
        # Build the GRU; anchor both the full sequence Y and final Y_h.
        i1 = builder.addInputTensor(d1)
        i2 = builder.addInputTensor(d2)
        i3 = builder.addInputTensor(d3)
        Y, Y_h = builder.aiOnnx.gru([i1, i2, i3], 2, clip=None)
        builder.addOutputTensor(Y_h)
        return [Y, Y_h]

    def reference(ref_data):
        # Numpy reference implementation of the same GRU.
        gru = GRU_Helper(X=d1, W=d2, R=d3)
        Y, Y_h = gru.step()
        return [Y.astype(np.float32), Y_h.astype(np.float32)]

    op_tester.atol = 1e-06
    op_tester.rtol = 1e-03
    op_tester.device = tu.create_test_device()
    op_tester.run(init_builder, reference, 'infer')
def test_one_ipu():
    """ In this test we check that an error is thrown when doing pipelining
    on 1 IPU """
    builder = popart.Builder()
    shape_d = [10]
    shape_l = [1]
    d0 = builder.addInputTensor(popart.TensorInfo("FLOAT", shape_d))
    d1 = builder.addInputTensor(popart.TensorInfo("FLOAT", shape_d))
    op0_out = builder.aiOnnx.sin([d0], "s0")
    op1_out = builder.aiOnnx.exp([d1], "r0")
    op2_out = builder.aiOnnx.mul([op0_out, op1_out], "m0")
    builder.addOutputTensor(op2_out)

    opts = popart.SessionOptions()
    opts.enablePipelining = True
    opts.virtualGraphMode = popart.VirtualGraphMode.Manual  # i.e. use 1 ipu

    # Two pipeline stages, but every op is placed on virtual graph 0,
    # so only a single IPU is actually used.
    builder.pipelineStage(op0_out, 0)
    builder.virtualGraph(op0_out, 0)
    builder.pipelineStage(op1_out, 0)
    builder.virtualGraph(op1_out, 0)
    builder.pipelineStage(op2_out, 1)
    builder.virtualGraph(op2_out, 0)

    with pytest.raises(popart.popart_exception) as e_info:
        session = popart.InferenceSession(fnModel=builder.getModelProto(),
                                          dataFlow=popart.DataFlow(
                                              10, [op2_out, "loss"]),
                                          userOptions=opts,
                                          deviceInfo=tu.create_test_device())
        session.prepareDevice()

    assert e_info.value.args[0].startswith("Pipelining requires more than")
def test_average_pool_with_count_include_pad(op_tester):
    """averagepool with count_include_pad is unsupported by popart and
    must be rejected when the TrainingSession is constructed."""
    popart.getLogger().setLevel("TRACE")

    builder = popart.Builder()

    i1 = builder.addInputTensor(popart.TensorInfo("FLOAT", [1, 1, 14, 14]))
    o = builder.aiOnnx.averagepool([i1],
                                   kernel_shape=[3, 3],
                                   count_include_pad=1,
                                   pads=[0, 0, 0, 0],
                                   strides=[2, 2])
    builder.addOutputTensor(o)

    # Re-attach the count_include_pad attribute explicitly on the node.
    builder.removeNodeAttribute("count_include_pad", set([o]))
    builder.addNodeAttribute("count_include_pad", 1, set([o]))

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})
    optimizer = popart.ConstSGD(0.01)
    loss = builder.aiGraphcore.l1loss([o], 0.1)
    proto = builder.getModelProto()

    opts = popart.SessionOptions()

    with pytest.raises(popart.popart_exception) as e_info:
        popart.TrainingSession(fnModel=proto,
                               dataFlow=dataFlow,
                               loss=loss,
                               optimizer=optimizer,
                               userOptions=opts,
                               deviceInfo=tu.create_test_device())

    assert (e_info.value.args[0].startswith(
        "`count_include_pad` is not supported"))
def test_dropout_training4():
    """Dropout's gradient must reuse the forward random mask: zeros in the
    forward output imply zeros in the input gradient, and vice versa."""
    dsize = 10
    ratio = 0.2
    builder = popart.Builder()
    ip = builder.addInputTensor(popart.TensorInfo("FLOAT", [dsize, dsize]))
    d__ip = popart.reservedGradientPrefix() + ip
    [d1] = builder.aiOnnx.dropout([ip], num_outputs=1, ratio=ratio)

    # Matmul to change the layout -- ensures we are testing the dependency
    # of random mask on the layout of the 'reference' dropout tensor
    w = builder.addInitializedInputTensor(np.ones([dsize, dsize], np.float32))
    out = builder.aiOnnx.matmul([d1, w])
    out = builder.aiGraphcore.identityloss([out])
    builder.addOutputTensor(out)

    device = tu.create_test_device()

    session, anchors = get_session(anchorIds=[d1, d__ip],
                                   proto=builder.getModelProto(),
                                   device=device,
                                   loss=out)

    # Ensure inputs in range [1.0, 2.0] to ensure comparing with 0 is valid
    ip_data = np.random.random_sample((dsize, dsize)).astype(np.float32) + 1
    stepio = popart.PyStepIO({ip: ip_data}, anchors)

    session.run(stepio)

    # Forward zeros and gradient zeros must coincide element-wise.
    for fwdEl, bwdEl in zip(np.ndarray.flatten(anchors[d1]),
                            np.ndarray.flatten(anchors[d__ip])):
        if fwdEl == 0:
            assert bwdEl == 0
        if bwdEl != 0:
            assert fwdEl != 0
def run_lstm_popart(onnx_file_name, inputs):
    """Run an LSTM ONNX model through popart one timestep at a time,
    feeding each step's output states back in as the next initial states.

    Args:
        onnx_file_name: path of the ONNX model to load.
        inputs: sequence [X, initial_h, initial_c] of numpy arrays.

    Returns:
        Tuple of (Y outputs concatenated over time, final Y_h, final Y_c).
    """
    # generate a popart session
    builder = popart.Builder(onnx_file_name)
    session = popart.InferenceSession(
        fnModel=onnx_file_name,
        dataFlow=popart.DataFlow(1, builder.getOutputTensorIds()),
        deviceInfo=tu.create_test_device(1))
    anchor_map = session.initAnchorArrays()
    session.prepareDevice()

    hidden = inputs[1]
    cell = inputs[2]
    step_outputs = []
    for step_data in inputs[0]:
        # Feed one timestep, with a leading unit time axis.
        feed = {
            'X': step_data.reshape(1, *step_data.shape),
            'initial_h': hidden,
            'initial_c': cell
        }
        session.run(popart.PyStepIO(feed, anchor_map))
        # Carry the output states into the next step.
        hidden = anchor_map['Y_h']
        cell = anchor_map['Y_c']
        step_outputs.append(np.copy(anchor_map['Y']))

    return (np.concatenate(step_outputs), anchor_map['Y_h'],
            anchor_map['Y_c'])
def test_dropout_training1():
    """With num_outputs=2, dropout's second output is the mask; applying
    mask and scale to the input must reproduce the first output."""
    dsize = 10
    ratio = 0.2
    d1 = np.random.rand(dsize).astype(np.float32)

    builder = popart.Builder()
    ip = builder.addInputTensor(popart.TensorInfo("FLOAT", [dsize]))
    d__ip = popart.reservedGradientPrefix() + ip
    [o1, o2] = builder.aiOnnx.dropout([ip], num_outputs=2, ratio=ratio)
    out = builder.aiGraphcore.identityloss([o1])
    builder.addOutputTensor(o1)
    builder.addOutputTensor(o2)

    session, anchors = get_session(anchorIds=[o1, o2, ip, d__ip],
                                   proto=builder.getModelProto(),
                                   device=tu.create_test_device(),
                                   loss=out)

    stepio = popart.PyStepIO({ip: d1}, anchors)
    session.run(stepio)

    # d1 * mask * (1/(1-ratio)) should give the same answer as popart
    # implementation
    reference = d1 * anchors[o2] * (1 / (1 - ratio))

    assert (np.isclose(anchors[o1], reference)).all()
def test_lstm_export_with_constantofshape(tmpdir):
    """Export a torch LSTM to ONNX (producing a ConstantOfShape op) and
    check popart inference matches the torch output."""
    np.random.seed(42)
    torch.manual_seed(43)

    class RNNNet(torch.nn.Module):
        # Minimal single-layer LSTM wrapper for the export.
        def __init__(self):
            super(RNNNet, self).__init__()

            hidden_size = 8
            input_size = 18

            self.lstm = torch.nn.LSTM(input_size=input_size,
                                      hidden_size=hidden_size,
                                      batch_first=True)

        def forward(self, x):
            x, (h, c) = self.lstm(x)
            return x

    net = RNNNet()
    np_data = np.random.rand(1, 100, 18).astype(np.float32)
    torch_data = torch.from_numpy(np_data)
    torchOutput = net(torch_data).detach().numpy()

    export_name = str(tmpdir / "lstm_small_repro.onnx")

    torch.onnx.export(net,
                      torch_data,
                      export_name,
                      verbose=True,
                      input_names=['data'],
                      output_names=['tag'])

    # Verify this model contains a ConstantOfShape op.
    model = onnx.load(export_name)
    nodes = model.graph.node
    nodes = [i for i in nodes if i.op_type == 'ConstantOfShape']
    assert len(nodes) > 0

    inputShapeInfo = popart.InputShapeInfo()
    inputShapeInfo.add("data", popart.TensorInfo("FLOAT", [1, 100, 18]))

    anchors = {"tag": popart.AnchorReturnType("All")}
    dataFlow = popart.DataFlow(1, anchors)
    device = tu.create_test_device()

    session = popart.InferenceSession(export_name,
                                      dataFlow,
                                      device,
                                      inputShapeInfo=inputShapeInfo)

    session.prepareDevice()

    inferenceAnchors = session.initAnchorArrays()
    stepio = popart.PyStepIO({"data": np_data}, inferenceAnchors)

    session.run(stepio)
    popartOutput = inferenceAnchors['tag']

    assert torchOutput.shape == popartOutput.shape
    assert np.allclose(torchOutput, popartOutput, atol=1e-07)
def test_randomuniform_repeatable(dtypes):
    """Setting the same random seed before two runs must yield identical
    randomuniform output."""
    seed = 8
    builder = popart.Builder()
    out = builder.aiOnnx.randomuniform(shape=[10, 1], dtype=dtypes[1])
    builder.addOutputTensor(out)

    session = popart.InferenceSession(
        fnModel=builder.getModelProto(),
        dataFlow=popart.DataFlow(1, {out: popart.AnchorReturnType("All")}),
        patterns=popart.Patterns(popart.PatternsLevel.All),
        deviceInfo=tu.create_test_device())

    session.prepareDevice()
    session.setRandomSeed(seed)
    session.weightsFromHost()
    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO({}, anchors)
    session.run(stepio)

    # need to copy the anchor as the next call to run will overwrite the data
    run1_out = np.copy(anchors[out])

    # Reset the seed to the same value and run the session again
    session.setRandomSeed(seed)
    session.run(stepio)
    run2_out = np.copy(anchors[out])

    assert np.array_equal(run1_out, run2_out)
def run_test(expected_dot_file_count):
    """Build a trivial model with logDir pointed at a temp directory and
    assert the expected number of .dot files appears there.

    NOTE(review): which .dot files get written presumably depends on
    dot-check settings configured outside this function (e.g. session
    option defaults or environment variables) -- confirm against callers.
    """
    builder = popart.Builder()

    shape = popart.TensorInfo("FLOAT", [1])

    i1 = builder.addInputTensor(shape)
    o = builder.aiOnnx.identity([i1])
    builder.addOutputTensor(o)

    proto = builder.getModelProto()
    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})
    opts = popart.SessionOptions()

    with tempfile.TemporaryDirectory() as tmpdir:
        opts.logDir = tmpdir

        # Constructing the session triggers the .dot file output.
        session = popart.InferenceSession(fnModel=proto,
                                          dataFlow=dataFlow,
                                          userOptions=opts,
                                          deviceInfo=tu.create_test_device())

        dotFiles = list(Path(tmpdir).glob('*.dot'))
        assert len(dotFiles) == expected_dot_file_count