import numpy as np
import popart

# test_util is the PopART test helper module that provides create_test_device.
import test_util as tu


def test_acts_match_restored_acts():
    """
    In this test we check that the stashed tensors and their equivalent
    restored tensors have the same values for all batches. This confirms
    that the schedule of restoring and streaming anchors is correct.

    How do we know they're not both wrong? Take this example, where the
    streamed input is stashed: check that it matches the raw data input
    that is fed to the StepIO.
    """
    bps = 8
    pipelined_anchors = get_model_anchors(doSharding=True,
                                          doPipelining=True,
                                          batchesPerStep=bps,
                                          doTraining=True,
                                          anchorRestoredTensors=True,
                                          returnRawInput=True)

    for (tId, t) in pipelined_anchors.items():
        for i in range(np.shape(t)[0]):
            print("batch: ", i, tId, np.sum(t[i]))

    assert np.allclose(
        pipelined_anchors[popart.reservedRestoredPrefix() + "Exp:0"],
        pipelined_anchors["Exp:0"])
    assert np.allclose(
        pipelined_anchors[popart.reservedRestoredPrefix() + "input"],
        pipelined_anchors["input"])
    assert np.allclose(pipelined_anchors["input_raw"],
                       pipelined_anchors["input"])
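# A companion check, sketched here as an assumption (it is not part of the
# original suite): with the stash/restore schedule verified above, the same
# helper can also confirm that a pipelined run reproduces a plain
# single-IPU run batch for batch.
def test_pipelined_matches_single_ipu_sketch():
    bps = 8
    single = get_model_anchors(doSharding=False,
                               doPipelining=False,
                               batchesPerStep=bps,
                               doTraining=True)
    pipelined = get_model_anchors(doSharding=True,
                                  doPipelining=True,
                                  batchesPerStep=bps,
                                  doTraining=True)
    # Both runs request the same anchors (nll, weights, exp activation and
    # the input gradient), so the keys line up one-to-one.
    for tId in single:
        assert np.allclose(single[tId], pipelined[tId])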
def get_model_anchors(doSharding,
                      doPipelining,
                      batchesPerStep,
                      doTraining,
                      doProfiling=False,
                      doDevicex=True,
                      anchorRestoredTensors=False,
                      returnRawInput=False):
    np.random.seed(seed=1)
    builder = popart.Builder()
    batchSize = 2
    shape_d0 = [batchSize, 2, 4, 4]
    shape_l0 = [batchSize]
    d0 = builder.addInputTensor(popart.TensorInfo("FLOAT", shape_d0))
    data_w0 = np.ones(shape=[2, 2, 3, 3]).astype(np.float32)
    w0 = builder.addInitializedInputTensor(data_w0)
    l0 = builder.addInputTensor(popart.TensorInfo("INT32", shape_l0))

    s0 = builder.aiOnnx.sin([d0], "s0")
    e0 = builder.aiOnnx.exp([s0], "e0")
    c0 = builder.aiOnnx.conv([e0, w0],
                             dilations=[1, 1],
                             pads=[1, 1, 1, 1],
                             strides=[1, 1],
                             debugPrefix="c0")
    r0 = builder.reshape_const(builder.aiOnnx, [c0], [batchSize, 32])
    out = builder.aiOnnx.softmax([r0], axis=1, debugPrefix="sfm")
    nll = builder.aiGraphcore.nllloss([out, l0])

    art = popart.AnchorReturnType("All")
    anchor_map = {nll: art, w0: art, e0: art}
    if doTraining is True:
        anchor_map[popart.reservedGradientPrefix() + d0] = art
        if doPipelining is True and anchorRestoredTensors is True:
            anchor_map[popart.reservedRestoredPrefix() + e0] = art
            anchor_map[d0] = art
            anchor_map[popart.reservedRestoredPrefix() + d0] = art

    opts = popart.SessionOptions()
    opts.reportOptions = {"showExecutionSteps": "true"}
    opts.enablePipelining = doPipelining

    if doSharding is False:
        numIPUs = 1
    else:
        opts.virtualGraphMode = popart.VirtualGraphMode.Manual
        numIPUs = 3
        builder.virtualGraph(s0, 0)
        builder.virtualGraph(e0, 1)
        builder.virtualGraph(c0, 1)
        builder.virtualGraph(r0, 2)
        builder.virtualGraph(out, 2)
        builder.virtualGraph(nll, 2)

    if doTraining is True:
        session = popart.TrainingSession(
            fnModel=builder.getModelProto(),
            dataFlow=popart.DataFlow(batchesPerStep, anchor_map),
            loss=nll,
            optimizer=popart.ConstSGD(0.01),
            userOptions=opts,
            deviceInfo=tu.create_test_device(numIpus=numIPUs, tilesPerIpu=20))
    else:
        session = popart.InferenceSession(
            fnModel=builder.getModelProto(),
            dataFlow=popart.DataFlow(batchesPerStep, anchor_map),
            userOptions=opts,
            deviceInfo=tu.create_test_device(numIpus=numIPUs, tilesPerIpu=20))

    if doDevicex is False:
        return None

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    if batchesPerStep > 1:
        shape_d0.insert(0, batchesPerStep)
        shape_l0.insert(0, batchesPerStep)
    data = np.random.uniform(low=-10.0, high=10.0,
                             size=shape_d0).astype(np.float32)
    classes = np.prod(shape_d0) / (batchSize * batchesPerStep)
    label = np.random.randint(low=0, high=classes,
                              size=shape_l0).astype(np.int32)

    inputs = {d0: data, l0: label}
    stepio = popart.PyStepIO(inputs, anchors)

    session.weightsFromHost()
    session.run(stepio)

    if doProfiling is True:
        from gcprofile import save_popart_report
        save_popart_report(session)

    if returnRawInput is True:
        anchors["input_raw"] = data

    return anchors
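# Shape note (an inferred sketch, not asserted above): with AnchorReturnType
# "All" and batchesPerStep=N > 1, every anchor gains a leading step
# dimension, e.g.
#
#   anchors["Exp:0"].shape == (N, batchSize, 2, 4, 4)
#
# which is why test_acts_match_restored_acts indexes batches as t[i].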
def get_model_anchors_model1(doSharding,
                             doPipelining,
                             batchesPerStep,
                             doTraining,
                             doGradAccl=False,
                             gradAcclFactor=1,
                             doProfiling=False,
                             doDevicex=True,
                             anchorRestoredTensors=False,
                             labelArray=None):
    micro_batch_size = batch_size // gradAcclFactor

    builder = popart.Builder()
    input_shape = [micro_batch_size, hidden_size]
    input_ = builder.addInputTensor(popart.TensorInfo("FLOAT", input_shape))

    x = input_
    with builder.virtualGraph(0):
        for i in range(2):
            w = builder.addInitializedInputTensor(
                np.ones([hidden_size, hidden_size]).astype(np.float32),
                f"weight_0_{i}")
            x = builder.aiOnnx.matmul([x, w])
    with builder.virtualGraph(1 if doSharding else 0):
        for i in range(2):
            w = builder.addInitializedInputTensor(
                np.ones([hidden_size, hidden_size]).astype(np.float32),
                f"weight_1_{i}")
            x = builder.aiOnnx.matmul([x, w])
    with builder.virtualGraph(2 if doSharding else 0):
        for i in range(2):
            w = builder.addInitializedInputTensor(
                np.ones([hidden_size, hidden_size]).astype(np.float32),
                f"weight_2_{i}")
            if i == 1:
                w0 = w
            x = builder.aiOnnx.matmul([x, w])
        label = builder.addInputTensor("INT32", [micro_batch_size])
        x = builder.aiGraphcore.nllloss([x, label])

    output = x
    builder.addOutputTensor(output)

    art = popart.AnchorReturnType("All")
    anchor_map = {x: art, w0: art}
    if doTraining is True:
        anchor_map[popart.reservedGradientPrefix() + x] = art
        if doPipelining is True and anchorRestoredTensors is True:
            anchor_map[popart.reservedRestoredPrefix() + x] = art
            anchor_map[popart.reservedRestoredPrefix() + w0] = art

    opts = popart.SessionOptions()
    opts.reportOptions = {"showExecutionSteps": "true"}
    opts.enablePipelining = doPipelining
    opts.enableGradientAccumulation = doGradAccl
    opts.accumulationFactor = gradAcclFactor
    opts.virtualGraphMode = popart.VirtualGraphMode.Manual

    if doSharding is False:
        numIPUs = 1
    else:
        numIPUs = 3

    if doTraining is True:
        session = popart.TrainingSession(
            fnModel=builder.getModelProto(),
            dataFlow=popart.DataFlow(batchesPerStep, anchor_map),
            loss=output,
            optimizer=popart.ConstSGD(0.01),
            userOptions=opts,
            deviceInfo=tu.create_test_device(numIpus=numIPUs))
    else:
        session = popart.InferenceSession(
            fnModel=builder.getModelProto(),
            dataFlow=popart.DataFlow(batchesPerStep, anchor_map),
            userOptions=opts,
            deviceInfo=tu.create_test_device(numIpus=numIPUs))

    if doDevicex is False:
        return None

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    outer_dim = 1
    if batchesPerStep > 1:
        # Add an outer dimension of batchesPerStep. We repeat the labels as we
        # want consistency if we have different shape inputs between examples.
        outer_dim *= batchesPerStep
        labelArray = np.repeat(labelArray[np.newaxis], batchesPerStep, 0)
    if gradAcclFactor > 1:
        # Divide the batches-per-step batches further into
        # gradAcclFactor * batchesPerStep micro batches.
        outer_dim *= gradAcclFactor
        labelArray = labelArray.reshape([gradAcclFactor * batchesPerStep, -1])
    if outer_dim > 1:
        # Add the gradAcclFactor * batchesPerStep dimension into the input.
        input_shape = [outer_dim] + input_shape

    stepio = popart.PyStepIO(
        {
            input_: np.ones(input_shape, np.float32),
            label: labelArray.astype(np.int32)
        }, anchors)

    session.weightsFromHost()
    session.run(stepio)

    return anchors
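# Usage sketch (assumptions: batch_size and hidden_size are module-level
# globals of this file, and labelArray holds one class index per sample):
#
#   labelArray = np.random.randint(0, hidden_size, size=batch_size)
#   anchors = get_model_anchors_model1(doSharding=True,
#                                      doPipelining=True,
#                                      batchesPerStep=4,
#                                      doTraining=True,
#                                      doGradAccl=True,
#                                      gradAcclFactor=2,
#                                      labelArray=labelArray)
#
# With gradAcclFactor=2 and batchesPerStep=4 the step input is reshaped to
# [8, micro_batch_size, hidden_size], i.e. outer_dim = 4 * 2.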
def get_model_anchors_model2(doSharding,
                             doPipelining,
                             batchesPerStep,
                             doTraining,
                             doGradAccl=False,
                             gradAcclFactor=1,
                             doProfiling=False,
                             doDevicex=True,
                             anchorRestoredTensors=False,
                             returnRawInput=False,
                             labelArray=None):
    np.random.seed(1234)
    builder = popart.Builder()
    micro_batch_size = batch_size // gradAcclFactor

    shape_d0 = [micro_batch_size, 2, 4, 4]
    shape_l0 = [batch_size]
    d0 = builder.addInputTensor(popart.TensorInfo("FLOAT", shape_d0), "inp")
    data_w0 = np.ones(shape=[2, 2, 3, 3]).astype(np.float32)
    w0 = builder.addInitializedInputTensor(data_w0, "weights")

    s0 = builder.aiOnnx.sin([d0], "s0")
    e0 = builder.aiOnnx.exp([s0], "e0")
    c0 = builder.aiOnnx.conv([e0, w0],
                             dilations=[1, 1],
                             pads=[1, 1, 1, 1],
                             strides=[1, 1],
                             debugContext="c0")
    r0 = builder.reshape_const(builder.aiOnnx, [c0], [micro_batch_size, 32])
    out = builder.aiOnnx.softmax([r0], axis=1, debugContext="sfm")

    label_shape = [micro_batch_size]
    l0 = builder.addInputTensor(popart.TensorInfo("INT32", label_shape),
                                "label")
    nll = builder.aiGraphcore.nllloss([out, l0])

    art = popart.AnchorReturnType("All")
    anchor_map = {nll: art, w0: art, e0: art, s0: art, c0: art}
    if doTraining is True:
        anchor_map[popart.reservedGradientPrefix() + d0] = art
        if doPipelining is True and anchorRestoredTensors is True:
            anchor_map[popart.reservedRestoredPrefix() + e0] = art
            anchor_map[d0] = art
            anchor_map[popart.reservedRestoredPrefix() + d0] = art

    opts = popart.SessionOptions()
    opts.reportOptions = {"showExecutionSteps": "true"}
    opts.enablePipelining = doPipelining
    opts.enableGradientAccumulation = doGradAccl
    opts.accumulationFactor = gradAcclFactor

    if doSharding is False:
        numIPUs = 1
    else:
        opts.virtualGraphMode = popart.VirtualGraphMode.Manual
        numIPUs = 3
        builder.virtualGraph(s0, 0)
        builder.virtualGraph(e0, 1)
        builder.virtualGraph(c0, 1)
        builder.virtualGraph(r0, 2)
        builder.virtualGraph(out, 2)
        builder.virtualGraph(nll, 2)

    if doTraining is True:
        session = popart.TrainingSession(
            fnModel=builder.getModelProto(),
            dataFlow=popart.DataFlow(batchesPerStep, anchor_map),
            loss=nll,
            optimizer=popart.ConstSGD(0.01),
            userOptions=opts,
            deviceInfo=tu.create_test_device(numIpus=numIPUs))
    else:
        session = popart.InferenceSession(
            fnModel=builder.getModelProto(),
            dataFlow=popart.DataFlow(batchesPerStep, anchor_map),
            userOptions=opts,
            deviceInfo=tu.create_test_device(numIpus=numIPUs))

    if doDevicex is False:
        return None

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    classes = np.prod(shape_d0) / (micro_batch_size * batchesPerStep)
    label = np.random.randint(low=0, high=classes,
                              size=shape_l0).astype(np.int32)

    outer_dim = 1
    if batchesPerStep > 1:
        # Add an outer dimension of batchesPerStep. We repeat the labels as we
        # want consistency if we have different shape inputs between examples.
        outer_dim *= batchesPerStep
        label = np.repeat(label[np.newaxis], batchesPerStep, 0)
    if gradAcclFactor > 1:
        # Divide the batches-per-step batches further into
        # gradAcclFactor * batchesPerStep micro batches.
        outer_dim *= gradAcclFactor
        label = label.reshape([gradAcclFactor * batchesPerStep, -1])
    if outer_dim > 1:
        # Add the gradAcclFactor * batchesPerStep dimension into the input.
        shape_d0.insert(0, outer_dim)
    data = np.ones(shape=shape_d0).astype(np.float32)

    inputs = {d0: data, l0: label}
    stepio = popart.PyStepIO(inputs, anchors)

    session.weightsFromHost()

    for i in range(6):
        session.run(stepio)

    if returnRawInput is True:
        anchors["input_raw"] = data

    return anchors
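# Usage sketch (an assumed driver, not part of the original code): because
# np.random.seed(1234) fixes the label data, two configurations can be run
# and their "weights" anchors compared after the six steps, e.g.:
#
#   accl = get_model_anchors_model2(doSharding=True, doPipelining=True,
#                                   batchesPerStep=8, doTraining=True,
#                                   doGradAccl=True, gradAcclFactor=2)
#   base = get_model_anchors_model2(doSharding=True, doPipelining=True,
#                                   batchesPerStep=8, doTraining=True)
#   np.testing.assert_allclose(accl["weights"][-1], base["weights"][-1])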
def get_model_anchors(doSharding,
                      doPipelining,
                      batchesPerStep,
                      doTraining,
                      replicated_graph_count=1,
                      doProfiling=False,
                      doDropout=False,
                      doGradientAccl=False,
                      acclSteps=1,
                      doDevicex=True,
                      anchorRestoredTensors=False,
                      returnRawInput=False):
    np.random.seed(seed=1)
    builder = popart.Builder()
    batchSize = 16
    microBatchSize = batchSize // acclSteps

    shape_d0 = [microBatchSize, 2, 4, 4]
    shape_l0 = [microBatchSize]
    d0 = builder.addInputTensor(popart.TensorInfo("FLOAT", shape_d0))
    data_w0 = np.ones(shape=[2, 2, 3, 3]).astype(np.float32)
    w0 = builder.addInitializedInputTensor(data_w0)
    l0 = builder.addInputTensor(popart.TensorInfo("INT32", shape_l0))

    s0 = builder.aiOnnx.sin([d0], "s0")
    e0 = builder.aiOnnx.exp([s0], "e0")
    c0 = builder.aiOnnx.conv([e0, w0],
                             dilations=[1, 1],
                             pads=[1, 1, 1, 1],
                             strides=[1, 1],
                             debugContext="c0")
    r0 = builder.reshape_const(builder.aiOnnx, [c0], [microBatchSize, 32])

    if doDropout:
        do0 = builder.aiOnnx.dropout([r0], num_outputs=1, ratio=0.2)[0]
        out = builder.aiOnnx.softmax([do0], axis=1, debugContext="sfm")
    else:
        out = builder.aiOnnx.softmax([r0], axis=1, debugContext="sfm")

    nll = builder.aiGraphcore.nllloss([out, l0],
                                      reduction=popart.ReductionType.Sum)

    art = popart.AnchorReturnType("All")
    anchor_map = {nll: art, w0: art, e0: art}
    if doTraining is True:
        anchor_map[popart.reservedGradientPrefix() + d0] = art
        if doPipelining is True and anchorRestoredTensors is True:
            anchor_map[popart.reservedRestoredPrefix() + e0] = art
            anchor_map[d0] = art
            anchor_map[popart.reservedRestoredPrefix() + d0] = art

    opts = popart.SessionOptions()
    opts.reportOptions = {"showExecutionSteps": "true"}
    opts.enablePipelining = doPipelining
    opts.enableGradientAccumulation = doGradientAccl
    opts.accumulationFactor = acclSteps
    opts.enableStochasticRounding = False

    if doSharding is False:
        numIpus = 1 * replicated_graph_count
    else:
        opts.virtualGraphMode = popart.VirtualGraphMode.Manual
        numIpus = 2 * replicated_graph_count
        builder.virtualGraph(s0, 0)
        builder.virtualGraph(e0, 0)
        builder.virtualGraph(c0, 0)
        builder.virtualGraph(r0, 1)
        if doDropout:
            builder.virtualGraph(do0, 1)
        builder.virtualGraph(out, 1)
        builder.virtualGraph(nll, 1)

    if replicated_graph_count > 1:
        opts.replicatedGraphCount = replicated_graph_count
        opts.enableReplicatedGraphs = True

    device = tu.create_test_device(numIpus=numIpus)

    if doTraining is True:
        session = popart.TrainingSession(
            fnModel=builder.getModelProto(),
            dataFlow=popart.DataFlow(batchesPerStep, anchor_map),
            loss=nll,
            optimizer=popart.ConstSGD(0.01),
            userOptions=opts,
            deviceInfo=device)
    else:
        session = popart.InferenceSession(
            fnModel=builder.getModelProto(),
            dataFlow=popart.DataFlow(batchesPerStep, anchor_map),
            userOptions=opts,
            deviceInfo=device)

    if doDevicex is False:
        return None

    session.prepareDevice()
    anchors = session.initAnchorArrays()
    session.setRandomSeed(0)

    classes = np.prod(shape_d0) // (batchSize * batchesPerStep)
    label = np.random.randint(low=0, high=classes,
                              size=shape_l0).astype(np.int32)

    # With all options enabled, the returned anchors have shape:
    # [batches_per_step, accl_factor, repl_factor, micro_batch, *data_shape]
    if acclSteps > 1:
        shape_d0.insert(0, acclSteps)
        label = label.reshape([acclSteps, -1])
    if batchesPerStep > 1:
        shape_d0.insert(0, batchesPerStep)
        label = np.repeat(label[np.newaxis], batchesPerStep, 0)

    data = np.random.random_sample(shape_d0).astype(np.float32)

    # This is a slightly odd case - we want the same data to be input for both
    # replicated graphs, but the dimension we need to repeat on is either the
    # first or second (the replication dimension) depending on whether we have
    # gradient accumulation enabled. If we were not testing, this would be a
    # lot simpler, as we could split samples however we want.
    if replicated_graph_count > 1:
        if acclSteps > 1:
            data = np.repeat(data[np.newaxis], replicated_graph_count, 2)
            label = label.reshape([replicated_graph_count, -1])
        else:
            data = np.repeat(data[np.newaxis], replicated_graph_count, 1)
            label = label.reshape([replicated_graph_count, -1])

    inputs = {d0: data, l0: label}
    stepio = popart.PyStepIO(inputs, anchors)
    stepio.enableRuntimeAsserts(False)

    session.weightsFromHost()
    session.run(stepio)

    if doProfiling is True:
        from gcprofile import save_popart_report
        save_popart_report(session)

    if returnRawInput is True:
        anchors["input_raw"] = data

    return anchors
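# Usage sketch (an assumed invocation; it needs a device with
# 2 * replicated_graph_count IPUs): replication combined with pipelining
# and gradient accumulation.
#
#   anchors = get_model_anchors(doSharding=True,
#                               doPipelining=True,
#                               batchesPerStep=4,
#                               doTraining=True,
#                               replicated_graph_count=2,
#                               doGradientAccl=True,
#                               acclSteps=4)
#
# Per the layout comment above, each returned anchor is then indexed as
# [batches_per_step, accl_factor, repl_factor, micro_batch, *data_shape].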