Example #1
def test_acts_match_restored_acts():
    """
    In this test we check that the stashed tensors and their equivalent
    Restored tensors have the same values for all batches. This confirms
    that the schedule of restoring and streaming anchors is correct

    How do we know they're not both wrong? Take this example where the
    streamed input is stashed. Check that it matches the raw data input
    that is fed to the StepIO
    """
    bps = 8
    pipelined_anchors = get_model_anchors(doSharding=True,
                                          doPipelining=True,
                                          batchesPerStep=bps,
                                          doTraining=True,
                                          anchorRestoredTensors=True,
                                          returnRawInput=True)

    for (tId, t) in pipelined_anchors.items():
        for i in range(np.shape(t)[0]):
            print("batch: ", i, tId, np.sum(t[i]))

    assert np.allclose(
        pipelined_anchors[popart.reservedRestoredPrefix() + "Exp:0"],
        pipelined_anchors["Exp:0"])
    assert np.allclose(
        pipelined_anchors[popart.reservedRestoredPrefix() + "input"],
        pipelined_anchors["input"])
    assert np.allclose(pipelined_anchors["input_raw"],
                       pipelined_anchors["input"])
Example #2
def get_model_anchors(doSharding,
                      doPipelining,
                      batchesPerStep,
                      doTraining,
                      doProfiling=False,
                      doDevicex=True,
                      anchorRestoredTensors=False,
                      returnRawInput=False):
    np.random.seed(seed=1)

    builder = popart.Builder()
    batchSize = 2
    shape_d0 = [batchSize, 2, 4, 4]
    shape_l0 = [batchSize]
    d0 = builder.addInputTensor(popart.TensorInfo("FLOAT", shape_d0))
    data_w0 = np.ones(shape=[2, 2, 3, 3]).astype(np.float32)
    w0 = builder.addInitializedInputTensor(data_w0)
    l0 = builder.addInputTensor(popart.TensorInfo("INT32", shape_l0))

    s0 = builder.aiOnnx.sin([d0], "s0")
    e0 = builder.aiOnnx.exp([s0], "e0")
    c0 = builder.aiOnnx.conv([e0, w0],
                             dilations=[1, 1],
                             pads=[1, 1, 1, 1],
                             strides=[1, 1],
                             debugPrefix="c0")
    r0 = builder.reshape_const(builder.aiOnnx, [c0], [batchSize, 32])
    out = builder.aiOnnx.softmax([r0], axis=1, debugPrefix="sfm")
    nll = builder.aiGraphcore.nllloss([out, l0])

    art = popart.AnchorReturnType("All")

    anchor_map = {nll: art, w0: art, e0: art}
    if doTraining is True:
        anchor_map[popart.reservedGradientPrefix() + d0] = art
        if doPipelining is True and anchorRestoredTensors is True:
            anchor_map[popart.reservedRestoredPrefix() + e0] = art
            anchor_map[d0] = art
            anchor_map[popart.reservedRestoredPrefix() + d0] = art

    opts = popart.SessionOptions()
    opts.reportOptions = {"showExecutionSteps": "true"}
    opts.enablePipelining = doPipelining

    if doSharding is False:
        numIPUs = 1
    else:
        opts.virtualGraphMode = popart.VirtualGraphMode.Manual
        numIPUs = 3
        builder.virtualGraph(s0, 0)
        builder.virtualGraph(e0, 1)
        builder.virtualGraph(c0, 1)
        builder.virtualGraph(r0, 2)
        builder.virtualGraph(out, 2)
        builder.virtualGraph(nll, 2)

    if doTraining is True:
        session = popart.TrainingSession(
            fnModel=builder.getModelProto(),
            dataFlow=popart.DataFlow(batchesPerStep, anchor_map),
            loss=nll,
            optimizer=popart.ConstSGD(0.01),
            userOptions=opts,
            deviceInfo=tu.create_test_device(numIpus=numIPUs, tilesPerIpu=20))
    else:
        session = popart.InferenceSession(
            fnModel=builder.getModelProto(),
            dataFlow=popart.DataFlow(batchesPerStep, anchor_map),
            userOptions=opts,
            deviceInfo=tu.create_test_device(numIpus=numIPUs, tilesPerIpu=20))

    if doDevicex is False:
        return None

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    if batchesPerStep > 1:
        shape_d0.insert(0, batchesPerStep)
        shape_l0.insert(0, batchesPerStep)
    data = np.random.uniform(low=-10.0, high=10.0,
                             size=shape_d0).astype(np.float32)
    classes = np.prod(shape_d0) // (batchSize * batchesPerStep)
    label = np.random.randint(low=0, high=classes,
                              size=shape_l0).astype(np.int32)

    inputs = {d0: data, l0: label}
    stepio = popart.PyStepIO(inputs, anchors)

    session.weightsFromHost()

    session.run(stepio)

    if doProfiling is True:
        from gcprofile import save_popart_report
        save_popart_report(session)

    if returnRawInput is True:
        anchors["input_raw"] = data

    return anchors
Example #3
def get_model_anchors_model1(doSharding,
                             doPipelining,
                             batchesPerStep,
                             doTraining,
                             doGradAccl=False,
                             gradAcclFactor=1,
                             doProfiling=False,
                             doDevicex=True,
                             anchorRestoredTensors=False,
                             labelArray=None):
    # batch_size and hidden_size are module-level globals defined elsewhere
    # in the test file.
    micro_batch_size = batch_size // gradAcclFactor
    builder = popart.Builder()

    input_shape = [micro_batch_size, hidden_size]
    input_ = builder.addInputTensor(popart.TensorInfo("FLOAT", input_shape))

    x = input_
    with builder.virtualGraph(0):
        for i in range(2):
            w = builder.addInitializedInputTensor(
                np.ones([hidden_size, hidden_size]).astype(np.float32),
                f"weight_0_{i}")
            x = builder.aiOnnx.matmul([x, w])
    with builder.virtualGraph(1 if doSharding else 0):
        for i in range(2):
            w = builder.addInitializedInputTensor(
                np.ones([hidden_size, hidden_size]).astype(np.float32),
                f"weight_1_{i}")
            x = builder.aiOnnx.matmul([x, w])
    with builder.virtualGraph(2 if doSharding else 0):
        for i in range(2):
            w = builder.addInitializedInputTensor(
                np.ones([hidden_size, hidden_size]).astype(np.float32),
                f"weight_2_{i}")
            if i == 1:
                w0 = w
            x = builder.aiOnnx.matmul([x, w])
        label = builder.addInputTensor("INT32", [micro_batch_size])
        x = builder.aiGraphcore.nllloss([x, label])

    output = x

    builder.addOutputTensor(output)

    art = popart.AnchorReturnType("All")
    anchor_map = {x: art, w0: art}
    if doTraining is True:
        anchor_map[popart.reservedGradientPrefix() + x] = art
        if doPipelining is True and anchorRestoredTensors is True:
            anchor_map[popart.reservedRestoredPrefix() + x] = art
            anchor_map[popart.reservedRestoredPrefix() + w0] = art

    opts = popart.SessionOptions()
    opts.reportOptions = {"showExecutionSteps": "true"}
    opts.enablePipelining = doPipelining
    opts.enableGradientAccumulation = doGradAccl
    opts.accumulationFactor = gradAcclFactor
    opts.virtualGraphMode = popart.VirtualGraphMode.Manual

    if doSharding is False:
        numIPUs = 1
    else:
        numIPUs = 3

    if doTraining is True:
        session = popart.TrainingSession(
            fnModel=builder.getModelProto(),
            dataFlow=popart.DataFlow(batchesPerStep, anchor_map),
            loss=output,
            optimizer=popart.ConstSGD(0.01),
            userOptions=opts,
            deviceInfo=tu.create_test_device(numIpus=numIPUs))
    else:
        session = popart.InferenceSession(
            fnModel=builder.getModelProto(),
            dataFlow=popart.DataFlow(batchesPerStep, anchor_map),
            userOptions=opts,
            deviceInfo=tu.create_test_device(numIpus=numIPUs))

    if doDevicex is False:
        return None

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    outer_dim = 1
    if batchesPerStep > 1:
        # Add an outer dimension of batchesPerStep. We repeat the labels so
        # that the data stays consistent across examples with different input
        # shapes.
        outer_dim *= batchesPerStep
        labelArray = np.repeat(labelArray[np.newaxis], batchesPerStep, 0)
    if gradAcclFactor > 1:
        # Divide each batches-per-step batch into gradAcclFactor micro-batches,
        # giving gradAcclFactor * batchesPerStep samples.
        outer_dim *= gradAcclFactor
        labelArray = labelArray.reshape([gradAcclFactor * batchesPerStep, -1])
    if outer_dim > 1:
        # Add the gradAcclFactor * batchesPerStep dimension into the input.
        input_shape = [outer_dim] + input_shape
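    # For example (hypothetical values, not from the original test): with
    # batchesPerStep=4 and gradAcclFactor=2, outer_dim == 8, labelArray ends
    # up with shape [8, micro_batch_size], and input_shape becomes
    # [8, micro_batch_size, hidden_size].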

    stepio = popart.PyStepIO(
        {
            input_: np.ones(input_shape, np.float32),
            label: labelArray.astype(np.int32)
        }, anchors)

    session.weightsFromHost()

    session.run(stepio)

    return anchors
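
A minimal invocation sketch (hypothetical parameter values, and assuming the
module-level batch_size and hidden_size globals used above): the final matmul
produces hidden_size logits per sample, so the labels are class indices in
[0, hidden_size).

labelArray = np.random.randint(0, hidden_size,
                               size=[batch_size]).astype(np.int32)
anchors = get_model_anchors_model1(doSharding=True,
                                   doPipelining=True,
                                   batchesPerStep=4,
                                   doTraining=True,
                                   doGradAccl=True,
                                   gradAcclFactor=2,
                                   labelArray=labelArray)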
Example #4
def get_model_anchors_model2(doSharding,
                             doPipelining,
                             batchesPerStep,
                             doTraining,
                             doGradAccl=False,
                             gradAcclFactor=1,
                             doProfiling=False,
                             doDevicex=True,
                             anchorRestoredTensors=False,
                             returnRawInput=False,
                             labelArray=None):

    np.random.seed(1234)
    builder = popart.Builder()
    # batch_size is a module-level global defined elsewhere in the test file.
    micro_batch_size = batch_size // gradAcclFactor

    shape_d0 = [micro_batch_size, 2, 4, 4]
    shape_l0 = [batch_size]
    d0 = builder.addInputTensor(popart.TensorInfo("FLOAT", shape_d0), "inp")
    data_w0 = np.ones(shape=[2, 2, 3, 3]).astype(np.float32)
    w0 = builder.addInitializedInputTensor(data_w0, "weights")

    s0 = builder.aiOnnx.sin([d0], "s0")
    e0 = builder.aiOnnx.exp([s0], "e0")
    c0 = builder.aiOnnx.conv([e0, w0],
                             dilations=[1, 1],
                             pads=[1, 1, 1, 1],
                             strides=[1, 1],
                             debugContext="c0")
    r0 = builder.reshape_const(builder.aiOnnx, [c0], [micro_batch_size, 32])
    out = builder.aiOnnx.softmax([r0], axis=1, debugContext="sfm")

    label_shape = [micro_batch_size]
    l0 = builder.addInputTensor(popart.TensorInfo("INT32", label_shape),
                                "label")
    nll = builder.aiGraphcore.nllloss([out, l0])

    art = popart.AnchorReturnType("All")

    anchor_map = {nll: art, w0: art, e0: art, s0: art, c0: art}
    if doTraining is True:
        anchor_map[popart.reservedGradientPrefix() + d0] = art
        if doPipelining is True and anchorRestoredTensors is True:
            anchor_map[popart.reservedRestoredPrefix() + e0] = art
            anchor_map[d0] = art
            anchor_map[popart.reservedRestoredPrefix() + d0] = art

    opts = popart.SessionOptions()
    opts.reportOptions = {"showExecutionSteps": "true"}
    opts.enablePipelining = doPipelining
    opts.enableGradientAccumulation = doGradAccl
    opts.accumulationFactor = gradAcclFactor

    if doSharding is False:
        numIPUs = 1
    else:
        opts.virtualGraphMode = popart.VirtualGraphMode.Manual
        numIPUs = 3
        builder.virtualGraph(s0, 0)
        builder.virtualGraph(e0, 1)
        builder.virtualGraph(c0, 1)
        builder.virtualGraph(r0, 2)
        builder.virtualGraph(out, 2)
        builder.virtualGraph(nll, 2)

    if doTraining is True:
        session = popart.TrainingSession(
            fnModel=builder.getModelProto(),
            dataFlow=popart.DataFlow(batchesPerStep, anchor_map),
            loss=nll,
            optimizer=popart.ConstSGD(0.01),
            userOptions=opts,
            deviceInfo=tu.create_test_device(numIpus=numIPUs))
    else:
        session = popart.InferenceSession(
            fnModel=builder.getModelProto(),
            dataFlow=popart.DataFlow(batchesPerStep, anchor_map),
            userOptions=opts,
            deviceInfo=tu.create_test_device(numIpus=numIPUs))

    if doDevicex is False:
        return None

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    classes = np.prod(shape_d0) // (micro_batch_size * batchesPerStep)
    label = np.random.randint(low=0, high=classes,
                              size=shape_l0).astype(np.int32)

    outer_dim = 1
    if batchesPerStep > 1:
        # Add an outer dimension of batchesPerStep. We repeat the labels so
        # that the data stays consistent across examples with different input
        # shapes.
        outer_dim *= batchesPerStep
        label = np.repeat(label[np.newaxis], batchesPerStep, 0)
    if gradAcclFactor > 1:
        # Divide each batches-per-step batch into gradAcclFactor micro-batches,
        # giving gradAcclFactor * batchesPerStep samples.
        outer_dim *= gradAcclFactor
        label = label.reshape([gradAcclFactor * batchesPerStep, -1])
    if outer_dim > 1:
        # Add the gradAcclFactor * batchesPerStep dimension into the input.
        shape_d0.insert(0, outer_dim)
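    # For example (hypothetical values): with batchesPerStep=4 and
    # gradAcclFactor=2, shape_d0 becomes [8, micro_batch_size, 2, 4, 4] and
    # label has shape [8, micro_batch_size].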
    data = np.ones(shape=shape_d0).astype(np.float32)

    inputs = {d0: data, l0: label}
    stepio = popart.PyStepIO(inputs, anchors)

    session.weightsFromHost()

    for _ in range(6):
        session.run(stepio)

    if returnRawInput is True:
        anchors["input_raw"] = data

    return anchors
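
A sketch of the corresponding check for this model, following the pattern of
Example #1 (non-authoritative; parameter values are hypothetical): every
anchored tensor that also has a restored counterpart should hold the same
values.

anchors = get_model_anchors_model2(doSharding=True,
                                   doPipelining=True,
                                   batchesPerStep=4,
                                   doTraining=True,
                                   anchorRestoredTensors=True)
for tId in list(anchors):
    restored = popart.reservedRestoredPrefix() + tId
    if restored in anchors:
        assert np.allclose(anchors[restored], anchors[tId])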
Example #5
def get_model_anchors(doSharding,
                      doPipelining,
                      batchesPerStep,
                      doTraining,
                      replicated_graph_count=1,
                      doProfiling=False,
                      doDropout=False,
                      doGradientAccl=False,
                      acclSteps=1,
                      doDevicex=True,
                      anchorRestoredTensors=False,
                      returnRawInput=False):
    np.random.seed(seed=1)

    builder = popart.Builder()
    batchSize = 16
    microBatchSize = batchSize // acclSteps

    shape_d0 = [microBatchSize, 2, 4, 4]
    shape_l0 = [microBatchSize]

    d0 = builder.addInputTensor(popart.TensorInfo("FLOAT", shape_d0))
    data_w0 = np.ones(shape=[2, 2, 3, 3]).astype(np.float32)
    w0 = builder.addInitializedInputTensor(data_w0)
    l0 = builder.addInputTensor(popart.TensorInfo("INT32", shape_l0))

    s0 = builder.aiOnnx.sin([d0], "s0")
    e0 = builder.aiOnnx.exp([s0], "e0")
    c0 = builder.aiOnnx.conv([e0, w0],
                             dilations=[1, 1],
                             pads=[1, 1, 1, 1],
                             strides=[1, 1],
                             debugContext="c0")
    r0 = builder.reshape_const(builder.aiOnnx, [c0], [microBatchSize, 32])
    if doDropout:
        do0 = builder.aiOnnx.dropout([r0], num_outputs=1, ratio=0.2)[0]
        out = builder.aiOnnx.softmax([do0], axis=1, debugContext="sfm")
    else:
        out = builder.aiOnnx.softmax([r0], axis=1, debugContext="sfm")
    nll = builder.aiGraphcore.nllloss([out, l0],
                                      reduction=popart.ReductionType.Sum)

    art = popart.AnchorReturnType("All")

    anchor_map = {nll: art, w0: art, e0: art}
    if doTraining is True:
        anchor_map[popart.reservedGradientPrefix() + d0] = art
        if doPipelining is True and anchorRestoredTensors is True:
            anchor_map[popart.reservedRestoredPrefix() + e0] = art
            anchor_map[d0] = art
            anchor_map[popart.reservedRestoredPrefix() + d0] = art

    opts = popart.SessionOptions()
    opts.reportOptions = {"showExecutionSteps": "true"}
    opts.enablePipelining = doPipelining
    opts.enableGradientAccumulation = doGradientAccl
    opts.accumulationFactor = acclSteps
    opts.enableStochasticRounding = False

    if doSharding is False:
        numIpus = 1 * replicated_graph_count
    else:
        opts.virtualGraphMode = popart.VirtualGraphMode.Manual
        numIpus = 2 * replicated_graph_count
        builder.virtualGraph(s0, 0)
        builder.virtualGraph(e0, 0)
        builder.virtualGraph(c0, 0)
        builder.virtualGraph(r0, 1)
        if doDropout:
            builder.virtualGraph(do0, 1)
        builder.virtualGraph(out, 1)
        builder.virtualGraph(nll, 1)

    if replicated_graph_count > 1:
        opts.replicatedGraphCount = replicated_graph_count
        opts.enableReplicatedGraphs = True

    device = tu.create_test_device(numIpus=numIpus)

    if doTraining is True:
        session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                         dataFlow=popart.DataFlow(
                                             batchesPerStep, anchor_map),
                                         loss=nll,
                                         optimizer=popart.ConstSGD(0.01),
                                         userOptions=opts,
                                         deviceInfo=device)
    else:
        session = popart.InferenceSession(fnModel=builder.getModelProto(),
                                          dataFlow=popart.DataFlow(
                                              batchesPerStep, anchor_map),
                                          userOptions=opts,
                                          deviceInfo=device)

    if doDevicex is False:
        return None

    session.prepareDevice()
    anchors = session.initAnchorArrays()
    session.setRandomSeed(0)

    classes = np.prod(shape_d0) // (batchSize * batchesPerStep)

    label = np.random.randint(low=0, high=classes,
                              size=shape_l0).astype(np.int32)

    # With all options enabled, the returned anchors have the shape:
    # [batches_per_step, accl_factor, repl_factor, micro_batch, *data_shape]
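    # For example (hypothetical values): with batchesPerStep=4, acclSteps=2
    # and replicated_graph_count=2, an anchored activation with per-sample
    # shape [2, 4, 4] comes back as [4, 2, 2, microBatchSize, 2, 4, 4].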
    if acclSteps > 1:
        shape_d0.insert(0, acclSteps)
        label = label.reshape([acclSteps, -1])
    if batchesPerStep > 1:
        shape_d0.insert(0, batchesPerStep)
        label = np.repeat(label[np.newaxis], batchesPerStep, 0)

    data = np.random.random_sample(shape_d0).astype(np.float32)

    # This is a slightly odd case: we want the same data to be fed to both
    # replicated graphs, but the dimension we need to repeat on is either the
    # first or the second (the replication dimension), depending on whether
    # gradient accumulation is enabled.
    # Outside of testing this is much simpler, as we can split the samples
    # however we want.
    if replicated_graph_count > 1:
        if acclSteps > 1:
            data = np.repeat(data[np.newaxis], replicated_graph_count, 2)
            label = label.reshape([replicated_graph_count, -1])
        else:
            data = np.repeat(data[np.newaxis], replicated_graph_count, 1)
            label = label.reshape([replicated_graph_count, -1])

    inputs = {d0: data, l0: label}
    stepio = popart.PyStepIO(inputs, anchors)
    stepio.enableRuntimeAsserts(False)

    session.weightsFromHost()

    session.run(stepio)

    if doProfiling is True:
        from gcprofile import save_popart_report
        save_popart_report(session)

    if returnRawInput is True:
        anchors["input_raw"] = data

    return anchors
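
A minimal invocation sketch (hypothetical parameter values) exercising the
replication and gradient-accumulation paths of this helper, with the same
anchor printout used in Example #1:

anchors = get_model_anchors(doSharding=True,
                            doPipelining=True,
                            batchesPerStep=4,
                            doTraining=True,
                            replicated_graph_count=2,
                            doGradientAccl=True,
                            acclSteps=2)
for tId, t in anchors.items():
    print(tId, np.shape(t))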