예제 #1
0
def popart_result_and_model(config, weight_transposed, is_bwd=False):
    """Build the serialized word-embedding graph and execute it via ``run_py``.

    A ``Bert`` model is created from ``config``, a random UINT32 index tensor
    of length ``micro_batch_size * sequence_length`` is fed through
    ``word_embedding_serialized`` and the resulting proto is run.

    Args:
        config (BertConfig): Popart config handed to ``Bert``.
        weight_transposed: Not read anywhere in this function body.
            NOTE(review): the previous docstring described it as "Construct
            embedding dict transposed" - confirm whether it should be
            forwarded to the model.
        is_bwd (bool, optional): When True an L1 loss and a ``ConstSGD``
            optimizer are added and passed to ``run_py``; otherwise only the
            embedding output is requested. Defaults to False.

    Returns:
        Tuple: ``[index data]``, outputs from ``run_py``, proto, post_proto.

    Note:
        ``num_splits`` is not a parameter; it is resolved from the enclosing
        scope.
    """
    run_options = {}
    model = Bert(config)
    builder = model.builder

    num_indices = config.micro_batch_size * config.sequence_length
    indices = builder.addInputTensor(
        popart.TensorInfo("UINT32", [num_indices]))
    random_ids = np.random.randint(0, config.vocab_length, (num_indices))
    data = {indices: random_ids.astype(np.uint32)}
    output = model.word_embedding_serialized(indices, num_splits)

    if not is_bwd:
        # Forward-only path: no loss or optimizer is supplied.
        proto = builder.getModelProto()
        outputs, post_proto = run_py(proto,
                                     data,
                                     output,
                                     user_options=run_options)
        return [data[indices]], outputs, proto, post_proto

    # Training path: the loss must be added before the proto is serialized.
    l1_loss = model.builder.aiGraphcore.l1loss(
        [output],
        0.1,
        debugContext="l1LossVal",
        reduction=popart.ReductionType.Sum)
    proto = builder.getModelProto()
    outputs, post_proto = run_py(proto,
                                 data, (output, l1_loss),
                                 loss=l1_loss,
                                 optimizer=popart.ConstSGD(0.01),
                                 user_options=run_options)
    return [data[indices]], outputs, proto, post_proto
예제 #2
0
def test_detach_error():
    """Anchoring the input gradient behind a detach op must raise.

    The graph is conv -> reshape -> detach -> softmax -> nllloss. The
    gradient of the input is requested as an anchor and session construction
    is expected to fail with the "not in Ir Tensors" message.
    """
    np.random.seed(0)
    batch_size = 8
    num_classes = 32

    data_shape = [batch_size, 2, 4, 4]
    label_shape = [batch_size]
    weight_shape = [2, 2, 3, 3]

    # All three draws are kept (even the unused ones) so the random stream
    # consumed after the seed is unchanged.
    ip_data = np.random.rand(*data_shape).astype(np.float32)
    w1_data = np.random.rand(*weight_shape).astype(np.float32)
    lb_data = np.random.randint(num_classes, size=label_shape)

    builder = popart.Builder()

    input_ = builder.addInputTensor(popart.TensorInfo("FLOAT", data_shape),
                                    "input_i1")
    labels = builder.addInputTensor(popart.TensorInfo("INT32", label_shape))
    weights = builder.addInitializedInputTensor(w1_data)

    conv_out = builder.aiOnnx.conv([input_, weights],
                                   dilations=[1, 1],
                                   pads=[1, 1, 1, 1],
                                   strides=[1, 1],
                                   debugPrefix="conv")
    flat = builder.reshape_const(builder.aiOnnx, [conv_out],
                                 [batch_size, num_classes])
    detached = builder.aiGraphcore.detach([flat])
    probs = builder.aiOnnx.softmax([detached], axis=np.size(label_shape))
    loss = builder.aiGraphcore.nllloss([probs, labels])

    input_grad = popart.reservedGradientPrefix() + input_
    data_flow = popart.DataFlow(1, [probs, loss, input_grad])
    session_options = popart.SessionOptions()

    with pytest.raises(popart.popart_exception) as e_info:
        popart.TrainingSession(
            fnModel=builder.getModelProto(),
            dataFlow=data_flow,
            loss=loss,
            optimizer=popart.ConstSGD(LEARNING_RATE, WEIGHT_DECAY),
            userOptions=session_options,
            deviceInfo=popart.DeviceManager().createIpuModelDevice({}))

    message = e_info.value.args[0]
    assert message.startswith(
        f"Anchor tensor `{input_grad}' not in Ir Tensors.")
예제 #3
0
    def runModel(pipeline, recompute):
        """Run one training step of a small two-IPU model and read back ``w0``.

        The graph is matmul (virtual graph 0, stage 0) -> sqrt (virtual
        graph 1, stage 1) -> add + nllloss (virtual graph 0, stage 2).
        ``dshape``, ``lshape``, ``w0_data``, ``in0_data``, ``in1_data`` and
        ``bps`` come from the enclosing scope.

        Args:
            pipeline: Truthy to enable pipelining. Gradient accumulation is
                then switched on with factor ``bps`` and the step runs a
                single batch; otherwise the step runs ``bps`` batches.
            recompute: Truthy to select ``RecomputationType.Pipeline`` for
                auto-recomputation.

        Returns:
            The host array for ``w0`` as filled by ``session.readWeights``.
        """
        builder = popart.Builder()
        in0 = builder.addInputTensor("FLOAT", dshape)
        in1 = builder.addInputTensor("INT32", lshape)
        w0 = builder.addInitializedInputTensor(w0_data)
        with builder.virtualGraph(0), builder.pipelineStage(0):
            x = builder.aiOnnx.matmul([in0, w0])
        with builder.virtualGraph(1), builder.pipelineStage(1):
            x = builder.aiOnnx.sqrt([x])
        with builder.virtualGraph(0), builder.pipelineStage(2):
            x = builder.aiOnnx.add([w0, x])
            loss = builder.aiGraphcore.nllloss([x, in1])

        opts = popart.SessionOptions()
        opts.virtualGraphMode = popart.VirtualGraphMode.Manual
        opts.enablePipelining = pipeline
        if pipeline:
            # With pipelining the batches are folded into gradient
            # accumulation, so the data flow itself runs one batch per step.
            opts.enableGradientAccumulation = True
            opts.accumulationFactor = bps
            test_bps = 1
        else:
            test_bps = bps

        if recompute:
            opts.autoRecomputation = popart.RecomputationType.Pipeline

        session = popart.TrainingSession(
            deviceInfo=popart.DeviceManager().createIpuModelDevice(
                {"numIPUs": "2"}),
            dataFlow=popart.DataFlow(test_bps, [loss]),
            fnModel=builder.getModelProto(),
            loss=loss,
            optimizer=popart.ConstSGD(0.1),
            userOptions=opts)

        session.prepareDevice()
        session.weightsFromHost()
        anchors = session.initAnchorArrays()
        stepio = popart.PyStepIO({in0: in0_data, in1: in1_data}, anchors)
        session.run(stepio)

        # Size the read-back buffer from the weight's own initializer rather
        # than from the input shape, so it always matches the tensor read.
        weights = {w0: np.empty(shape=np.shape(w0_data), dtype=np.float32)}
        weightsIo = popart.PyWeightsIO(weights)
        session.weightsToHost()
        session.readWeights(weightsIo)
        return weights[w0]
예제 #4
0
    def create_session(inplacing):
        """Create a prepared training session for transpose -> reshape -> add.

        ``data``, ``weight``, ``input_size`` and ``constantWeights`` are taken
        from the enclosing scope.

        Args:
            inplacing: Value assigned to the ``InPlace`` pattern switch.

        Returns:
            Tuple of (session, stepio, anchor arrays).
        """
        builder = popart.Builder()

        data_in = builder.addInputTensor(
            popart.TensorInfo("FLOAT", data.shape), "data")
        weights_in = builder.addInitializedInputTensor(weight, 'input_weights')

        target_shape = builder.aiOnnx.constant(np.array(input_size))

        # Each op is given an explicit inplace priority.
        transposed = builder.aiOnnx.transpose([weights_in], [0, 2, 1, 3],
                                              "transpose")
        builder.setInplacePreferences(transposed, {"TransposeInplace": +1e8})
        reshaped = builder.aiOnnx.reshape([transposed, target_shape],
                                          "reshape")
        builder.setInplacePreferences(reshaped, {"ReshapeInplace": +1e6})

        summed = builder.aiOnnx.add([data_in, reshaped], "add")
        builder.setInplacePreferences(summed, {"AddRhsInplace": +1e7})

        loss = builder.aiGraphcore.l1loss([summed],
                                          0.1,
                                          reduction=popart.ReductionType.Mean)
        builder.addOutputTensor(loss)

        pattern_set = popart.Patterns(popart.PatternsLevel.Default)
        pattern_set.InPlace = inplacing

        session_options = popart.SessionOptions()
        session_options.constantWeights = constantWeights

        session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                         dataFlow=popart.DataFlow(1, [loss]),
                                         deviceInfo=tu.create_test_device(),
                                         userOptions=session_options,
                                         patterns=pattern_set,
                                         loss=loss,
                                         optimizer=popart.ConstSGD(1e-3))
        session.prepareDevice()

        anchor_arrays = session.initAnchorArrays()
        # A copy of the closure data is fed in under the key "data".
        stepio = popart.PyStepIO({"data": data.copy()}, anchor_arrays)

        session.weightsFromHost()
        return session, stepio, anchor_arrays
예제 #5
0
def test_batchnorm_train_half_fp32var(op_tester):
    """Run two training steps of batchnorm with fp16 inputs and fp32 variance.

    The input, scale, bias and mean are float16 while the variance
    initializer is float32. The test only checks that building, preparing and
    running the session twice completes; ``op_tester`` is not used in the
    body.
    """
    # Test data; the draw order is kept so the random stream is unchanged.
    x_data = np.random.rand(1, 3, 2, 2).astype(np.float16) * 100
    scale_data = np.random.rand(3).astype(np.float16)
    bias_data = np.random.rand(3).astype(np.float16)
    mean_data = np.random.rand(3).astype(np.float16)
    var_data = np.random.rand(3).astype(np.float32)
    epsilon = 1e-05
    momentum = 0.1

    builder = popart.Builder()
    x = builder.addInputTensor(popart.TensorInfo(x_data))
    bn_inputs = [
        x,
        builder.addInitializedInputTensor(scale_data),
        builder.addInitializedInputTensor(bias_data),
        builder.addInitializedInputTensor(mean_data),
        builder.addInitializedInputTensor(var_data),
    ]
    # Five outputs are requested; only the normalized output is used below.
    y, _mean, _var, _saved_mean, _saved_var = (
        builder.aiOnnx.batchnormalization(bn_inputs, 5, epsilon, momentum))
    builder.addOutputTensor(y)
    loss_id = builder.aiGraphcore.identityloss([y])
    proto = builder.getModelProto()

    data_flow = popart.DataFlow(1, {y: popart.AnchorReturnType("All")})
    device = tu.create_test_device()

    options = popart.SessionOptions()
    options.enableStochasticRounding = False

    session = popart.TrainingSession(fnModel=proto,
                                     loss=loss_id,
                                     dataFlow=data_flow,
                                     deviceInfo=device,
                                     optimizer=popart.ConstSGD(0.01),
                                     userOptions=options)

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    feed = {x: x_data}
    session.weightsFromHost()
    # A fresh PyStepIO is built for each of the two runs.
    session.run(popart.PyStepIO(feed, anchors))
    session.run(popart.PyStepIO(feed, anchors))
예제 #6
0
def test_mini_resnet_like():
    """Check the reported memory of a ResNet-18 training graph.

    The total and per-tile maximum memory taken from the session's graph
    report must each lie within 5% of the hard-coded reference values.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    sys.path.append(here + "/../../graph_util")

    # Get model proto (batch size 1, training mode, batch normalization).
    proto, ip, op = get_resnet18_proto(1, True, 'BatchNorm')

    # Create the onnx session
    session_options = popart.SessionOptions()
    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=popart.DataFlow(
                                         1,
                                         {op: popart.AnchorReturnType("All")}),
                                     optimizer=popart.ConstSGD(0.001),
                                     loss=op,
                                     deviceInfo=tu.create_test_device(),
                                     userOptions=session_options)
    session.prepareDevice()

    per_tile = json.loads(session.getGraphReport())['memory']['byTile']
    total_mem = sum(per_tile['total'])
    max_mem = max(per_tile['totalIncludingGaps'])
    print(f'total_mem: {total_mem}')
    print(f'max_mem: {max_mem}')

    # Check that the total memory is within 5% of the reference
    total_ratio = total_mem / 67_201_630
    # If it is more than 5% over, it needs investigating
    assert total_ratio < 1.05
    # If it is more than 5% under, the reference should probably be updated
    assert total_ratio > 0.95

    # Check that the maximum memory is within 5% of the reference
    max_ratio = max_mem / 134_840
    # If it is more than 5% over, it needs investigating
    assert max_ratio < 1.05
    # If it is more than 5% under, the reference should probably be updated
    assert max_ratio > 0.95
예제 #7
0
def get_session(anchorIds, proto, device, loss, bps=1):
    """Create a prepared training session that anchors ``anchorIds``.

    Args:
        anchorIds: Iterable of tensor ids; each is anchored with return type
            "All".
        proto: Model proto passed as ``fnModel``.
        device: Device info passed as ``deviceInfo``.
        loss: Loss tensor id.
        bps: First argument of ``popart.DataFlow``. Defaults to 1.

    Returns:
        Tuple of (session, anchor arrays).
    """
    anchor_map = {
        anchor_id: popart.AnchorReturnType("All")
        for anchor_id in anchorIds
    }

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=popart.DataFlow(bps, anchor_map),
                                     optimizer=popart.ConstSGD(0.1),
                                     loss=loss,
                                     patterns=popart.Patterns(
                                         popart.PatternsLevel.All),
                                     deviceInfo=device)

    session.prepareDevice()
    session.weightsFromHost()
    return session, session.initAnchorArrays()
예제 #8
0
def test_stochastic_rounding():
    """Setting the random seed must work with stochastic rounding enabled.

    A float16 batchnorm training graph is built with
    ``enableStochasticRounding = True`` and ``setRandomSeed`` is called after
    the device has been prepared.
    """
    # Test data; the draw order is kept so the random stream is unchanged.
    x_data = np.random.rand(1, 3, 2, 2).astype(np.float16) * 100
    scale_data = np.random.rand(3).astype(np.float16)
    bias_data = np.random.rand(3).astype(np.float16)
    mean_data = np.random.rand(3).astype(np.float16)
    var_data = np.random.rand(3).astype(np.float16)
    epsilon = 1e-05
    momentum = 0.1

    builder = popart.Builder()
    bn_inputs = [
        builder.addInputTensor(popart.TensorInfo(x_data)),
        builder.addInitializedInputTensor(scale_data),
        builder.addInitializedInputTensor(bias_data),
        builder.addInitializedInputTensor(mean_data),
        builder.addInitializedInputTensor(var_data),
    ]
    # Five outputs are requested; only the normalized output is used below.
    y, _mean, _var, _saved_mean, _saved_var = (
        builder.aiOnnx.batchnormalization(bn_inputs, 5, epsilon, momentum))
    loss = builder.aiGraphcore.identityloss([y])
    builder.addOutputTensor(y)
    proto = builder.getModelProto()

    data_flow = popart.DataFlow(1, {y: popart.AnchorReturnType("All")})
    device = tu.create_test_device()

    options = popart.SessionOptions()
    options.enableStochasticRounding = True

    session = popart.TrainingSession(fnModel=proto,
                                     optimizer=popart.ConstSGD(0.1),
                                     loss=loss,
                                     dataFlow=data_flow,
                                     deviceInfo=device,
                                     userOptions=options)

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    # Confirm that you are able to set the random seed when
    # enableStochasticRounding is true, even though the random seed tensor
    # is not consumed by any op in the Ir
    session.setRandomSeed(0)
예제 #9
0
def popart_result_and_model(popart_config, is_bwd=False, momentum=0.0):
    """Build the Bert feed-forward graph and execute it via ``run_py``.

    The input is a normally distributed array (mean 0, std 0.02) of shape
    ``[micro_batch_size * sequence_length, hidden_size]``.

    Args:
        popart_config: Config passed to ``Bert``; also supplies the tensor
            dtype and the dimensions of the input.
        is_bwd (bool, optional): When True an L1 loss and an optimizer are
            added and the run is repeated ``num_reps_bwd`` times. Defaults to
            False.
        momentum (float, optional): When greater than 0 a ``popart.SGD`` with
            this momentum is used, otherwise ``popart.ConstSGD``. Only read
            when ``is_bwd`` is True.

    Returns:
        Tuple: input data, outputs from ``run_py``, proto, post_proto.

    Note:
        ``lr`` and ``num_reps_bwd`` are resolved from the enclosing scope.
    """
    model = Bert(popart_config)

    rows = popart_config.micro_batch_size * popart_config.sequence_length
    input_info = popart.TensorInfo(popart_config.popart_dtype,
                                   [rows, popart_config.hidden_size])
    input_tensor = model.builder.addInputTensor(input_info)

    samples = np.random.normal(0, 0.02, input_info.shape())
    data = {input_tensor: samples.astype(popart_config.dtype)}

    output = model.feed_forward(input_tensor)

    if not is_bwd:
        # Forward-only path: no loss or optimizer is supplied.
        proto = model.builder.getModelProto()
        outputs, post_proto = run_py(proto, data, output)
        return data[input_tensor], outputs, proto, post_proto

    # Training path: the loss must be added before the proto is serialized.
    l1 = model.builder.aiGraphcore.l1loss(
        [output],
        0.1,
        debugContext="l1LossVal",
        reduction=popart.ReductionType.Sum)
    proto = model.builder.getModelProto()

    if momentum > 0.0:
        optimizer = popart.SGD({
            "defaultLearningRate": (lr, False),
            "defaultMomentum": (momentum, False),
            "defaultWeightDecay": (0.0, False)
        })
    else:
        optimizer = popart.ConstSGD(lr)

    outputs, post_proto = run_py(proto,
                                 data, (output, l1),
                                 loss=l1,
                                 optimizer=optimizer,
                                 num_reps=num_reps_bwd)
    return data[input_tensor], outputs, proto, post_proto
예제 #10
0
def test_cast_no_grad(npSrcType, builderDstType):
    """Check that CastOp doesn't return a gradient Op when the casted-from
    type is not float/half.

    The gradient of the cast input is requested as an anchor and session
    construction is expected to fail with the "not in Ir Tensors" message.
    """
    np.random.seed(0)
    # Is randomly generated data ok here? Also, the tested range is [0, 10], so
    # no negative inputs are tested.
    source = np.random.uniform(0, 10, 10).astype(npSrcType)

    builder = popart.Builder()
    input_ = builder.addInputTensor(popart.TensorInfo(source))
    casted = builder.aiOnnx.cast([input_], builderDstType)
    builder.addOutputTensor(casted)
    loss_id = builder.aiGraphcore.identityloss([casted])
    proto = builder.getModelProto()

    input_grad = popart.reservedGradientPrefix() + input_
    return_all = popart.AnchorReturnType
    data_flow = popart.DataFlow(1, {
        casted: return_all("All"),
        input_grad: return_all("All"),
    })

    device = tu.create_test_device()

    pattern_names = ['PreUniRepl', 'PostNRepl', 'SqrtGradOp']
    patterns = popart.Patterns(pattern_names).enableRuntimeAsserts(False)
    options = popart.SessionOptions()
    options.enableStochasticRounding = False

    with pytest.raises(popart.popart_exception) as e_info:
        popart.TrainingSession(fnModel=proto,
                               loss=loss_id,
                               dataFlow=data_flow,
                               deviceInfo=device,
                               optimizer=popart.ConstSGD(0.01),
                               patterns=patterns,
                               userOptions=options)

    message = e_info.value.args[0]
    assert message.startswith(
        f"Anchor tensor `{input_grad}' not in Ir Tensors.")
예제 #11
0
    def _setup(self):
        """Build the shaped-dropout training session and its step IO.

        Reads ``input_data``, ``drop_shape``, ``drop_ratio``,
        ``batches_per_step``, ``replication_factor`` and ``seed`` from the
        instance. Sets ``input``, ``input_grad``, ``output``, ``loss``,
        ``session``, ``anchors`` and ``stepio``.
        """
        builder = popart.Builder()
        self.input = builder.addInputTensor(
            popart.TensorInfo("FLOAT", self.input_data.shape))
        self.input_grad = popart.reservedGradientPrefix() + self.input

        self.output = builder.aiGraphcore.shapeddropout([self.input],
                                                        shape=self.drop_shape,
                                                        ratio=self.drop_ratio)

        self.loss = builder.aiGraphcore.identityloss([self.output])
        builder.addOutputTensor(self.loss)

        # Input, its gradient, the dropout output and the loss are all
        # anchored with return type "All".
        anchor_ids = (self.input, self.input_grad, self.output, self.loss)
        anchor_map = {
            tensor_id: popart.AnchorReturnType("All")
            for tensor_id in anchor_ids
        }
        data_flow = popart.DataFlow(self.batches_per_step, anchor_map)

        proto = builder.getModelProto()
        patterns = popart.Patterns(popart.PatternsLevel.All)
        device = tu.create_test_device(self.replication_factor)

        options = popart.SessionOptions()
        replicas = self.replication_factor
        if replicas > 1:
            options.enableReplicatedGraphs = True
            options.replicatedGraphCount = replicas

        self.session = popart.TrainingSession(fnModel=proto,
                                              dataFlow=data_flow,
                                              patterns=patterns,
                                              optimizer=popart.ConstSGD(0.1),
                                              loss=self.loss,
                                              userOptions=options,
                                              deviceInfo=device)

        self.session.prepareDevice()
        self.session.setRandomSeed(self.seed)
        self.anchors = self.session.initAnchorArrays()

        # Tile the input along the first axis, once per batch and replica.
        repeats = self.batches_per_step * self.replication_factor
        tiled_input = np.tile(self.input_data, [repeats, 1, 1])
        self.stepio = popart.PyStepIO({self.input: tiled_input}, self.anchors)
예제 #12
0
def test_reducemedian_indices_1_shape_infer(keepdims, axes):
    """Check shape inference for both outputs of ``reducemedian``.

    The shapes reported by the builder before the session is created are
    compared with the shapes of the anchor arrays after one training step.
    """
    data = np.random.randn(1, 3, 4, 7, 8).astype(np.float32)

    builder = popart.Builder()
    tensor = builder.addInputTensor(popart.TensorInfo(data))
    out = builder.aiGraphcore.reducemedian(
        [tensor],
        axes=axes,
        keepdims=keepdims,
        debugContext=f'test_reducemedian_values_{axes}_{keepdims}',
    )
    builder.addOutputTensor(out[0])
    builder.addOutputTensor(out[1])
    loss_id = builder.aiGraphcore.identityloss([out[0]])
    proto = builder.getModelProto()

    anchor_ids = [out[0], out[1]]
    return_all = popart.AnchorReturnType("All")
    data_flow = popart.DataFlow(1, {a: return_all for a in anchor_ids})
    device = tu.create_test_device()
    options = popart.SessionOptions()
    options.enableStochasticRounding = False

    # store the shapes here to make sure we are checking shapes
    # before the IR is complete (i.e. testing onnx shape inference)
    inferred_shapes = [
        tuple(builder.getTensorShape(a)) for a in anchor_ids
    ]

    session = popart.TrainingSession(fnModel=proto,
                                     loss=loss_id,
                                     dataFlow=data_flow,
                                     deviceInfo=device,
                                     optimizer=popart.ConstSGD(0.01),
                                     userOptions=options)
    anchor_arrays = session.initAnchorArrays()
    session.prepareDevice()

    feed = {tensor: data}
    stepio = popart.PyStepIO(feed, anchor_arrays)
    session.weightsFromHost()
    session.run(stepio)
    stepio = popart.PyStepIO(feed, anchor_arrays)

    # This tests the shape inference has run
    for anchor_id, expected in zip([out[0], out[1]], inferred_shapes):
        assert anchor_arrays[anchor_id].shape == expected
예제 #13
0
    def run_test():
        """Train one step on two replicas and return the updated weight.

        The graph comes from ``model()`` in the enclosing scope. Stochastic
        rounding and a replication factor of 2 are enabled, and every input
        array is repeated twice along a new leading axis.

        Returns:
            The anchor array of the updated-variable tensor for ``w``.
        """
        proto, data, w, loss = model()

        options = popart.SessionOptions()
        optimizer = popart.ConstSGD(0.1)
        options.enableStochasticRounding = True
        options.enableReplicatedGraphs = True
        options.replicatedGraphCount = 2
        options.engineOptions = {"target.deterministicWorkers": "portable"}

        # One copy of each input per replica.
        replicated = {
            name: np.repeat(value[np.newaxis], 2, 0)
            for name, value in data.items()
        }

        device = tu.create_test_device(2, pattern=popart.SyncPattern.Full)

        updated_weight = popart.reservedUpdatedVarPrefix() + w

        anchor_map = {
            loss: popart.AnchorReturnType("ALL"),
            updated_weight: popart.AnchorReturnType("FINAL")
        }
        data_flow = popart.DataFlow(1, anchor_map)

        session = popart.TrainingSession(fnModel=proto,
                                         dataFlow=data_flow,
                                         userOptions=options,
                                         loss=loss,
                                         optimizer=optimizer,
                                         deviceInfo=device)
        session.prepareDevice()
        session.weightsFromHost()

        anchors = session.initAnchorArrays()
        session.run(popart.PyStepIO(replicated, anchors))

        device.detach()

        return anchors[updated_weight]
예제 #14
0
def test_lstm_explicit_recompute():
    """With explicit recomputation the serialized IR must hold two LSTM ops.

    All three LSTM outputs are marked for recomputation in the backward
    pass; the main graph of the JSON-serialized IR is then inspected.
    """
    x_data = np.array([[[1., 2., 3.], [4., 5., 6.]],
                       [[7., 8., 9.], [10., 11., 12.]]]).astype(np.float32)

    input_size = x_data.shape[2]
    hidden_size = 7
    gate_rows = 4 * hidden_size

    w_data = np.random.rand(1, gate_rows, input_size).astype(np.float32)
    r_data = np.zeros((1, gate_rows, hidden_size)).astype(np.float32)

    builder = popart.Builder()
    lstm_inputs = [
        builder.addInputTensor(popart.TensorInfo(x_data)),
        builder.addInitializedInputTensor(w_data),
        builder.addInitializedInputTensor(r_data),
    ]
    Y, Y_h, Y_c = builder.aiOnnx.lstm(lstm_inputs, 3, clip=None)
    builder.recomputeOutputInBackwardPass({Y, Y_h, Y_c})
    out = builder.aiOnnx.relu([Y])
    loss = builder.aiGraphcore.identityloss([out])

    builder.addOutputTensor(loss)

    device = tu.create_test_device()

    options = popart.SessionOptions()
    options.explicitRecomputation = True

    session = popart.TrainingSession(
        fnModel=builder.getModelProto(),
        dataFlow=popart.DataFlow(1, {out: popart.AnchorReturnType("All")}),
        deviceInfo=device,
        optimizer=popart.ConstSGD(0.1),
        userOptions=options,
        loss=loss)

    # Now the test is passing we can add a check to make sure there
    # are 2 lstm ops. This is to check that the lstm is being cloned
    # and we are actually testing what we intended.
    serialized = session._serializeIr(popart.IrSerializationFormat.JSON)
    main_graph = json.loads(serialized)['maingraph']
    lstm_count = sum(1 for op in main_graph if op['type'] == 'LSTM')
    assert lstm_count == 2
def simple_training_session(tmpdir, inputShape, inputArray, BPS, art, GA=1):
    """Run one step of ``input + w1`` training and return the sum's anchor.

    Args:
        tmpdir: Not used in the body.
        inputShape: Shape of the FLOAT input and of the zero-initialized
            weight ``w1``.
        inputArray: Data fed to the input; converted to float32.
        BPS: Batches per step given to ``popart.DataFlow``.
        art: Anchor return type used for the sum output.
        GA: Accumulation factor; gradient accumulation is enabled only when
            it is greater than 1. Defaults to 1.

    Returns:
        The anchor array for the output of the add op.
    """
    builder = popart.Builder()

    in_id = builder.addInputTensor(popart.TensorInfo("FLOAT", inputShape))
    weight_id = builder.addInitializedInputTensor(
        np.zeros(inputShape, dtype=np.float32), "w1")
    sum_id = builder.aiOnnx.add([in_id, weight_id])
    loss_id = builder.aiGraphcore.l1loss([sum_id], 0.0)

    proto = builder.getModelProto()
    data_flow = popart.DataFlow(BPS, {sum_id: art})

    options = popart.SessionOptions()
    options.accumulationFactor = GA
    options.enableGradientAccumulation = GA > 1

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=data_flow,
                                     deviceInfo=tu.create_test_device(),
                                     userOptions=options,
                                     loss=loss_id,
                                     optimizer=popart.ConstSGD(0.01))
    session.prepareDevice()
    session.weightsFromHost()

    anchors = session.initAnchorArrays()
    feed = {in_id: np.array(inputArray, dtype=np.float32)}
    session.run(popart.PyStepIO(feed, anchors))

    return anchors[sum_id]
예제 #16
0
    def get_session(expand_op=True):
        """Create a prepared session for matmul followed by expand or mul.

        ``in_``, ``w`` and ``const`` are taken from the enclosing scope.

        Args:
            expand_op: When True the matmul result goes through an ``expand``
                op with ``const`` as its shape input; otherwise it is
                multiplied by a float32 array of ones of shape ``const``.

        Returns:
            Tuple of (session, stepio, anchor arrays, anchored tensor ids).
        """
        builder = popart.Builder()

        data_in = builder.addInputTensor(popart.TensorInfo("FLOAT", in_.shape))
        weight_in = builder.addInitializedInputTensor(w)
        product = builder.aiOnnx.matmul([weight_in, data_in])

        if not expand_op:
            ones_data = np.ones(shape=const).astype(np.float32)
            ones_in = builder.aiOnnx.constant(ones_data)
            result = builder.aiOnnx.mul([product, ones_in])
        else:
            shape_in = builder.aiOnnx.constant(const)
            result = builder.aiOnnx.expand([product, shape_in])

        builder.addOutputTensor(result)
        loss = builder.aiGraphcore.identityloss([result])

        # The output plus the gradients of the output, input and weight.
        grad_prefix = popart.reservedGradientPrefix
        anchor_returns = [
            result,
            grad_prefix() + result,
            grad_prefix() + data_in,
            grad_prefix() + weight_in
        ]

        options = popart.SessionOptions()
        session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                         dataFlow=popart.DataFlow(
                                             1, anchor_returns),
                                         deviceInfo=tu.create_test_device(),
                                         optimizer=popart.ConstSGD(0.1),
                                         loss=loss,
                                         userOptions=options)

        anchors = session.initAnchorArrays()
        stepio = popart.PyStepIO({data_in: in_}, anchors)

        session.prepareDevice()
        session.weightsFromHost()
        return session, stepio, anchors, anchor_returns
    def run_session(proto, loss_id):
        """Run one replicated training step and return the gradient of ``t0``.

        ``t0``, ``t0_data``, ``repl_factor`` and ``reduction`` are taken from
        the enclosing scope.

        Args:
            proto: Model proto passed as ``fnModel``.
            loss_id: Loss tensor id.

        Returns:
            The anchor array of the gradient tensor of ``t0``.
        """
        grad_id = popart.reservedGradientPrefix() + t0

        options = popart.SessionOptions()
        options.enableReplicatedGraphs = True
        options.replicatedGraphCount = repl_factor
        options.accumulationAndReplicationReductionType = reduction

        session = popart.TrainingSession(
            fnModel=proto,
            deviceInfo=tu.create_test_device(repl_factor),
            dataFlow=popart.DataFlow(1, [grad_id]),
            loss=loss_id,
            optimizer=popart.ConstSGD(0.1),
            userOptions=options)
        session.prepareDevice()

        anchor_arrays = session.initAnchorArrays()
        session.run(popart.PyStepIO({t0: t0_data}, anchor_arrays))
        return anchor_arrays[grad_id]
예제 #18
0
    def prepare(self, init_builder):
        """Build the model through ``init_builder`` and create the session.

        Args:
            init_builder: Callable that receives the new ``_Builder`` and
                returns the anchor ids.

        Side effects:
            Sets ``self._builder`` and ``self._session`` and resets
            ``self._device_prepared`` to False.
        """
        self._builder = _Builder()
        anchor_ids = init_builder(self._builder)
        self._builder._check_inputs()
        anchor_map = _get_anchors(anchor_ids, self._builder)

        # Collected in the same order as the individual steps are evaluated.
        session_args = {}
        session_args["dataFlow"] = popart.DataFlow(self.batchesPerStep,
                                                   anchor_map)
        session_args["fnModel"] = self._builder.getModelProto()
        session_args["loss"] = self._get_loss(anchor_ids)
        session_args["deviceInfo"] = self._get_device()
        session_args["optimizer"] = popart.ConstSGD(0.01)
        session_args["patterns"] = self.patterns
        session_args["userOptions"] = self.options

        self._session = self._get_session(**session_args)
        self._device_prepared = False
예제 #19
0
def test_nll_loss_input_with_invalid_input():
    """An nllloss whose label shape mismatches the probabilities must raise.

    The probabilities have shape [2, 4, 3] while the labels are declared as
    [2, 5]; session construction is expected to fail with the shape message.
    """
    # fix the random seed for this test
    np.random.seed(0)

    ## input data
    batch_size = 2
    extra_dim = 4  # e.g. sequence length in language model
    num_classes = 3

    data_shape = [batch_size, extra_dim, num_classes]
    label_shape = [batch_size, extra_dim + 1]  # Doesn't match!

    ip_data = np.random.rand(*data_shape).astype(np.float32)
    lb_data = np.random.randint(num_classes, size=label_shape)

    ###
    # Popart
    ###
    builder = popart.Builder()
    probs_in = builder.addInitializedInputTensor(ip_data)
    labels_in = builder.addInputTensor(
        popart.TensorInfo("INT32", label_shape))
    probs = builder.aiOnnx.softmax([probs_in], axis=np.size(label_shape))

    nll0 = builder.aiGraphcore.nllloss([probs, labels_in],
                                       popart.ReductionType.NoReduction)

    pattern_level = popart.PatternsLevel.NoPatterns

    with pytest.raises(popart.popart_exception) as e_info:
        popart.TrainingSession(fnModel=builder.getModelProto(),
                               dataFlow=popart.DataFlow(1, [nll0]),
                               optimizer=popart.ConstSGD(
                                   LEARNING_RATE, WEIGHT_DECAY),
                               loss=nll0,
                               patterns=popart.Patterns(pattern_level),
                               deviceInfo=tu.create_test_device())

    message = e_info.value.args[0]
    assert message.startswith(
        "The label tensor (INT32   [2 5]) must have shape [2 4] to match all but the final dimension of the probabilities tensor (FLOAT   [2 4 3])"
    )
예제 #20
0
def test_load_onnx_model_from_file(tmpdir):
    """Round-trip a model through a file and run it for inference.

    A two-input add model is written to ``model.onnx`` under ``tmpdir``,
    loaded into a second builder from that file name, and executed in an
    ``InferenceSession``; the anchored sum must equal ``[4, 6]``.
    """
    # Create a builder, store the model in a file and load it into a different
    # builder.
    builder = popart.Builder()

    shape = popart.TensorInfo("FLOAT", [2])

    i1 = builder.addInputTensor(shape)
    i2 = builder.addInputTensor(shape)
    o = builder.aiOnnx.add([i1, i2])
    builder.addOutputTensor(o)
    filename = tmpdir + "/model.onnx"
    with open(filename, 'wb') as out:
        out.write(builder.getModelProto())

    builder2 = popart.Builder(str(filename))

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    # The proto is taken from the second builder, i.e. from the loaded file.
    # No optimizer is created: an inference session does not take one.
    proto = builder2.getModelProto()

    session = popart.InferenceSession(fnModel=proto,
                                      dataFlow=dataFlow,
                                      deviceInfo=getDevice())

    anchors = session.initAnchorArrays()

    session.prepareDevice()

    inputs = {
        i1: np.array([1, 2], dtype=np.float32),
        i2: np.array([3, 4], dtype=np.float32)
    }
    stepio = popart.PyStepIO(inputs, anchors)

    session.run(stepio)

    assert np.array_equal(anchors[o], [4, 6])
예제 #21
0
def test_nll_no_underflow():
    """The float16 nllloss and its input gradient must not contain NaN.

    The probabilities are a 5x5 float16 matrix in which row ``k`` spreads
    its mass evenly over the first ``k + 1`` classes, and the label for row
    ``k`` is ``k``.
    """
    dtype = np.float16

    # Input probabilities
    prob_rows = [[1., 0., 0., 0., 0.], [0.5, 0.5, 0., 0., 0.],
                 [1 / 3.0, 1 / 3.0, 1 / 3.0, 0., 0.],
                 [0.25, 0.25, 0.25, 0.25, 0.], [0.2, 0.2, 0.2, 0.2, 0.2]]
    probs_np = np.array(prob_rows, dtype=dtype)

    labels_np = np.array([0, 1, 2, 3, 4], dtype=np.int32)

    builder = popart.Builder()
    probs = builder.addInitializedInputTensor(probs_np, "probs")
    identity_out = builder.aiOnnx.identity([probs])
    builder.addOutputTensor(identity_out)
    probs_grad = popart.reservedGradientPrefix() + probs
    labels = builder.addInputTensor(popart.TensorInfo("INT32", [5]))
    loss = builder.aiGraphcore.nllloss([probs, labels],
                                       popart.ReductionType.Sum,
                                       debugPrefix="nllLossVal")

    anchor_map = {
        probs_grad: popart.AnchorReturnType("ALL"),
        loss: popart.AnchorReturnType("ALL")
    }
    data_flow = popart.DataFlow(1, anchor_map)
    session = popart.TrainingSession(
        fnModel=builder.getModelProto(),
        loss=loss,
        deviceInfo=popart.DeviceManager().createIpuModelDevice({}),
        optimizer=popart.ConstSGD(0.00001),
        dataFlow=data_flow)
    session.prepareDevice()
    session.weightsFromHost()

    anchors = session.initAnchorArrays()
    session.run(popart.PyStepIO({labels: labels_np}, anchors))

    for anchored in (loss, probs_grad):
        assert not np.isnan(anchors[anchored]).any()
예제 #22
0
def test_randomuniform_repeatable_replica(dtypes):
    """Check that replicas of a ``randomuniform`` op produce different values.

    A graph holding a single ``randomuniform`` of shape ``[10, 1]`` is run
    once on ``replication_factor`` replicas with a fixed random seed. Every
    pair of per-replica outputs is then required to differ.

    Args:
        dtypes: Indexable test parameter; element ``1`` is passed as the
            ``dtype`` argument of ``randomuniform``.
    """
    seed = 8
    replication_factor = 2

    builder = popart.Builder()
    out = builder.aiOnnx.randomuniform(shape=[10, 1], dtype=dtypes[1])
    loss = builder.aiGraphcore.identityloss([out])
    builder.addOutputTensor(loss)

    # Replicate the graph across `replication_factor` replicas.
    opts = popart.SessionOptions()
    opts.enableReplicatedGraphs = True
    opts.replicatedGraphCount = replication_factor

    device = tu.create_test_device(replication_factor)

    model_proto = builder.getModelProto()
    data_flow = popart.DataFlow(1, {out: popart.AnchorReturnType("All")})
    all_patterns = popart.Patterns(popart.PatternsLevel.All)
    session = popart.TrainingSession(fnModel=model_proto,
                                     dataFlow=data_flow,
                                     patterns=all_patterns,
                                     deviceInfo=device,
                                     userOptions=opts,
                                     optimizer=popart.ConstSGD(0.1),
                                     loss=loss)

    session.prepareDevice()
    session.setRandomSeed(seed)
    session.weightsFromHost()

    anchors = session.initAnchorArrays()
    session.run(popart.PyStepIO({}, anchors))

    # The anchor is indexed by replica first; compare every unordered pair.
    per_replica = anchors[out]
    for ai, bi in itertools.combinations(range(replication_factor), 2):
        print(f'Checking o[{ai}] is not equal to o[{bi}]')
        assert not np.allclose(per_replica[ai], per_replica[bi])
예제 #23
0
def init_session(proto, loss, dataFlow, userOpts, device, training=True):
    """Create a PopART session, prepare the device and allocate anchors.

    Args:
        proto: Model proto handed to the session as ``fnModel``.
        loss: Loss tensor; only used when ``training`` is true.
        dataFlow: ``popart.DataFlow`` describing the anchors.
        userOpts: Session options forwarded as ``userOptions``.
        device: Device info forwarded as ``deviceInfo``.
        training: Build a ``TrainingSession`` (with ``ConstSGD(0.001)``)
            when true, otherwise an ``InferenceSession``.

    Returns:
        ``Session(session, anchors)`` -- ``Session`` is defined elsewhere
        in the file (presumably a small named container).
    """
    # Arguments shared by both session flavours.
    shared_kwargs = dict(fnModel=proto,
                         deviceInfo=device,
                         dataFlow=dataFlow,
                         userOptions=userOpts)

    if training:
        mode = "training"
        session = popart.TrainingSession(loss=loss,
                                         optimizer=popart.ConstSGD(0.001),
                                         **shared_kwargs)
    else:
        mode = "validation"
        session = popart.InferenceSession(**shared_kwargs)

    print("Compiling the {} graph.".format(mode))
    session.prepareDevice()
    session.setRandomSeed(1)

    # Host-side buffers that receive the anchored results of each run.
    anchors = session.initAnchorArrays()
    return Session(session, anchors)
예제 #24
0
def test_set_weights_from_host():
    """Running a training session without ``weightsFromHost`` must fail.

    The graph adds a streamed input to an initialised input tensor. The
    session is prepared but ``weightsFromHost`` is deliberately never
    called, so ``session.run`` is expected to raise a ``popart_exception``
    whose message starts with the text asserted below.
    """
    builder = popart.Builder()

    vec2 = popart.TensorInfo("FLOAT", [2])
    i1 = builder.addInputTensor(vec2)
    i2 = builder.addInitializedInputTensor(np.array([1, 2],
                                                    dtype=np.float32))
    o = builder.aiOnnx.add([i1, i2])
    loss = builder.aiGraphcore.identityloss([o])

    proto = builder.getModelProto()
    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})
    sgd = popart.ConstSGD(0.01)

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=dataFlow,
                                     loss=loss,
                                     optimizer=sgd,
                                     deviceInfo=getDevice())

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    # Note: no session.weightsFromHost() before running.
    feed = {i1: np.array([1, 2], dtype=np.float32)}
    stepio = popart.PyStepIO(feed, anchors)

    with pytest.raises(popart.popart_exception) as e_info:
        session.run(stepio)

    message = e_info.value.args[0]
    assert message.startswith("Must call weightsFromHost before run as the")
예제 #25
0
def test_no_prepare_device():
    """``modelToHost`` on an unprepared session must raise.

    A training session is created for a small graph containing a constant
    node, but ``prepareDevice`` is never called. Calling ``modelToHost`` is
    then expected to raise a ``popart_exception`` whose message starts with
    the text asserted below.
    """
    popart.getLogger().setLevel("TRACE")

    builder = popart.Builder()

    mat2x2 = popart.TensorInfo("FLOAT", [2, 2])
    i1 = builder.addInputTensor(mat2x2)
    i2 = builder.addInputTensor(popart.TensorInfo("FLOAT", [2, 2]))
    c = builder.aiOnnx.constant(np.array([[1, 2], [3, 4]], dtype=np.float32))
    # o2 = (i1 + i2) + c
    o1 = builder.aiOnnx.add([i1, i2])
    o2 = builder.aiOnnx.add([o1, c])
    loss = builder.aiGraphcore.identityloss([o2])

    proto = builder.getModelProto()
    dataFlow = popart.DataFlow(1, {o2: popart.AnchorReturnType("All")})
    sgd = popart.ConstSGD(0.01)
    opts = popart.SessionOptions()

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=dataFlow,
                                     userOptions=opts,
                                     loss=loss,
                                     optimizer=sgd,
                                     deviceInfo=tu.create_test_device())

    # Deliberately skip session.prepareDevice() -- that is the case under test.
    with pytest.raises(popart.popart_exception) as e_info:
        session.modelToHost('session_proto.onnx')

    message = e_info.value.args[0]
    assert message.startswith(
        "Devicex::prepare() must be called before Devicex::weightsToHost")
예제 #26
0
def session(skip_execution=False, include_patterns=True, momentum=False):
    """Run the model from ``model()`` through ``run_py`` and return its result.

    Args:
        skip_execution: Forwarded unchanged to ``run_py``.
        include_patterns: When true, "InplaceWorkaroundPattern" is added to
            the base pattern list.
        momentum: When true, use ``popart.SGD`` with learning rate 0.1 and
            momentum 0.9 instead of ``popart.ConstSGD(0.1)``.

    Returns:
        Whatever ``run_py`` returns (``run_py`` and ``model`` are defined
        elsewhere in the file).
    """
    model_proto, feed, out = model()

    # Patterns that are always enabled.
    pattern_names = [
        "MatMulOp",
        "MatMulLhsGradOp",
        "MatMulRhsGradOp",
        "OpToIdentity",
        "PreUniRepl",
        "PostNRepl",
        "InPlace",
    ]
    if include_patterns:
        pattern_names.append("InplaceWorkaroundPattern")

    optimizer = popart.ConstSGD(0.1)
    if momentum:
        sgd_settings = {
            "defaultLearningRate": (0.1, True),
            "defaultMomentum": (0.9, True),
        }
        optimizer = popart.SGD(sgd_settings)

    l1 = popart.L1Loss(out, 'loss', 0.1)
    enabled = popart.Patterns(pattern_names)
    return run_py(model_proto,
                  data=feed,
                  outputs=out,
                  loss=l1,
                  optimizer=optimizer,
                  patterns=enabled,
                  user_options={"enableOutlining": False},
                  skip_execution=skip_execution)
예제 #27
0
def get_replicated_dropout_session(replication_factor=4,
                                   dsize=10,
                                   num_layers=1,
                                   ratio=0.3,
                                   batches_per_step=1,
                                   seed=0):
    """Build a prepared, replicated training session of stacked dropout ops.

    Args:
        replication_factor: Number of replicas; also passed to
            ``tu.create_test_device``.
        dsize: Length of the 1-D FLOAT input tensor.
        num_layers: Number of dropout ops chained after the input.
        ratio: Dropout ratio applied by every layer.
        batches_per_step: First argument of ``popart.DataFlow``.
        seed: Value passed to ``session.setRandomSeed``.

    Returns:
        Tuple ``(session, input id, output id, input-gradient id, anchors)``.
    """
    builder = popart.Builder()
    input_id = builder.addInputTensor(popart.TensorInfo("FLOAT", [dsize]))
    input_grad_id = popart.reservedGradientPrefix() + input_id

    # Chain `num_layers` dropout ops, each consuming the previous output.
    output_id = input_id
    for _ in range(num_layers):
        [output_id] = builder.aiOnnx.dropout([output_id],
                                             num_outputs=1,
                                             ratio=ratio)
    loss = builder.aiGraphcore.identityloss([output_id])
    builder.addOutputTensor(loss)

    device = tu.create_test_device(replication_factor)

    # Anchor the output, the input and the gradient of the input.
    anchor_ids = [output_id, input_id, input_grad_id]
    anchor_map = {
        tensor_id: popart.AnchorReturnType("All")
        for tensor_id in anchor_ids
    }

    opts = popart.SessionOptions()
    opts.enableReplicatedGraphs = True
    opts.replicatedGraphCount = replication_factor

    model_proto = builder.getModelProto()
    data_flow = popart.DataFlow(batches_per_step, anchor_map)
    session = popart.TrainingSession(fnModel=model_proto,
                                     dataFlow=data_flow,
                                     optimizer=popart.ConstSGD(0.1),
                                     loss=loss,
                                     userOptions=opts,
                                     deviceInfo=device)

    session.prepareDevice()
    session.setRandomSeed(seed)
    session.weightsFromHost()
    anchors = session.initAnchorArrays()
    return session, input_id, output_id, input_grad_id, anchors
예제 #28
0
            def getAnchors(extraReduction):
                """Run one training step and return the anchor arrays.

                Builds softmax -> nllloss on the enclosing scope's
                ``ip_data`` / ``lb_data`` (with label shape ``lshape``) and
                trains with the patterns named in ``patternsList``; all four
                names come from the enclosing function, which is not visible
                from here.

                Args:
                    extraReduction: When truthy, compute an unreduced
                        ``nllloss`` followed by an explicit ``reducesum``;
                        otherwise let ``nllloss`` do the sum reduction.

                Returns:
                    The anchor dictionary filled by ``session.run``. It always
                    holds the gradient of the input and, depending on
                    ``patternsList``, the loss as well.
                """
                builder = popart.Builder()
                ip = builder.addInitializedInputTensor(ip_data)
                lb = builder.addInputTensor("INT32", lshape)

                sm = builder.aiOnnx.softmax([ip], axis=np.size(lshape))
                if extraReduction:
                    nll = builder.aiGraphcore.nllloss(
                        [sm, lb], reduction=popart.ReductionType.NoReduction)
                    loss = builder.aiOnnx.reducesum([nll])
                else:
                    loss = builder.aiGraphcore.nllloss(
                        [sm, lb], reduction=popart.ReductionType.Sum)

                anchor_ids = [popart.reservedGradientPrefix() + ip]
                # Anchor 'loss' too, unless 'SoftmaxGradDirect' is enabled
                # without 'NlllWithSoftmaxGradDirect'.
                # NOTE(review): the previous comment said SoftmaxGradDirect
                # "requires 'loss' to be anchored", yet the code leaves it out
                # in exactly that case -- presumably the pattern needs 'loss'
                # NOT to be anchored; confirm against the pattern's docs.
                if ('SoftmaxGradDirect' not in patternsList
                        or 'NlllWithSoftmaxGradDirect' in patternsList):
                    anchor_ids.append(loss)

                session = popart.TrainingSession(
                    fnModel=builder.getModelProto(),
                    loss=loss,
                    dataFlow=popart.DataFlow(1, anchor_ids),
                    optimizer=popart.ConstSGD(0.1),
                    deviceInfo=tu.create_test_device(),
                    patterns=popart.Patterns(
                        patternsList).enableRuntimeAsserts(False))
                session.prepareDevice()
                session.weightsFromHost()
                anchors = session.initAnchorArrays()
                stepio = popart.PyStepIO({lb: lb_data.astype(np.int32)},
                                         anchors)
                session.run(stepio)
                return anchors
예제 #29
0
def popart_result_and_model(popart_config, is_bwd=False):
    """Build and run the Bert feed-forward block on random input.

    Args:
        popart_config: Config object providing ``popart_dtype``, ``dtype``,
            ``batch_size``, ``sequence_length`` and ``hidden_size``; it is
            also passed to ``Bert``.
        is_bwd: When true, append an L1 loss (lambda 0.1, sum reduction) and
            run a training step with ``ConstSGD(0.01)``; otherwise run the
            forward graph only.

    Returns:
        Tuple ``(input data, outputs, proto, post_proto)`` where ``outputs``
        and ``post_proto`` are whatever ``run_py`` returns and ``proto`` is
        the model proto that was handed to ``run_py``.
    """
    builder = popart.Builder()
    popart_model = Bert(popart_config, builder=builder)

    # Input is (batch_size * sequence_length) rows of hidden_size values.
    input_info = popart.TensorInfo(popart_config.popart_dtype, [
        popart_config.batch_size * popart_config.sequence_length,
        popart_config.hidden_size
    ])
    input_tensor = builder.addInputTensor(input_info)

    data = {
        input_tensor:
        np.random.normal(0, 0.02,
                         input_info.shape()).astype(popart_config.dtype)
    }

    output = popart_model.feed_forward(input_tensor)

    if is_bwd:
        l1_lambda = 0.1
        l1 = builder.aiGraphcore.l1loss([output],
                                        l1_lambda,
                                        debugPrefix="l1LossVal",
                                        reduction=popart.ReductionType.Sum)
        # Serialise once, after the loss op has been added to the graph.
        proto = builder.getModelProto()
        optimizer = popart.ConstSGD(0.01)

        outputs, post_proto = run_py(proto,
                                     data, (output, l1),
                                     loss=l1,
                                     optimizer=optimizer)
    else:
        proto = builder.getModelProto()
        outputs, post_proto = run_py(proto, data, output)

    return data[input_tensor], outputs, proto, post_proto
def test_valid_recompute_options():
    """Mixing manual and automatic recomputation must be rejected.

    Despite its name, this test covers an invalid combination: one relu
    output is marked for manual recomputation while the session options
    also request ``RecomputationType.Standard``. Constructing the training
    session is expected to raise a ``popart_exception`` with exactly the
    message asserted below.
    """
    builder = popart.Builder()

    i1 = builder.addInputTensor(popart.TensorInfo("FLOAT", [1]))
    r1 = builder.aiOnnx.relu([i1])
    o = builder.aiOnnx.relu([r1])

    # Manual recomputation on the intermediate relu ...
    builder.recomputeOutputInBackwardPass(r1)

    # ... combined with automatic recomputation in the session options.
    opts = popart.SessionOptions()
    opts.autoRecomputation = popart.RecomputationType.Standard

    with pytest.raises(popart.popart_exception) as e_info:
        # Every argument is built inside the block, as in the original call.
        model_proto = builder.getModelProto()
        data_flow = popart.DataFlow(1, [o])
        sgd = popart.ConstSGD(0.001)
        no_patterns = popart.Patterns([])
        popart.TrainingSession(fnModel=model_proto,
                               dataFlow=data_flow,
                               optimizer=sgd,
                               loss=o,
                               patterns=no_patterns,
                               userOptions=opts,
                               deviceInfo=tu.create_test_device())

    # The spelling of the expected text is kept as-is: the comparison is exact.
    expected = "A mixture of auto and manual recomputaion is not supported"
    assert e_info.value.args[0] == expected