Example #1
0
def popart_result_and_model(popart_config, weight_decay=0, lr=0, l1_lambda=0):
    builder = popart.Builder()
    popart_model = Bert(popart_config, builder=builder)

    input_info = popart.TensorInfo(popart_config.popart_dtype, [
        popart_config.batch_size * popart_config.sequence_length,
        popart_config.hidden_size
    ])
    input_tensor = builder.addInputTensor(input_info)

    data = {
        input_tensor:
        np.random.normal(0, 0.02,
                         input_info.shape()).astype(popart_config.dtype)
    }

    output = popart_model.feed_forward(input_tensor)
    proto = builder.getModelProto()

    l1 = popart.L1Loss(output, "l1LossVal", l1_lambda)

    iteration = MockIteration()
    args = MockArgs(lr, weight_decay)
    optimizer_factory = BaseOptimizerFactory(args, iteration,
                                             popart_model.tensors)
    optimizer = optimizer_factory.create()

    outputs, post_proto = run_py(proto,
                                 data, (output, l1.output(0)),
                                 loss=l1,
                                 optimizer=optimizer)

    return data[input_tensor], outputs, proto, post_proto
Example #2
0
def popart_result_and_model(popart_config, is_bwd=False):
    builder = popart.Builder()
    popart_model = Bert(popart_config, builder=builder)

    input_info = popart.TensorInfo(popart_config.popart_dtype, [
        popart_config.batch_size * popart_config.sequence_length,
        popart_config.hidden_size
    ])
    input_tensor = builder.addInputTensor(input_info)

    data = {
        input_tensor:
        np.random.normal(0, 0.02,
                         input_info.shape()).astype(popart_config.dtype)
    }

    output = popart_model.feed_forward(input_tensor)
    proto = builder.getModelProto()

    if is_bwd:
        l1_lambda = 0.1
        l1 = popart.L1Loss(output, "l1LossVal", l1_lambda)
        optimizer = popart.ConstSGD(0.01)

        outputs, post_proto = run_py(proto,
                                     data, (output, l1.output(0)),
                                     loss=l1,
                                     optimizer=optimizer)
    else:
        outputs, post_proto = run_py(proto, data, output)

    return data[input_tensor], outputs, proto, post_proto
Example #3
0
def popart_result_and_model(popart_config, is_bwd=False, momentum=0.0):
    popart_model = Bert(popart_config)

    input_info = popart.TensorInfo(popart_config.popart_dtype, [
        popart_config.micro_batch_size * popart_config.sequence_length,
        popart_config.hidden_size
    ])
    input_tensor = popart_model.builder.addInputTensor(input_info)

    data = {
        input_tensor:
        np.random.normal(0, 0.02,
                         input_info.shape()).astype(popart_config.dtype)
    }

    output = popart_model.feed_forward(input_tensor)

    if is_bwd:
        l1 = popart_model.builder.aiGraphcore.l1loss(
            [output],
            0.1,
            debugContext="l1LossVal",
            reduction=popart.ReductionType.Sum)
        proto = popart_model.builder.getModelProto()

        if momentum > 0.0:
            optimizer = popart.SGD({
                "defaultLearningRate": (lr, False),
                "defaultMomentum": (momentum, False),
                "defaultWeightDecay": (0.0, False)
            })
        else:
            optimizer = popart.ConstSGD(lr)

        outputs, post_proto = run_py(proto,
                                     data, (output, l1),
                                     loss=l1,
                                     optimizer=optimizer,
                                     num_reps=num_reps_bwd)
    else:
        proto = popart_model.builder.getModelProto()
        outputs, post_proto = run_py(proto, data, output)

    return data[input_tensor], outputs, proto, post_proto
Example #4
0
def popart_result_and_model(popart_config,
                            weight_decay=0.0,
                            lr=0.0,
                            l1_lambda=0.0):
    popart_model = Bert(popart_config)
    builder = popart_model.builder

    input_info = popart.TensorInfo(popart_config.popart_dtype, [
        popart_config.micro_batch_size * popart_config.sequence_length,
        popart_config.hidden_size
    ])
    input_tensor = builder.addInputTensor(input_info)

    data = {
        input_tensor:
        np.random.normal(0, 0.02,
                         input_info.shape()).astype(popart_config.dtype)
    }

    output = popart_model.feed_forward(input_tensor)

    l1 = builder.aiGraphcore.l1loss([output],
                                    l1_lambda,
                                    debugContext="l1LossVal",
                                    reduction=popart.ReductionType.Sum)
    proto = builder.getModelProto()

    iteration = MockIteration()
    args = MockArgs("SGD", lr, weight_decay)
    optimizer_factory = BaseOptimizerFactory(args, iteration,
                                             popart_model.tensors)
    optimizer = optimizer_factory.create()

    outputs, post_proto = run_py(proto,
                                 data, (output, l1),
                                 loss=l1,
                                 optimizer=optimizer)

    return data[input_tensor], outputs, proto, post_proto