def load_model_from_tf(
    file_path,
    is_checkpoint,
    config,
    indices,
    positions,
    segments,
    task,
    builder=None,
):
    """
    Loads weights, etc. from Tensorflow files into the Graphcore IPU BERT
    implementation.

    Can read either checkpoint files, or frozen graphs, according to the
    `is_checkpoint` flag, passed in as the second argument.

    Requires input tensors to be provided to initialise the graph build.

    The user can optionally pass in a builder object (e.g. for compatibility
    with an older ONNX version). If not provided, a default builder is created.
    """
    if builder is None:
        # Create a fresh builder per call; a Builder() default argument would
        # be evaluated once at definition time and shared between calls.
        builder = popart.Builder()

    initializers = load_initializers_from_tf(file_path, is_checkpoint, config, task)
    popart_model = Bert(config, builder=builder, initializers=initializers)

    output_tensor = popart_model.build_graph(indices, positions, segments)
    proto = builder.getModelProto()
    return popart_model, proto, output_tensor
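
# A minimal usage sketch, assuming a BertConfig with the field names used by
# get_model_proto below; the checkpoint path and the "PRETRAINING" task label
# are hypothetical, for illustration only.
def example_load_from_checkpoint(config):
    builder = popart.Builder()
    sequence_info = popart.TensorInfo(
        "UINT32", [config.micro_batch_size * config.sequence_length])
    # The input tensors must be created on the same builder that is passed in.
    indices = builder.addInputTensor(sequence_info)
    positions = builder.addInputTensor(sequence_info)
    segments = builder.addInputTensor(sequence_info)
    # is_checkpoint=True selects the checkpoint reader over the frozen-graph one.
    return load_model_from_tf("ckpt/bert_model.ckpt", True, config,
                              indices, positions, segments,
                              task="PRETRAINING", builder=builder)
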
def get_model_proto(config, initializers=None):
    model = Bert(config, pipeline=True, initializers=initializers)

    sequence_info = popart.TensorInfo("UINT32", [config.micro_batch_size * config.sequence_length])
    indices = model.builder.addInputTensor(sequence_info)
    positions = model.builder.addInputTensor(sequence_info)
    segments = model.builder.addInputTensor(sequence_info)

    output = model.build_graph(indices, positions, segments)

    return onnx.load_model_from_string(model.builder.getModelProto())
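
# Sketch of inspecting and saving the returned ModelProto; onnx.save and the
# graph accessors are standard onnx APIs, and the output filename is
# illustrative only.
def example_dump_proto(config):
    proto = get_model_proto(config)
    print(f"Graph has {len(proto.graph.node)} nodes")
    onnx.save(proto, "bert_pipeline.onnx")
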
def test_warmup(custom_ops, num_steps=100000):
    builder = popart.Builder(opsets={
        "ai.onnx": 9,
        "ai.onnx.ml": 1,
        "ai.graphcore": 1
    })
    config = BertConfig(vocab_length=9728,
                        num_layers=1,
                        batch_size=1,
                        hidden_size=768,
                        sequence_length=128,
                        popart_dtype="FLOAT",
                        no_dropout=True,
                        custom_ops=['gather', 'attention'])
    popart_model = Bert(config, builder=builder)

    sequence_info = popart.TensorInfo(
        "UINT32", [config.batch_size * config.sequence_length])
    indices = builder.addInputTensor(sequence_info)
    positions = builder.addInputTensor(sequence_info)
    # Random inputs: token ids drawn from [0, vocab_length) and position ids
    # from [0, sequence_length), one step's worth of data.
    data = {
        indices: np.random.randint(
            0, config.vocab_length,
            (config.batch_size * config.sequence_length)).astype(np.uint32),
        positions: np.random.randint(
            0, config.sequence_length,
            (config.batch_size * config.sequence_length)).astype(np.uint32)
    }

    output = popart_model.build_graph(indices, positions)[0]

    losses = [popart.L1Loss(output, "l1LossVal", 0.1)]

    for loss in losses:
        loss.virtualGraph(popart_model.ipu)

    proto = popart_model.builder.getModelProto()
    optimizer = popart.SGD(0.00001)

    ipus = math.ceil(config.num_layers / config.layers_per_ipu) \
        + popart_model.layer_offset

    # Analogous to run_py, but only the setup stages
    print("Creating session and compiling graph")
    session, anchors, device = create_session(proto,
                                              data,
                                              output,
                                              optimizer,
                                              losses,
                                              ipus=ipus)

    print("Running with opimiser updates")
    times_with_optimiser = timed_run_steps(session,
                                           anchors,
                                           data,
                                           0.1,
                                           num_steps=num_steps)
    print("Running without opimiser updates")
    times_no_optimiser = timed_run_steps(session,
                                         anchors,
                                         data,
                                         None,
                                         num_steps=num_steps)

    device.detach()

    # Convert seconds to milliseconds.
    opt_np = 1000 * times_with_optimiser
    noopt_np = 1000 * times_no_optimiser

    print(f"W/  Optimiser Update")
    print(f"\tMean: {opt_np.mean():.5f}")
    print(f"\tSum:  {opt_np.sum():.5f}")
    print(f"\tRng: {opt_np.min():.5f} -> {opt_np.max():.5f}")

    print(f"W/o  Optimiser Update")
    print(f"\tMean: {noopt_np.mean():.5f}")
    print(f"\tSum:  {noopt_np.sum():.5f}")
    print(f"\tRng: {noopt_np.min():.5f} -> {noopt_np.max():.5f}")

    mean_diff = opt_np.mean() - noopt_np.mean()
    percentage_difference = 100 * mean_diff / noopt_np.mean()
    print(
        f"Mean difference, {mean_diff:.5f}ms (~{percentage_difference:.1f}%)")

    # The optimiser update should add less than 5% to the mean step time.
    assert percentage_difference < 5
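
# `timed_run_steps` and `create_session` are helpers defined elsewhere in the
# test suite. A plausible sketch of the timing loop is below, assuming the
# popart PyStepIO/Session.run API; the optimiser-refresh call is named
# updateOptimizerFromHost in recent popart releases but varies across versions.
import time

def timed_run_steps_sketch(session, anchors, data, learning_rate, num_steps):
    stepio = popart.PyStepIO(data, anchors)
    times = []
    for _ in range(num_steps):
        if learning_rate is not None:
            # Re-send optimiser state to the device, as a hyperparameter
            # update between steps would.
            session.updateOptimizerFromHost(popart.SGD(learning_rate))
        start = time.perf_counter()
        session.run(stepio)
        times.append(time.perf_counter() - start)
    # Per-step wall-clock times in seconds.
    return np.array(times)
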
def main(args):
    set_library_seeds(args.seed)

    config = bert_config_from_args(args)

    initializers = bert_pretrained_initialisers(config, args)

    logger.info("Building Model")
    model = Bert(config, pipeline=args.pipeline, initializers=initializers)

    if not config.use_packed_sequence_format:
        # When config.host_embedding is enabled, the indices and positions
        # inputs carry the embedding matrices rather than the index vectors.
        indices, positions, segments, masks, labels = bert_add_inputs(
            args, model)
        logits = model.build_graph(indices, positions, segments, masks)
        outputs, accuracies, losses, final_loss, writer = bert_add_outputs(
            args, model, logits, labels)
        dataset = get_bert_dataset(
            model, args, [indices, positions, segments, masks, labels])

    else:  # use_packed_sequence_format
        if args.task != "PRETRAINING":
            raise RuntimeError(
                "Packed sequence format currently only supported for pretraining."
            )
        input_tensor_shapes = packed_bert_utils.add_inputs(model)
        logits = packed_bert_utils.logits_graph(model)
        losses, accuracies, final_loss, outputs = packed_bert_utils.pretraining_loss_and_accuracy(
            model, logits)
        writer = bert_writer(args) if not args.inference else None
        dataset = get_pretraining_dataset(args, input_tensor_shapes)

    device = acquire_device(args, bert_required_ipus(args, model))

    logger.info(f"Dataset length: {len(dataset)}")

    data_flow = popart.DataFlow(args.batches_per_step, outputs)

    iteration = bert_iteration(args, dataset, writer)

    if args.inference:
        session, anchors = bert_inference_session(model, args, data_flow,
                                                  device)
        logger.info("Inference Started")
        # Note: these tensors are only defined on the unpacked input path.
        inputs = [indices, positions, segments, *masks, *labels]
        bert_infer_loop(args, session, dataset, inputs, logits, anchors,
                        accuracies, losses, iteration)
        device.detach()
    else:
        if not args.no_training:
            optimizer_factory = bert_optimizer_factory(args, model, iteration)
            if args.save_initializers_externally:
                save_dir = Path(args.checkpoint_dir,
                                f'model_{args.continue_training_from_epoch}')
                save_dir.mkdir(parents=True, exist_ok=True)
                weight_tensors = [
                    item for sublist in model.tensors.values()
                    for item in sublist
                ]
                vars_path = f'vars_{args.continue_training_from_epoch}.onnx'
                vars_path = os.path.join(save_dir, vars_path)
                model.builder.saveInitializersExternally(
                    weight_tensors, vars_path)

            session, anchors = bert_training_session(model, args, data_flow,
                                                     final_loss, device,
                                                     optimizer_factory)
            logger.info("Training Started")
            bert_train_loop(args, session, writer, dataset, accuracies, losses,
                            anchors, iteration, optimizer_factory)

            save_model(args, session, iteration.count)
            if args.wandb_save_checkpoints:
                artifact = wandb.Artifact(name=args.wandb_save_checkpoints,
                                          type="model")
                artifact.add_dir(args.checkpoint_dir)
                wandb.log_artifact(artifact)

            device.detach()
            logger.info("Training Finished")

    return session, iteration
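
# Hypothetical entry point: `parse_bert_args` stands in for whichever argparse
# helper the surrounding repository uses to build the `args` namespace.
if __name__ == "__main__":
    main(parse_bert_args())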