Example #1
def bert_process_data(args, session, labels, data, anchors, losses,
                      predictions, iteration: Iteration,
                      optimizer_factory: ScheduledOptimizerFactory):
    labels_data = [data[label] for label in labels]
    if not np.any([np.any(label) for label in labels_data]):
        # Labels may be all padding if args.vocab_length is smaller than the vocab size used when the data was generated
        return

    stepio = popart.PyStepIO(data, anchors)

    start = time.time()
    session.run(stepio)
    duration = time.time() - start
    hw_cycles = session.getCycleCount() if args.report_hw_cycle_count else None

    iteration.add_stats(duration, hw_cycles, labels_data, anchors, losses,
                        predictions)

    if (iteration.count % iteration.steps_per_log) == 0:
        iteration.report_stats()

    utils.fetch_reports(args, session=session, execution=True)

    # should_update returns True only if either:
    #   - the learning rate mode is STEP and the current total step counter is in the schedule, or
    #   - the learning rate mode is EPOCH and the current epoch has just changed to one in the schedule
    if optimizer_factory.should_update(iteration):
        optimizer = optimizer_factory.update_and_create(iteration)
        session.updateOptimizerFromHost(optimizer)

    iteration.count += 1
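The comment on should_update above describes two schedule modes. Below is a minimal, self-contained sketch of that logic; the MinimalScheduleChecker name, the mode/schedule attributes, and the Iteration fields used are illustrative assumptions, not the real ScheduledOptimizerFactory internals.

# Hypothetical sketch of the should_update conditions described above.
# "mode", "schedule" and the iteration attributes are assumptions.
class MinimalScheduleChecker:
    def __init__(self, mode, schedule):
        self.mode = mode          # "STEP" or "EPOCH"
        self.schedule = schedule  # e.g. {0: 1e-4, 1000: 5e-5}
        self._last_epoch = None

    def should_update(self, iteration):
        if self.mode == "STEP":
            # Fire exactly when the total step counter hits a schedule key
            return iteration.count in self.schedule
        if self.mode == "EPOCH":
            # Fire only when the epoch has just changed to one in the schedule
            changed = iteration.epoch != self._last_epoch
            self._last_epoch = iteration.epoch
            return changed and iteration.epoch in self.schedule
        return False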
Example #2
def run_scheduled_optimizer_factory_case(config,
                                         iteration,
                                         epoch_truth=None,
                                         step_truth=None,
                                         option_name="defaultLearningRate"):
    """Runs a single case of the schedule optimizer factory tests. Simulates running through
    every step of every epoch required by the config, and updates the optimizer factory as
    defined by the schedule. Then checks the optimizer parameters to ensure they are correct."""
    if epoch_truth is None:
        epoch_truth = {}

    if step_truth is None:
        step_truth = {}

    factory = ScheduledOptimizerFactory(config, iteration)
    for iteration.epoch in range(iteration.epochs):
        for _ in range(iteration.steps_per_epoch):
            if factory.should_update(iteration):
                factory.update(iteration)

            if iteration.count in step_truth:
                lr = factory.option_values[option_name]
                assert lr == step_truth[iteration.count]
            iteration.count += 1

        if iteration.epoch in epoch_truth:
            lr = factory.option_values[option_name]
            assert lr == epoch_truth[iteration.epoch]
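A hypothetical invocation of the helper above. The TestConfig field and MockIteration signature are assumed from the other examples in this listing, not a confirmed API, and the schedule values are illustrative.

# Hypothetical usage; shapes of TestConfig/MockIteration are assumptions.
config = TestConfig(lr_schedule_by_step={0: 1e-3, 5: 1e-4})
iteration = MockIteration(2, 10)  # assumed signature: (epochs, steps_per_epoch)

run_scheduled_optimizer_factory_case(
    config,
    iteration,
    step_truth={0: 1e-3, 4: 1e-3, 5: 1e-4},  # expected LR at these step counts
    epoch_truth={0: 1e-4, 1: 1e-4})          # expected LR at these epoch ends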
Example #3
    def test_case(config, iteration, epoch_truth=None, step_truth=None, option_name="defaultLearningRate"):
        # None defaults instead of mutable dict default arguments
        if epoch_truth is None:
            epoch_truth = {}
        if step_truth is None:
            step_truth = {}

        factory = ScheduledOptimizerFactory(config, iteration)
        for iteration.epoch in range(iteration.epochs):
            for _ in range(iteration.steps_per_epoch):
                if factory.should_update(iteration):
                    factory.update(iteration)

                if iteration.count in step_truth:
                    lr = factory.option_values[option_name]
                    assert lr == step_truth[iteration.count]
                iteration.count += 1

            if iteration.epoch in epoch_truth:
                lr = factory.option_values[option_name]
                assert lr == epoch_truth[iteration.epoch]
Example #4
def training_run(bert_args, config, initializers, checkpoint_paths):
    logger.info("Building Model")
    model = Bert(config,
                 builder=popart.Builder(opsets={
                     "ai.onnx": 9,
                     "ai.onnx.ml": 1,
                     "ai.graphcore": 1
                 }),
                 initializers=initializers,
                 execution_mode=bert_args.execution_mode)

    indices, positions, segments, masks, labels = bert_add_inputs(
        bert_args, model)
    logits = bert_logits_graph(model, indices, positions, segments, masks,
                               bert_args.execution_mode)

    predictions, probs = bert_infer_graph(model, logits)
    losses = bert_loss_graph(model, probs, labels)
    outputs = bert_add_validation_outputs(model, predictions, losses)

    embedding_dict, positional_dict = model.get_model_embeddings()
    dataset = get_bert_dataset(model, bert_args,
                               [indices, positions, segments, masks, labels],
                               embedding_dict, positional_dict)

    data_flow = popart.DataFlow(dataset.batches_per_step, outputs)

    request_ipus, _ = calc_required_ipus(bert_args, model)
    device = acquire_device(bert_args, request_ipus)

    logger.info(f"Dataset length: {len(dataset)}")

    writer = bert_writer(bert_args)
    iteration = Iteration(
        bert_args,
        batches_per_step=dataset.batches_per_step,
        steps_per_epoch=len(dataset),
        writer=writer,
        recording_steps=bert_args.aggregate_metrics_over_steps)
    optimizer_factory = ScheduledOptimizerFactory(bert_args, iteration, "SGD",
                                                  model.tensors)
    session, anchors = bert_training_session(model, bert_args, data_flow,
                                             losses, device, optimizer_factory)

    for path in checkpoint_paths:
        ckpt_name = os.path.splitext(os.path.basename(path))[0]
        session.resetHostWeights(os.path.abspath(path))
        session.weightsFromHost()

        logger.info(f"Fine-tuning started for checkpoint: {path}")

        run_fine_tuning_store_ckpt(bert_args, model, ckpt_name, session,
                                   dataset, predictions, losses, labels,
                                   anchors)

    device.detach()
Example #5
    def test_case(config):
        iteration = MockIteration(20, 100)
        factory = ScheduledOptimizerFactory(config, iteration)

        expected_non_const = []
        if config.lr_schedule_by_epoch is not None or config.lr_schedule_by_step is not None:
            expected_non_const.append("defaultLearningRate")
        if config.ls_schedule_by_epoch is not None or config.ls_schedule_by_step is not None:
            expected_non_const.append("lossScaling")

        optimizer_options = factory.optimizer_options

        for key, value in optimizer_options.items():
            # value[1] is the is-const flag: scheduled options must be non-const
            assert not value[1] if key in expected_non_const else value[1]
Example #6
def test_scheduled_optimiser_params_const_flag(config):
    """Check that scheduled parameters are correctly set to non-const, with others remaining const"""
    iteration = MockIteration(20, 100)
    factory = ScheduledOptimizerFactory(config, iteration)

    expected_non_const = []
    if config.lr_schedule_by_epoch is not None or config.lr_schedule_by_step is not None:
        expected_non_const.append("defaultLearningRate")
    if config.ls_schedule_by_epoch is not None or config.ls_schedule_by_step is not None:
        expected_non_const.append("lossScaling")

    optimizer_options = factory.optimizer_options

    for key, value in optimizer_options.items():
        assert not value[1] if key in expected_non_const else value[1]
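For context on the value[1] flag checked above: PopART optimizers such as popart.SGD take per-option (value, is_const) pairs, and only non-const options can be updated from the host between runs. A minimal sketch; the particular option values chosen here are illustrative.

import popart

# (value, is_const) pairs: a scheduled option must be non-const so it can be
# updated from the host without rebuilding the session. Values are illustrative.
optimizer = popart.SGD({
    "defaultLearningRate": (1e-4, False),   # scheduled -> non-const
    "lossScaling":         (512.0, False),  # scheduled -> non-const
    "defaultMomentum":     (0.9, True),     # fixed -> const
})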
Example #7
def test_schedule_with_continue_from_epoch(start_epoch, steps_per_epoch, num_epochs, lr_schedule, expected):

    config = TestConfig(**{
        "continue_training_from_epoch": start_epoch,
        "epochs": num_epochs,
        "lr_schedule_by_step": lr_schedule
    })

    iteration = Iteration(
        config,
        batches_per_step=10,
        steps_per_epoch=steps_per_epoch,
        writer=None,
        recording_steps=1)

    factory = ScheduledOptimizerFactory(config, iteration)
    lr = factory.option_values["defaultLearningRate"]
    assert lr == expected
Example #8
def test_schedule_with_continue_from_epoch(start_epoch, steps_per_epoch,
                                           num_epochs, lr_schedule, expected):
    """Make sure the optimiser restarts the schedule from the correct point when resuming training
    from a given epoch"""

    config = TestConfig(
        **{
            "continue_training_from_epoch": start_epoch,
            "epochs": num_epochs,
            "lr_schedule_by_step": lr_schedule,
            "batches_per_step": 10
        })

    iteration = Iteration(config,
                          steps_per_epoch=steps_per_epoch,
                          writer=None,
                          recording_steps=1)

    factory = ScheduledOptimizerFactory(config, iteration)
    lr = factory.option_values["defaultLearningRate"]
    assert lr == expected
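A hypothetical parametrization of the test above, assuming the iteration's step counter resumes at start_epoch * steps_per_epoch: continuing from epoch 2 at 100 steps per epoch puts the counter at 200, so the schedule entry keyed at step 150 should be the one in effect.

# Hypothetical values, not taken from the real test parametrization:
# resuming at step 200 means the lr keyed at step 150 should be restored.
test_schedule_with_continue_from_epoch(
    start_epoch=2,
    steps_per_epoch=100,
    num_epochs=5,
    lr_schedule={0: 1e-3, 150: 1e-4, 400: 1e-5},
    expected=1e-4)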
Example #9
def run_fine_tuning_store_ckpt(bert_args, model, ckpt_name, session, dataset,
                               predictions, losses, labels, anchors):

    writer = bert_writer(bert_args)
    iteration = Iteration(
        bert_args,
        batches_per_step=dataset.batches_per_step,
        steps_per_epoch=len(dataset),
        writer=writer,
        recording_steps=bert_args.aggregate_metrics_over_steps)
    optimizer_factory = ScheduledOptimizerFactory(bert_args, iteration,
                                                  model.tensors)

    for iteration.epoch in range(iteration.start_epoch, bert_args.epochs):
        for data in dataset:
            bert_process_data(bert_args, session, labels, data, anchors,
                              losses, predictions, iteration,
                              optimizer_factory)

    model_fn = os.path.join(bert_args.checkpoint_dir, "squad_output",
                            f"squad_final_{ckpt_name}.onnx")
    session.modelToHost(model_fn)
Example #10
def main(args):
    set_library_seeds(args.seed)

    config = bert_config_from_args(args)

    initializers = bert_pretrained_initialisers(config, args)

    logger.info("Building Model")
    # Specifying ai.onnx opset9 for the slice syntax
    model = Bert(config,
                 builder=popart.Builder(opsets={
                     "ai.onnx": 9,
                     "ai.onnx.ml": 1,
                     "ai.graphcore": 1
                 }),
                 initializers=initializers,
                 execution_mode=args.execution_mode)

    # If config.host_embedding is enabled, indices and positions will contain the embedding matrices instead of the index vectors.
    indices, positions, segments, masks, labels = bert_add_inputs(args, model)
    logits = bert_logits_graph(model, indices, positions, segments, masks)

    if args.inference:
        predictions = None
        losses = []
        if args.task == "PRETRAINING":
            # If this is a pretraining session, labels for NSP and MLM are already within the dataset,
            # so we can always calculate prediction performance
            predictions, _ = bert_infer_graph(model,
                                              logits,
                                              include_probs=False)

            if args.inference_lm_perplexity:
                losses = bert_perplexity_graph(model, logits, labels)

            outputs = bert_add_validation_outputs(model, predictions, losses)
        else:
            if args.inference_lm_perplexity:
                raise RuntimeError(
                    "Masked LM perplexity is only supported in pretraining.")

            outputs = bert_add_logit_outputs(model, logits)

        writer = None
    else:
        predictions, probs = bert_infer_graph(model, logits)
        losses = bert_loss_graph(model, probs, labels)
        outputs = bert_add_validation_outputs(model, predictions, losses)
        writer = bert_writer(args)

    embedding_dict, positional_dict = model.get_model_embeddings()

    dataset = get_bert_dataset(model, args,
                               [indices, positions, segments, masks, labels],
                               embedding_dict, positional_dict,
                               config.host_embedding == "MERGE")
    logger.info(f"Dataset length: {len(dataset)}")

    data_flow = popart.DataFlow(dataset.batches_per_step, outputs)

    iteration = Iteration(args,
                          batches_per_step=dataset.batches_per_step,
                          steps_per_epoch=len(dataset),
                          writer=writer,
                          recording_steps=args.aggregate_metrics_over_steps)

    request_ipus, _ = calc_required_ipus(args, model)

    device = acquire_device(args, request_ipus)

    if args.inference:
        session, anchors = bert_inference_session(model, args, data_flow,
                                                  device)
        logger.info("Inference Started")
        inputs = [indices, positions, segments, *masks, *labels]
        bert_infer_loop(args, session, dataset, inputs, logits, anchors,
                        labels, predictions, losses, iteration)
        device.detach()
    else:
        if not args.no_training:
            optimizer_factory = ScheduledOptimizerFactory(
                args, iteration, model.tensors)

            session, anchors = bert_training_session(model, args, data_flow,
                                                     losses, device,
                                                     optimizer_factory)
            logger.info("Training Started")
            bert_train_loop(args, session, writer, dataset, labels,
                            predictions, losses, anchors, iteration,
                            optimizer_factory)

            device.detach()
            logger.info("Training Finished")

    return session, iteration
Example #11
    def test(config, iteration, true_scaling, test_case):
        builder = popart.Builder()

        w0name = "weight_0"
        w1name = "weight_1"
        w2name = "weight_2"

        input0Shape = [1, 1, 1]
        input0 = builder.addInputTensor(
            popart.TensorInfo("FLOAT", input0Shape), "input0")

        w0data = np.array([test_case[0][0]], dtype=np.float32)
        w0R = np.empty([1], dtype=np.float32)
        w0Id = builder.addInitializedInputTensor(w0data, w0name)

        w1data = np.array([test_case[1][0]], dtype=np.float32)
        w1R = np.empty([1], dtype=np.float32)
        w1Id = builder.addInitializedInputTensor(w1data, w1name)

        w2data = np.array([test_case[2][0]], dtype=np.float32)
        w2R = np.empty([1], dtype=np.float32)
        w2Id = builder.addInitializedInputTensor(w2data, w2name)

        add0 = builder.aiOnnx.add([w0Id, input0])
        add1 = builder.aiOnnx.add([w1Id, add0])
        add2 = builder.aiOnnx.add([w2Id, add1])
        loss = builder.aiGraphcore.l1loss([add2],
                                          1.0,
                                          debugContext="l1LossVal")
        builder.addOutputTensor(add2)

        proto = builder.getModelProto()
        dataFlow = popart.DataFlow(1, {})
        opts = popart.SessionOptions()
        opts.reportOptions = {"showExecutionSteps": "true"}
        pat = popart.Patterns(popart.PatternsLevel.Default)
        dm = popart.DeviceManager()
        dm.setOnDemandAttachTimeout(int(1e4))
        device = dm.acquireAvailableDevice(
            1,
            connectionType=popart.DeviceConnectionType.OnDemand,
            selectionCriterion=popart.DeviceSelectionCriterion.Random)
        if device is None:
            raise OSError("Failed to acquire IPU.")

        # The stage->tensor map would come from the Bert model in reality
        # (see model.tensors)
        mock_tensor_map = {0: [w0Id], 1: [w1Id], 2: [w2Id]}

        factory = ScheduledOptimizerFactory(config,
                                            iteration,
                                            tensors=mock_tensor_map)
        assert_scaled_lr(factory, true_scaling)

        optimizer_step0 = factory.create()

        session = popart.TrainingSession(fnModel=proto,
                                         dataFlow=dataFlow,
                                         userOptions=opts,
                                         loss=loss,
                                         optimizer=optimizer_step0,
                                         patterns=pat,
                                         deviceInfo=device)

        session.prepareDevice()
        session.weightsFromHost()
        anchors = session.initAnchorArrays()

        input_data = np.array([3.1415], dtype=np.float32)
        stepio = popart.PyStepIO({input0: input_data}, anchors)

        for step in range(iteration.total_steps):
            session.run(stepio)
            session.weightsToHost()
            weightsRead = popart.PyWeightsIO({w0Id: w0R, w1Id: w1R, w2Id: w2R})
            session.readWeights(weightsRead)

            assert np.isclose(test_case[0][step + 1], w0R)
            assert np.isclose(test_case[1][step + 1], w1R)
            assert np.isclose(test_case[2][step + 1], w2R)

            iteration.count += 1

            if factory.should_update(iteration):
                optimizer_step1 = factory.update_and_create(iteration)
                assert_scaled_lr(factory, true_scaling)

                session.updateOptimizerFromHost(optimizer_step1)
Example #12
def main(args):
    set_library_seeds(args.seed)

    config = bert_config_from_args(args)

    initializers = bert_pretrained_initialisers(config, args)

    logger.info("Building Model")
    # Specifying ai.onnx opset9 for the slice syntax
    # TODO: Change slice to opset10
    model = Bert(config,
                 builder=popart.Builder(
                     opsets={"ai.onnx": 9, "ai.onnx.ml": 1, "ai.graphcore": 1}),
                 initializers=initializers)

    indices, positions, segments, masks, labels = bert_add_inputs(args, model)
    logits = bert_logits_graph(model, indices, positions, segments, masks)

    if args.inference:
        outputs = bert_add_infer_outputs(model, logits)
        losses = []
        writer = None
    else:
        predictions, probs = bert_infer_graph(model, logits)
        losses = bert_loss_graph(model, probs, labels)
        outputs = bert_add_validation_outputs(model, predictions, losses)
        writer = bert_writer(args)

    dataset = get_bert_dataset(model, args, [indices, positions, segments, masks, labels])
    logger.info(f"Dataset length: {len(dataset)}")

    data_flow = popart.DataFlow(dataset.batches_per_step, outputs)

    iteration = Iteration(
        args,
        batches_per_step=dataset.batches_per_step,
        steps_per_epoch=len(dataset),
        writer=writer,
        recording_steps=args.aggregate_metrics_over_steps)

    request_ipus, _ = calc_required_ipus(args, model)

    device = acquire_device(args, request_ipus)

    if args.inference:
        session, anchors = bert_inference_session(model, args, data_flow, losses, device)
        logger.info("Inference Started")
        bert_infer_loop(args, session,
                        dataset, logits, anchors,
                        iteration)
        device.detach()
    else:
        if not args.no_training:
            optimizer_factory = ScheduledOptimizerFactory(args,
                                                          iteration,
                                                          model.pipeline_stage_tensors)

            session, anchors = bert_training_session(model,
                                                     args,
                                                     data_flow,
                                                     losses,
                                                     device,
                                                     optimizer_factory)
            logger.info("Training Started")
            bert_train_loop(args, session, writer,
                            dataset, labels, predictions, losses, anchors,
                            iteration, optimizer_factory)

            device.detach()
            logger.info("Training Finished")
        if not args.no_validation:
            logger.info("Doing Validation")
            main(utils.get_validation_args(args))

    return session, iteration
Example #13
def bert_optimizer_factory(args, model, iteration):
    if args.learning_rate_function == "Linear":
        return LinearOptimizerFactory(args, iteration, model.tensors)
    else:
        return ScheduledOptimizerFactory(args, iteration, model.tensors)
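A hypothetical call site for the dispatcher above, mirroring the update pattern from Example #1; the assumption here is that args carries learning_rate_function alongside the usual schedule fields, and that session/iteration are already set up as in the training examples.

# Hypothetical usage of the dispatcher above inside a training step.
optimizer_factory = bert_optimizer_factory(args, model, iteration)
if optimizer_factory.should_update(iteration):
    # Both factory types expose the same update/create interface here
    session.updateOptimizerFromHost(
        optimizer_factory.update_and_create(iteration))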