Example 1
def bert_process_data(args, session, labels, data, anchors, losses,
                      predictions, iteration: Iteration,
                      optimizer_factory: ScheduledOptimizerFactory):
    labels_data = [data[label] for label in labels]
    if not np.any([np.any(label) for label in labels_data]):
        # Labels may be all padding if args.vocab_length is smaller than the vocabulary size used when the data was generated
        return

    stepio = popart.PyStepIO(data, anchors)

    start = time.time()
    session.run(stepio)
    duration = time.time() - start
    hw_cycles = session.getCycleCount() if args.report_hw_cycle_count else None

    iteration.add_stats(duration, hw_cycles, labels_data, anchors, losses,
                        predictions)

    if (iteration.count % iteration.steps_per_log) == 0:
        iteration.report_stats()

    utils.fetch_reports(args, session=session, execution=True)

    # The following will only be true if either:
    #   - the learning rate mode is STEP and the current total step count is in the schedule, or
    #   - the learning rate mode is EPOCH and the epoch has just changed to one in the schedule
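    # Illustrative only (the schedule format is assumed here, not defined in
    # this snippet): a STEP schedule might map cumulative step counts to
    # learning rates, e.g. {0: 1e-4, 1000: 5e-5}, while an EPOCH schedule
    # would key the same kind of mapping on epoch numbers instead.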
    if optimizer_factory.should_update(iteration):
        optimizer = optimizer_factory.update_and_create(iteration)
        session.updateOptimizerFromHost(optimizer)

    iteration.count += 1
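The step loop above relies on an Iteration object to accumulate per-step statistics and to decide when to log. Below is a minimal, self-contained sketch of that counting and logging pattern; MiniIteration and its fields are illustrative assumptions, not the Iteration class used by the BERT application.

import time


class MiniIteration:
    """Toy stand-in for the Iteration bookkeeping object (illustration only)."""

    def __init__(self, steps_per_log=10):
        self.count = 0
        self.steps_per_log = steps_per_log
        self.durations = []

    def add_stats(self, duration):
        self.durations.append(duration)

    def report_stats(self):
        recent = self.durations[-self.steps_per_log:]
        mean_duration = sum(recent) / len(recent)
        print(f"step {self.count}: mean step time {mean_duration:.6f}s")


iteration = MiniIteration(steps_per_log=10)
for _ in range(25):
    start = time.time()
    # session.run(stepio) would execute the training step here
    duration = time.time() - start
    iteration.add_stats(duration)
    if (iteration.count % iteration.steps_per_log) == 0:
        iteration.report_stats()
    iteration.count += 1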
Example 2
def run_scheduled_optimizer_factory_case(config,
                                         iteration,
                                         epoch_truth=None,
                                         step_truth=None,
                                         option_name="defaultLearningRate"):
    """Runs a single case of the schedule optimizer factory tests. Simulates running through
    every step of every epoch required by the config, and updates the optimizer factory as
    defined by the schedule. Then checks the optimizer parameters to ensure they are correct."""
    if epoch_truth is None:
        epoch_truth = {}

    if step_truth is None:
        step_truth = {}

    factory = ScheduledOptimizerFactory(config, iteration)
    for iteration.epoch in range(iteration.epochs):
        for _ in range(iteration.steps_per_epoch):
            if factory.should_update(iteration):
                factory.update(iteration)

            if iteration.count in step_truth:
                lr = factory.option_values[option_name]
                assert lr == step_truth[iteration.count]
            iteration.count += 1

        if iteration.epoch in epoch_truth:
            lr = factory.option_values[option_name]
            assert lr == epoch_truth[iteration.epoch]
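To make the truth-dict checking pattern above concrete without the real ScheduledOptimizerFactory, here is a self-contained sketch that drives a toy factory whose schedule is a plain {step: learning_rate} dict. ToyStepScheduleFactory, the schedule format and the truth values are illustrative assumptions, not part of the repository.

from types import SimpleNamespace


class ToyStepScheduleFactory:
    """Toy factory with a {step: learning_rate} schedule (illustration only)."""

    def __init__(self, schedule):
        self.schedule = schedule
        self.option_values = {"defaultLearningRate": schedule[0]}

    def should_update(self, iteration):
        return iteration.count in self.schedule

    def update(self, iteration):
        self.option_values["defaultLearningRate"] = self.schedule[iteration.count]


iteration = SimpleNamespace(count=0, epoch=0, epochs=2, steps_per_epoch=4)
factory = ToyStepScheduleFactory({0: 1e-4, 4: 5e-5})
step_truth = {1: 1e-4, 5: 5e-5}

for iteration.epoch in range(iteration.epochs):
    for _ in range(iteration.steps_per_epoch):
        if factory.should_update(iteration):
            factory.update(iteration)
        if iteration.count in step_truth:
            assert factory.option_values["defaultLearningRate"] == step_truth[iteration.count]
        iteration.count += 1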
Example 3
    def test_case(config, iteration, epoch_truth=None, step_truth=None,
                  option_name="defaultLearningRate"):
        if epoch_truth is None:
            epoch_truth = {}
        if step_truth is None:
            step_truth = {}

        factory = ScheduledOptimizerFactory(config, iteration)
        for iteration.epoch in range(iteration.epochs):
            for _ in range(iteration.steps_per_epoch):
                if factory.should_update(iteration):
                    factory.update(iteration)

                if iteration.count in step_truth:
                    lr = factory.option_values[option_name]
                    assert lr == step_truth[iteration.count]
                iteration.count += 1

            if iteration.epoch in epoch_truth:
                lr = factory.option_values[option_name]
                assert lr == epoch_truth[iteration.epoch]
Example 4
    def test(config, iteration, true_scaling, test_case):
        builder = popart.Builder()

        w0name = "weight_0"
        w1name = "weight_1"
        w2name = "weight_2"

        input0Shape = [1, 1, 1]
        input0 = builder.addInputTensor(
            popart.TensorInfo("FLOAT", input0Shape), "input0")

        w0data = np.array([test_case[0][0]], dtype=np.float32)
        w0R = np.empty([1], dtype=np.float32)
        w0Id = builder.addInitializedInputTensor(w0data, w0name)

        w1data = np.array([test_case[1][0]], dtype=np.float32)
        w1R = np.empty([1], dtype=np.float32)
        w1Id = builder.addInitializedInputTensor(w1data, w1name)

        w2data = np.array([test_case[2][0]], dtype=np.float32)
        w2R = np.empty([1], dtype=np.float32)
        w2Id = builder.addInitializedInputTensor(w2data, w2name)

        add0 = builder.aiOnnx.add([w0Id, input0])
        add1 = builder.aiOnnx.add([w1Id, add0])
        add2 = builder.aiOnnx.add([w2Id, add1])
        loss = builder.aiGraphcore.l1loss([add2],
                                          1.0,
                                          debugContext="l1LossVal")
        builder.addOutputTensor(add2)

        proto = builder.getModelProto()
        dataFlow = popart.DataFlow(1, {})
        opts = popart.SessionOptions()
        opts.reportOptions = {"showExecutionSteps": "true"}
        pat = popart.Patterns(popart.PatternsLevel.Default)
        dm = popart.DeviceManager()
        dm.setOnDemandAttachTimeout(int(1e4))
        device = dm.acquireAvailableDevice(
            1,
            connectionType=popart.DeviceConnectionType.OnDemand,
            selectionCriterion=popart.DeviceSelectionCriterion.Random)
        if device is None:
            raise OSError("Failed to acquire IPU.")

        # The stage->tensor map would come from the Bert model in reality
        # (see model.tensors)
        mock_tensor_map = {0: [w0Id], 1: [w1Id], 2: [w2Id]}

        factory = ScheduledOptimizerFactory(config,
                                            iteration,
                                            tensors=mock_tensor_map)
        assert_scaled_lr(factory, true_scaling)

        optimizer_step0 = factory.create()

        session = popart.TrainingSession(fnModel=proto,
                                         dataFlow=dataFlow,
                                         userOptions=opts,
                                         loss=loss,
                                         optimizer=optimizer_step0,
                                         patterns=pat,
                                         deviceInfo=device)

        session.prepareDevice()
        session.weightsFromHost()
        anchors = session.initAnchorArrays()

        input_data = np.array([3.1415], dtype=np.float32)
        stepio = popart.PyStepIO({input0: input_data}, anchors)

        for step in range(iteration.total_steps):
            session.run(stepio)
            session.weightsToHost()
            weightsRead = popart.PyWeightsIO({w0Id: w0R, w1Id: w1R, w2Id: w2R})
            session.readWeights(weightsRead)

            assert np.isclose(test_case[0][step + 1], w0R)
            assert np.isclose(test_case[1][step + 1], w1R)
            assert np.isclose(test_case[2][step + 1], w2R)

            iteration.count += 1

            if factory.should_update(iteration):
                optimizer_step1 = factory.update_and_create(iteration)
                assert_scaled_lr(factory, true_scaling)

                session.updateOptimizerFromHost(optimizer_step1)
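One way to see where the test_case truth tables could come from: with an L1 loss of scale 1.0 on the single-element output of the add chain, the gradient with respect to every weight is sign(w0 + w1 + w2 + x), so plain SGD moves each weight by -lr * sign(output) per step. The helper below is a hedged sketch under that assumption, not the generator used by the repository; per_step_lrs is a hypothetical input holding the already-scaled learning rate applied to each weight at each step.

import numpy as np


def build_truth_table(initial_weights, per_step_lrs, x):
    """Expected weight trajectories for the add-chain model (illustration only).

    per_step_lrs[s][i] is the (already scaled) learning rate applied to
    weight i at step s. Returns one row per weight, matching test_case[i].
    """
    weights = np.array(initial_weights, dtype=np.float32)
    table = [weights.copy()]
    for step_lrs in np.asarray(per_step_lrs, dtype=np.float32):
        # d/dw_i of |w0 + w1 + w2 + x| is sign(w0 + w1 + w2 + x) for every i
        grad_sign = np.sign(weights.sum() + x)
        weights = weights - step_lrs * grad_sign
        table.append(weights.copy())
    return [list(col) for col in np.stack(table, axis=1)]

For instance, build_truth_table([4.0, 3.0, 2.0], [[0.1, 0.2, 0.4]] * 3, 3.1415) returns three rows of four values each (the initial weight plus one value per step), which is the shape the asserts in the step loop above index into.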