Example #1
import os

import torch

# Pipeline, Dataset, and TrainerConfiguration come from the library under test
# (the import path is not shown in the original snippet); TestHead is a custom
# task head defined in the test module, and `pipeline`, `dataset`, and
# `tmp_path` are pytest fixtures.
def test_training_from_pretrained_with_head_replace(pipeline: Pipeline,
                                                    dataset: Dataset,
                                                    tmp_path: str):
    # Train the original pipeline first so its weights are "pretrained".
    configuration = TrainerConfiguration(
        data_bucketing=True,
        batch_size=2,
        num_epochs=5,
        cuda_device=-1,
    )
    output_dir = os.path.join(tmp_path, "output")
    pipeline.train(output=output_dir,
                   trainer=configuration,
                   training=dataset,
                   quiet=True)

    # Replace the head, tweak the tokenizer config, then copy the pipeline.
    pipeline.set_head(TestHead)
    pipeline.config.tokenizer_config.max_nr_of_sentences = 3
    copied = pipeline.copy()

    # The copy must carry the new head and the same parameter counts ...
    assert isinstance(copied.head, TestHead)
    assert copied.num_parameters == pipeline.num_parameters
    assert copied.num_trainable_parameters == pipeline.num_trainable_parameters

    # ... its backbone weights must be identical to the trained originals ...
    copied_model_state = copied._model.state_dict()
    original_model_state = pipeline._model.state_dict()
    for key, value in copied_model_state.items():
        if "backbone" in key:
            assert torch.all(torch.eq(value, original_model_state[key]))

    # ... and the tokenizer change must survive the copy as well.
    assert copied.backbone.featurizer.tokenizer.config.max_nr_of_sentences == 3
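The backbone assertion above reduces to a key-by-key state_dict() comparison between the copied and the original model. The following standalone sketch shows that pattern in plain PyTorch; TinyModel and its backbone/head attributes are hypothetical stand-ins, not part of the library under test.

import copy

import torch
from torch import nn


class TinyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = nn.Linear(4, 4)  # stands in for the shared encoder
        self.head = nn.Linear(4, 2)      # stands in for the task head


original = TinyModel()
copied = copy.deepcopy(original)
copied.head = nn.Linear(4, 2)  # simulate replacing the head after copying

for key, value in copied.state_dict().items():
    if "backbone" in key:
        # Backbone weights must match exactly; head weights may differ.
        assert torch.all(torch.eq(value, original.state_dict()[key]))
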
Example #2
# Same assumed imports and fixtures as in Example #1.
def test_training_with_data_bucketing(pipeline: Pipeline, dataset: Dataset,
                                      tmp_path: str):
    configuration = TrainerConfiguration(data_bucketing=True,
                                         batch_size=2,
                                         num_epochs=5)

    # Data bucketing should work with eagerly loaded data ...
    pipeline.copy().train(
        output=os.path.join(tmp_path, "output"),
        trainer=configuration,
        training=dataset,
        validation=dataset,
        lazy=False,
    )

    # ... and with lazily loaded data; each run trains a fresh copy, so the
    # second run does not start from the first run's weights.
    pipeline.copy().train(
        output=os.path.join(tmp_path, "output"),
        trainer=configuration,
        training=dataset,
        validation=dataset,
        lazy=True,
    )
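
Both examples write their training output under pytest's built-in tmp_path fixture, which gives each test a fresh temporary directory (a pathlib.Path). A minimal standalone illustration, independent of the library under test:

import os


def test_tmp_path_gives_a_fresh_directory(tmp_path):
    # pytest creates a unique temporary directory per test invocation.
    output_dir = os.path.join(tmp_path, "output")
    os.makedirs(output_dir)
    assert os.path.isdir(output_dir)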