Esempio n. 1
0
def test_load_pipeline_with_custom_head(training_dataset):
    """Tests training a model with a custom class as the task head.

    After training, the pipeline is saved and reloaded to verify that the
    custom head type survives a round trip through ``model.tar.gz``.
    """

    # Pipeline configuration with a custom head class
    config = PipelineConfiguration(
        "test-pipeline",
        head=TaskHeadConfiguration(
            type=MyCustomHead,
            labels=[
                "blue-collar",
                "technician",
                "management",
                "services",
                "retired",
                "admin.",
            ],
        ),
        features=FeaturesConfiguration(),
    )

    # The freshly built pipeline must already expose the custom head type
    pipeline = Pipeline.from_config(config)
    assert isinstance(pipeline.head, MyCustomHead)

    # Training the model and saving it to a temporary output directory
    output = mkdtemp()
    pipeline.train(output=output, training=training_dataset)

    # Loading the trained model back from the serialized archive
    trained_pl = Pipeline.from_pretrained(os.path.join(output, "model.tar.gz"))
    trained_pl.predict("Oh yeah")

    # Asserting that the pipeline head is recognized as `MyCustomHead` instance after loading from a model.tar.gz
    assert isinstance(trained_pl.head, MyCustomHead)
Esempio n. 2
0
def test_load_pipeline_with_custom_head():
    """Train a pipeline configured with a custom head and reload it from disk."""
    job_labels = [
        "blue-collar",
        "technician",
        "management",
        "services",
        "retired",
        "admin.",
    ]
    config = PipelineConfiguration(
        "test-pipeline",
        head=TaskHeadConfiguration(type=MyCustomHead, labels=job_labels),
        features=FeaturesConfiguration(),
    )

    pipeline = Pipeline.from_config(config)
    assert isinstance(pipeline.head, MyCustomHead)

    # Data source for training, mapping CSV columns onto the head's inputs
    train = DataSource(
        source=os.path.join(TEST_RESOURCES, "resources/data/dataset_source.csv"),
        mapping={"label": "job", "text": ["education", "marital"]},
    )
    output = mkdtemp()
    pipeline.create_vocabulary(VocabularyConfiguration(sources=[train]))
    pipeline.train(output=output, training=train)

    # Round-trip through the serialized archive and re-check the head type
    trained_pl = Pipeline.from_pretrained(os.path.join(output, "model.tar.gz"))
    trained_pl.predict("Oh yeah")
    assert isinstance(trained_pl.head, MyCustomHead)
Esempio n. 3
0
def test_mlflow_logger():
    """Checks that MlflowLogger records tags, params, metrics, and artifacts.

    Simulates a small training-logging cycle (init, per-epoch metrics, end)
    and then inspects the resulting MLflow run to verify everything was
    logged as expected.
    """

    logger = MlflowLogger(experiment_name="test-experiment",
                          run_name="test_run",
                          tag1="my-tag")

    pipeline = Pipeline.from_config(
        PipelineConfiguration(
            name="test-pipeline",
            head=TaskHeadConfiguration(type=TextClassification,
                                       labels=["A", "B"]),
        ))
    trainer = TrainerConfiguration()

    # Drive the logger through a fake training run
    logger.init_train(pipeline, trainer, training=None)
    for epoch in range(0, 10):
        logger.log_epoch_metrics(epoch, metrics={"key": 10 * epoch})

    model_path = mkdtemp()
    metrics = {"metric": 200}
    logger.end_train(TrainingResults(model_path, metrics))

    run = mlflow.get_run(logger._run_id)
    assert run
    # Tags
    assert "test_run" == run.data.tags[mlflow_tags.MLFLOW_RUN_NAME]
    assert "my-tag" == run.data.tags["tag1"]
    # Parameters (note: fixed misspelled local name `expected_parmams`)
    expected_params = {
        "pipeline.features.word.trainable": "True",
        "pipeline.num_parameters": "202",
        "pipeline.num_trainable_parameters": "202",
        "pipeline.features.word.embedding_dim": "50",
        "pipeline.head.type":
        "biome.text.modules.heads.classification.text_classification.TextClassification",
        "pipeline.head.labels": "['A', 'B']",
        "pipeline.name": "test-pipeline",
        "pipeline.tokenizer.lang": "en",
        "trainer.batch_size": "16",
        "trainer.validation_metric": "-loss",
        "trainer.optimizer.type": "adam",
        "trainer.patience": "2",
        "trainer.num_epochs": "20",
        "trainer.num_serialized_models_to_keep": "1",
        "pipeline.tokenizer.remove_space_tokens": "True",
    }
    assert expected_params == run.data.params
    # Artifacts: the model directory must have been uploaded to the run
    assert os.path.basename(model_path) in os.listdir(
        urlparse(run.info.artifact_uri).path)
    # Metrics: every final metric must be stored with its exact value
    for metric in metrics:
        assert (metric in run.data.metrics
                and run.data.metrics[metric] == metrics[metric])
Esempio n. 4
0
def test_explain_without_steps():
    """Explain raises for this head by default but succeeds with n_steps=0."""
    pipeline = Pipeline.from_config(
        PipelineConfiguration(
            name="test-classifier",
            head=TaskHeadConfiguration(type=TestHeadWithRaise),
            features=FeaturesConfiguration(),
        ))
    text = "This is a simple test with only tokens in explain"

    # The default explain path is not implemented by this head
    with pytest.raises(NotImplementedError):
        pipeline.explain(text)

    # With zero steps the call succeeds and still yields an "explain" entry
    prediction = pipeline.explain(text, n_steps=0)
    assert "explain" in prediction
Esempio n. 5
0
def test_predict_batch():
    """predict_batch returns one dict prediction per input document."""
    pipeline = Pipeline.from_config(
        PipelineConfiguration(
            name="test-classifier",
            head=TaskHeadConfiguration(type=TestHead),
            features=FeaturesConfiguration(),
        ))
    batch = [{"text": "test1"}, {"text": "test2"}]
    predictions = pipeline.predict_batch(batch)

    assert len(predictions) == 2
    assert all(isinstance(prediction, dict) for prediction in predictions)
Esempio n. 6
0
def test_explain_tokenized_as_default():
    """By default, explain yields a zero-attribution entry for every token."""
    pipeline = Pipeline.from_config(
        PipelineConfiguration(
            name="test-classifier",
            head=TaskHeadConfiguration(type=TestHead),
            features=FeaturesConfiguration(),
        ))
    prediction = pipeline.explain(
        "This is a simple test with only tokens in explain")
    explanation = prediction["explain"]

    assert explanation
    assert explanation.get("text")
    for entry in explanation["text"]:
        assert isinstance(entry.get("token"), str)
        assert entry.get("attribution") == 0.0
def test_check_pipeline_inputs_and_output():
    """The custom head determines the pipeline's input and output names."""
    job_labels = [
        "blue-collar",
        "technician",
        "management",
        "services",
        "retired",
        "admin.",
    ]
    pipeline = Pipeline.from_config(
        PipelineConfiguration(
            "test-pipeline",
            head=TaskHeadConfiguration(type=MyCustomHead, labels=job_labels),
            features=FeaturesConfiguration(),
        ))

    assert pipeline.inputs == ["text", "second_text"]
    assert pipeline.output == "label"
Esempio n. 8
0
def test_explain_batch():
    """explain_batch attaches a token-level explanation to every prediction."""
    pipeline = Pipeline.from_config(
        PipelineConfiguration(
            name="test-classifier",
            head=TaskHeadConfiguration(type=TestHead),
            features=FeaturesConfiguration(),
        ))
    predictions = pipeline.explain_batch([{"text": "test1"}, {"text": "test2"}])
    assert len(predictions) == 2

    for prediction in predictions:
        explanation: Dict[str, Any] = prediction["explain"]

        assert explanation
        assert explanation.get("text")
        for entry in explanation["text"]:
            assert isinstance(entry.get("token"), str)
            assert entry.get("attribution") == 0.0
Esempio n. 9
0
def pipeline() -> Pipeline:
    """Build a minimal text-classification pipeline for the tests."""
    return Pipeline.from_config(
        PipelineConfiguration(
            name="test-classifier",
            head=TextClassificationConfiguration(labels=["one", "zero"]),
        ))