Esempio n. 1
0
def test_should_log_model(dataset_binomial, dataset_multinomial):
    mlflow.pyspark.ml.autolog(log_models=True)
    lor = LogisticRegression()

    ova1 = OneVsRest(classifier=lor)
    mlor_model = lor.fit(dataset_multinomial)
    assert _should_log_model(mlor_model)

    ova1_model = ova1.fit(dataset_multinomial)
    assert _should_log_model(ova1_model)

    with mock.patch(
            "mlflow.pyspark.ml._log_model_allowlist",
        {
            "pyspark.ml.regression.LinearRegressionModel",
            "pyspark.ml.classification.OneVsRestModel"
        },
    ), mock.patch("mlflow.pyspark.ml._logger.warning") as mock_warning:
        lr = LinearRegression()
        lr_model = lr.fit(dataset_binomial)
        assert _should_log_model(lr_model)
        lor_model = lor.fit(dataset_binomial)
        assert not _should_log_model(lor_model)
        mock_warning.called_once_with(
            _get_warning_msg_for_skip_log_model(lor_model))
        assert not _should_log_model(ova1_model)
def test_should_log_model(dataset_binomial, dataset_multinomial, dataset_text):
    mlflow.pyspark.ml.autolog(log_models=True)
    lor = LogisticRegression()

    ova1 = OneVsRest(classifier=lor)
    with mlflow.start_run():
        mlor_model = lor.fit(dataset_multinomial)
    assert _should_log_model(mlor_model)

    with mlflow.start_run():
        ova1_model = ova1.fit(dataset_multinomial)
    assert _should_log_model(ova1_model)

    tokenizer = Tokenizer(inputCol="text", outputCol="words")
    hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(),
                          outputCol="features")
    lr = LogisticRegression(maxIter=2)
    pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
    with mlflow.start_run():
        pipeline_model = pipeline.fit(dataset_text)
    assert _should_log_model(pipeline_model)

    nested_pipeline = Pipeline(
        stages=[tokenizer, Pipeline(stages=[hashingTF, lr])])
    with mlflow.start_run():
        nested_pipeline_model = nested_pipeline.fit(dataset_text)
    assert _should_log_model(nested_pipeline_model)

    with mock.patch(
            "mlflow.pyspark.ml._log_model_allowlist",
        {
            "pyspark.ml.regression.LinearRegressionModel",
            "pyspark.ml.classification.OneVsRestModel",
            "pyspark.ml.pipeline.PipelineModel",
        },
    ), mock.patch("mlflow.pyspark.ml._logger.warning") as mock_warning:
        lr = LinearRegression()
        with mlflow.start_run():
            lr_model = lr.fit(dataset_binomial)
        assert _should_log_model(lr_model)
        with mlflow.start_run():
            lor_model = lor.fit(dataset_binomial)
        assert not _should_log_model(lor_model)
        mock_warning.called_once_with(
            _get_warning_msg_for_skip_log_model(lor_model))
        assert not _should_log_model(ova1_model)
        assert not _should_log_model(pipeline_model)
        assert not _should_log_model(nested_pipeline_model)
def test_should_log_model_with_wildcards_in_allowlist(dataset_binomial,
                                                      dataset_multinomial):
    mlflow.pyspark.ml.autolog(log_models=True)
    lor = LogisticRegression()
    ova1 = OneVsRest(classifier=lor)
    ova1_model = ova1.fit(dataset_multinomial)

    with mock.patch(
            "mlflow.pyspark.ml._log_model_allowlist",
        {
            "pyspark.ml.regression.*",
            "pyspark.ml.classification.LogisticRegressionModel",
            "pyspark.ml.feature.*",
        },
    ):
        lr = LinearRegression()
        with mlflow.start_run():
            lr_model = lr.fit(dataset_binomial)
        assert _should_log_model(lr_model)
        with mlflow.start_run():
            lor_model = lor.fit(dataset_binomial)
        assert _should_log_model(lor_model)
        assert not _should_log_model(ova1_model)