def test_should_log_model(dataset_binomial, dataset_multinomial):
    """Check ``_should_log_model`` with the unpatched and a patched allowlist.

    First fits a LogisticRegression and a OneVsRest wrapper around it and
    expects both resulting models to be loggable. Then replaces
    ``mlflow.pyspark.ml._log_model_allowlist`` with a set that omits
    LogisticRegressionModel and expects the skip-log warning to be emitted
    for the logistic regression model.
    """
    # NOTE(review): another function with this exact name is defined later in
    # this file; the later definition rebinds the name at import time, so only
    # that one would be collected by pytest. Confirm which version is intended.
    mlflow.pyspark.ml.autolog(log_models=True)
    lor = LogisticRegression()

    ova1 = OneVsRest(classifier=lor)
    mlor_model = lor.fit(dataset_multinomial)
    assert _should_log_model(mlor_model)

    ova1_model = ova1.fit(dataset_multinomial)
    assert _should_log_model(ova1_model)

    with mock.patch(
        "mlflow.pyspark.ml._log_model_allowlist",
        {
            "pyspark.ml.regression.LinearRegressionModel",
            "pyspark.ml.classification.OneVsRestModel",
        },
    ), mock.patch("mlflow.pyspark.ml._logger.warning") as mock_warning:
        lr = LinearRegression()
        lr_model = lr.fit(dataset_binomial)
        assert _should_log_model(lr_model)

        lor_model = lor.fit(dataset_binomial)
        assert not _should_log_model(lor_model)
        # Must be ``assert_called_once_with``: the former ``called_once_with``
        # was an auto-created Mock attribute that verified nothing (and is
        # rejected with AttributeError on Python 3.12+).
        mock_warning.assert_called_once_with(
            _get_warning_msg_for_skip_log_model(lor_model)
        )

        # OneVsRestModel itself is allowlisted here, yet the model is rejected;
        # presumably because its wrapped LogisticRegression models are not
        # allowlisted -- confirm against ``_should_log_model``.
        assert not _should_log_model(ova1_model)
def test_should_log_model(dataset_binomial, dataset_multinomial, dataset_text):
    """Check ``_should_log_model`` for plain, meta and pipeline models.

    With the unpatched allowlist, a LogisticRegression model, a OneVsRest
    model, a Pipeline model and a nested Pipeline model are all expected to be
    loggable. With ``mlflow.pyspark.ml._log_model_allowlist`` patched to a set
    that omits LogisticRegressionModel, the logistic regression model and
    every composite model built on it are expected to be rejected, and the
    skip-log warning is expected for the logistic regression model.
    """
    # NOTE(review): an earlier function in this file carries the same name;
    # this later definition is the one Python keeps bound to that name.
    mlflow.pyspark.ml.autolog(log_models=True)
    lor = LogisticRegression()

    ova1 = OneVsRest(classifier=lor)
    with mlflow.start_run():
        mlor_model = lor.fit(dataset_multinomial)
    assert _should_log_model(mlor_model)

    with mlflow.start_run():
        ova1_model = ova1.fit(dataset_multinomial)
    assert _should_log_model(ova1_model)

    # Text pipeline: tokenizer -> hashing TF -> logistic regression.
    tokenizer = Tokenizer(inputCol="text", outputCol="words")
    hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features")
    lr = LogisticRegression(maxIter=2)
    pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
    with mlflow.start_run():
        pipeline_model = pipeline.fit(dataset_text)
    assert _should_log_model(pipeline_model)

    # Same stages, but with the last two wrapped in an inner Pipeline.
    nested_pipeline = Pipeline(stages=[tokenizer, Pipeline(stages=[hashingTF, lr])])
    with mlflow.start_run():
        nested_pipeline_model = nested_pipeline.fit(dataset_text)
    assert _should_log_model(nested_pipeline_model)

    with mock.patch(
        "mlflow.pyspark.ml._log_model_allowlist",
        {
            "pyspark.ml.regression.LinearRegressionModel",
            "pyspark.ml.classification.OneVsRestModel",
            "pyspark.ml.pipeline.PipelineModel",
        },
    ), mock.patch("mlflow.pyspark.ml._logger.warning") as mock_warning:
        lr = LinearRegression()
        with mlflow.start_run():
            lr_model = lr.fit(dataset_binomial)
        assert _should_log_model(lr_model)

        with mlflow.start_run():
            lor_model = lor.fit(dataset_binomial)
        assert not _should_log_model(lor_model)
        # Must be ``assert_called_once_with``: the former ``called_once_with``
        # was an auto-created Mock attribute that verified nothing (and is
        # rejected with AttributeError on Python 3.12+).
        mock_warning.assert_called_once_with(
            _get_warning_msg_for_skip_log_model(lor_model)
        )

        # The container classes are allowlisted here, yet these models are
        # rejected; presumably because the LogisticRegression models they
        # contain are not allowlisted -- confirm against ``_should_log_model``.
        assert not _should_log_model(ova1_model)
        assert not _should_log_model(pipeline_model)
        assert not _should_log_model(nested_pipeline_model)
def test_should_log_model_with_wildcards_in_allowlist(dataset_binomial, dataset_multinomial):
    """Check ``_should_log_model`` when the allowlist contains ``*`` entries.

    The allowlist is patched to a mix of package-level wildcard entries and
    one fully qualified class name. A LinearRegression model and a
    LogisticRegression model are expected to be loggable, while a OneVsRest
    model fitted beforehand is expected to be rejected.
    """
    mlflow.pyspark.ml.autolog(log_models=True)

    logistic = LogisticRegression()
    one_vs_rest = OneVsRest(classifier=logistic)
    # Fitted before the allowlist is patched; only evaluated afterwards.
    one_vs_rest_model = one_vs_rest.fit(dataset_multinomial)

    wildcard_allowlist = {
        "pyspark.ml.regression.*",
        "pyspark.ml.classification.LogisticRegressionModel",
        "pyspark.ml.feature.*",
    }
    with mock.patch("mlflow.pyspark.ml._log_model_allowlist", wildcard_allowlist):
        linear = LinearRegression()
        with mlflow.start_run():
            linear_model = linear.fit(dataset_binomial)
        assert _should_log_model(linear_model)

        with mlflow.start_run():
            logistic_model = logistic.fit(dataset_binomial)
        assert _should_log_model(logistic_model)

        assert not _should_log_model(one_vs_rest_model)