def test_get_params_to_log(spark_session):  # pylint: disable=unused-argument
    """Verify ``get_params_to_log`` output for an estimator, a wrapper and pipelines.

    The assertions expect parameters of child estimators to be keyed by the
    child's class name (e.g. ``LogisticRegression.maxIter``) and a pipeline
    nested inside another pipeline to be reported as ``Pipeline_2``.
    """
    # Stand-alone estimator: explicitly set values and defaults are both reported.
    log_reg = LogisticRegression(maxIter=3, standardization=False)
    log_reg_params = get_params_to_log(log_reg)
    assert log_reg_params["maxIter"] == 3
    assert not log_reg_params["standardization"]
    assert log_reg_params["family"] == log_reg.getOrDefault(log_reg.family)

    # Wrapper estimator: the wrapped classifier is named by class and its
    # params are exposed under a "<ClassName>." prefix.
    one_vs_rest = OneVsRest(classifier=log_reg, labelCol="abcd")
    ovr_params = get_params_to_log(one_vs_rest)
    assert ovr_params["classifier"] == "LogisticRegression"
    assert ovr_params["labelCol"] == "abcd"
    assert ovr_params["LogisticRegression.maxIter"] == 3
    assert ovr_params["LogisticRegression.family"] == log_reg.getOrDefault(log_reg.family)

    # Build one flat pipeline and one pipeline that nests another pipeline.
    tokenizer = Tokenizer(inputCol="text", outputCol="words")
    hashing_tf = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features")
    flat_pipeline = Pipeline(stages=[tokenizer, hashing_tf, one_vs_rest])
    inner_pipeline = Pipeline(stages=[hashing_tf, one_vs_rest])
    outer_pipeline = Pipeline(stages=[tokenizer, inner_pipeline])

    flat_params = get_params_to_log(flat_pipeline)
    nested_params = get_params_to_log(outer_pipeline)

    assert flat_params["stages"] == ["Tokenizer", "HashingTF", "OneVsRest"]
    assert nested_params["stages"] == ["Tokenizer", "Pipeline_2"]
    assert nested_params["Pipeline_2.stages"] == ["HashingTF", "OneVsRest"]
    assert nested_params["OneVsRest.classifier"] == "LogisticRegression"

    # Stage-level params must be identical whether or not the stages are nested.
    expected_stage_params = {
        "Tokenizer.inputCol": "text",
        "Tokenizer.outputCol": "words",
        "HashingTF.outputCol": "features",
        "OneVsRest.classifier": "LogisticRegression",
        "LogisticRegression.maxIter": 3,
    }
    for logged_params in (flat_params, nested_params):
        for key, expected in expected_stage_params.items():
            assert logged_params[key] == expected
def test_get_instance_param_map(spark_session):  # pylint: disable=unused-argument
    """Verify ``_get_instance_param_map`` output for an estimator, a wrapper and pipelines.

    The assertions expect child estimators to be referenced by their ``uid``,
    child params to be keyed as ``"<uid>.<param>"``, and a nested pipeline to
    appear in ``stages`` as a ``{uid: [stage uids]}`` mapping.
    """
    # Stand-alone estimator: explicitly set values and defaults are both reported.
    log_reg = LogisticRegression(maxIter=3, standardization=False)
    log_reg_params = _get_instance_param_map(log_reg)
    assert log_reg_params["maxIter"] == 3
    assert not log_reg_params["standardization"]
    assert log_reg_params["family"] == log_reg.getOrDefault(log_reg.family)

    # Wrapper estimator: the wrapped classifier is referenced by uid.
    one_vs_rest = OneVsRest(classifier=log_reg, labelCol="abcd")
    ovr_params = _get_instance_param_map(one_vs_rest)
    assert ovr_params["classifier"] == log_reg.uid
    assert ovr_params["labelCol"] == "abcd"
    assert ovr_params[f"{log_reg.uid}.maxIter"] == 3
    assert ovr_params[f"{log_reg.uid}.family"] == log_reg.getOrDefault(log_reg.family)

    # Build one flat pipeline and one pipeline that nests another pipeline.
    tokenizer = Tokenizer(inputCol="text", outputCol="words")
    hashing_tf = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features")
    flat_pipeline = Pipeline(stages=[tokenizer, hashing_tf, one_vs_rest])
    inner_pipeline = Pipeline(stages=[hashing_tf, one_vs_rest])
    outer_pipeline = Pipeline(stages=[tokenizer, inner_pipeline])

    flat_params = _get_instance_param_map(flat_pipeline)
    nested_params = _get_instance_param_map(outer_pipeline)

    assert flat_params["stages"] == [tokenizer.uid, hashing_tf.uid, one_vs_rest.uid]
    expected_nested_stages = [
        tokenizer.uid,
        {inner_pipeline.uid: [hashing_tf.uid, one_vs_rest.uid]},
    ]
    assert nested_params["stages"] == expected_nested_stages

    # Stage-level params must be identical whether or not the stages are nested.
    expected_stage_params = {
        f"{tokenizer.uid}.inputCol": "text",
        f"{tokenizer.uid}.outputCol": "words",
        f"{hashing_tf.uid}.outputCol": "features",
        f"{one_vs_rest.uid}.classifier": log_reg.uid,
        f"{log_reg.uid}.maxIter": 3,
    }
    for param_map in (flat_params, nested_params):
        for key, expected in expected_stage_params.items():
            assert param_map[key] == expected
def test_param_map_captures_wrapped_params(dataset_binomial):
    """Check that params of a classifier wrapped by ``OneVsRest`` are captured and logged.

    First inspects ``_get_instance_param_map`` directly, then fits with
    autologging enabled and compares the run's logged params against the
    stringified, truncated param map.
    """
    # NOTE(review): a later function in this file is defined with the same
    # name, which rebinds it at module level; pytest would then collect only
    # the later definition. Confirm and rename one of the two.
    log_reg = LogisticRegression(maxIter=3, standardization=False)
    one_vs_rest = OneVsRest(classifier=log_reg, labelCol="abcd")

    wrapped_prefix = log_reg.uid
    captured = _get_instance_param_map(one_vs_rest)
    assert captured["labelCol"] == "abcd"
    assert captured["classifier"] == log_reg.uid
    assert captured[f"{wrapped_prefix}.maxIter"] == 3
    assert not captured[f"{wrapped_prefix}.standardization"]
    assert captured[f"{wrapped_prefix}.tol"] == log_reg.getOrDefault(log_reg.tol)

    mlflow.pyspark.ml.autolog()
    with mlflow.start_run() as run:
        # The estimator reads its label from "abcd", so mirror the label column there.
        training_df = dataset_binomial.withColumn("abcd", dataset_binomial.label)
        one_vs_rest.fit(training_df)

    logged = get_run_data(run.info.run_id)
    expected_params = truncate_param_dict(
        stringify_dict_values(_get_instance_param_map(one_vs_rest))
    )
    assert logged.params == expected_params
def test_param_map_captures_wrapped_params(dataset_binomial):
    """Check that params of a classifier wrapped by ``OneVsRest`` are captured and logged.

    First inspects ``get_params_to_log`` directly, then fits with autologging
    enabled, compares the logged ``estimator_info.json`` hierarchy with freshly
    generated metadata, and compares the run's logged params against the
    stringified, truncated output of ``get_params_to_log``.
    """
    # NOTE(review): an earlier function in this file is defined with the same
    # name; this definition rebinds it, so pytest would collect only this one.
    # Confirm and rename one of the two.
    log_reg = LogisticRegression(maxIter=3, standardization=False)
    one_vs_rest = OneVsRest(classifier=log_reg, labelCol="abcd")

    captured = get_params_to_log(one_vs_rest)
    assert captured["labelCol"] == "abcd"
    assert captured["classifier"] == "LogisticRegression"
    assert captured["LogisticRegression.maxIter"] == 3
    assert not captured["LogisticRegression.standardization"]
    assert captured["LogisticRegression.tol"] == log_reg.getOrDefault(log_reg.tol)

    mlflow.pyspark.ml.autolog()
    with mlflow.start_run() as run:
        # The estimator reads its label from "abcd", so mirror the label column there.
        training_df = dataset_binomial.withColumn("abcd", dataset_binomial.label)
        one_vs_rest.fit(training_df)
        # The artifact is loaded while the run is still active.
        expected_metadata = _gen_estimator_metadata(one_vs_rest)
        logged_info = load_json_artifact("estimator_info.json")
        assert expected_metadata.hierarchy == logged_info["hierarchy"]

    logged = get_run_data(run.info.run_id)
    expected_params = truncate_param_dict(stringify_dict_values(get_params_to_log(one_vs_rest)))
    assert logged.params == expected_params