Beispiel #1
0
def test_cbm_init():
    """Check that a ``None`` payoff in any single slot is rejected.

    Each case passes ``None`` for exactly one of the four keyword
    arguments and expects a ``ValueError`` with the numeric-values message.
    """
    expected_message = "Parameters to CostBenefitMatrix must all be numeric values."
    invalid_payoffs = (
        dict(true_positive=None, true_negative=-1,
             false_positive=-7, false_negative=-2),
        dict(true_positive=1, true_negative=-1,
             false_positive=None, false_negative=-2),
        dict(true_positive=1, true_negative=None,
             false_positive=-7, false_negative=-2),
        dict(true_positive=3, true_negative=-1,
             false_positive=-7, false_negative=None),
    )
    for payoffs in invalid_payoffs:
        with pytest.raises(ValueError, match=expected_message):
            CostBenefitMatrix(**payoffs)
Beispiel #2
0
def test_cbm_objective_function_floats():
    """Check ``objective_function`` against a hand-computed value for float payoffs."""
    actual = pd.Series([0, 0, 0, 1, 1, 1, 1, 1, 1, 1])
    predicted = pd.Series([0, 0, 1, 0, 0, 0, 0, 1, 1, 1])
    payoffs = dict(true_positive=5.1,
                   true_negative=-1.2,
                   false_positive=-6.7,
                   false_negative=-0.1)
    objective = CostBenefitMatrix(**payoffs)

    # The fixture above has 3 TP, 2 TN, 1 FP and 4 FN across 10 samples.
    expected = ((3 * 5.1) + (-1.2 * 2) + (1 * -6.7) + (4 * -0.1)) / 10
    result = objective.objective_function(actual, predicted)
    assert np.isclose(result, expected)
Beispiel #3
0
def test_cbm_zero_input_lengths():
    """Check that scoring two empty inputs raises the zero-length ``ValueError``."""
    cbm = CostBenefitMatrix(true_positive=10,
                            true_negative=-1,
                            false_positive=-7,
                            false_negative=-2)
    # Give the empty Series an explicit dtype: without one, pandas 1.x emits a
    # DeprecationWarning and pandas 2.x silently switches the default to
    # ``object``, so the inputs would otherwise differ between versions.
    y_predicted = pd.Series([], dtype="float64")
    y_true = pd.Series([], dtype="float64")
    with pytest.raises(ValueError, match="Length of inputs is 0"):
        cbm.score(y_true, y_predicted)
Beispiel #4
0
def test_cbm_objective_function(data_type, make_data_type):
    """Check ``objective_function`` against a hand-computed value for integer payoffs.

    Both label vectors are passed through the ``make_data_type`` fixture with
    the given ``data_type`` before being handed to the objective.
    """
    raw_true = pd.Series([0, 0, 0, 1, 1, 1, 1, 1, 1, 1])
    raw_predicted = pd.Series([0, 0, 1, 0, 0, 0, 0, 1, 1, 1])
    converted_true = make_data_type(data_type, raw_true)
    converted_predicted = make_data_type(data_type, raw_predicted)

    objective = CostBenefitMatrix(true_positive=10,
                                  true_negative=-1,
                                  false_positive=-7,
                                  false_negative=-2)

    # The fixture above has 3 TP, 2 TN, 1 FP and 4 FN across 10 samples.
    expected = ((3 * 10) + (-1 * 2) + (1 * -7) + (4 * -2)) / 10
    result = objective.objective_function(converted_true, converted_predicted)
    assert np.isclose(result, expected)
Beispiel #5
0
def test_get_objective_return_instance_does_not_work_for_some_objectives():
    """Check ``get_objective`` behaviour for the Cost Benefit Matrix objective.

    Asking for an instance by name must raise ``ObjectiveCreationError``;
    passing an already-built instance must give back something equal to it.
    """
    expected_message = "In get_objective, cannot pass in return_instance=True for Cost Benefit Matrix"
    with pytest.raises(ObjectiveCreationError, match=expected_message):
        get_objective("Cost Benefit Matrix", return_instance=True)

    instance = CostBenefitMatrix(0, 0, 0, 0)
    resolved = get_objective(instance)
    assert resolved == instance
def test_calculate_percent_difference_negative_and_equal_numbers():
    """Check ``calculate_percent_difference`` for equal and negative scores.

    Each table row is ``(score, baseline_score, expected_result)``.
    """
    cbm_cases = (
        (5, 5, 0),
        (-5, -10, 50),
        (-10, -5, -100),
        (-5, 10, -150),
        (10, -5, 300),
    )
    for score, baseline, expected in cbm_cases:
        outcome = CostBenefitMatrix.calculate_percent_difference(
            score=score, baseline_score=baseline)
        assert outcome == expected

    # These values are not possible for LogLossBinary but we need them for 100% coverage
    # We might add an objective where lower is better that can take negative values in the future
    log_loss_cases = (
        (-5, -10, -50),
        (-10, -5, 100),
        (-5, 10, 150),
        (10, -5, -300),
    )
    for score, baseline, expected in log_loss_cases:
        outcome = LogLossBinary.calculate_percent_difference(
            score=score, baseline_score=baseline)
        assert outcome == expected
Beispiel #7
0
def test_cbm_different_input_lengths():
    """Check that ``score`` raises ``ValueError`` when the inputs differ in length.

    Each case is a ``(y_true, y_predicted)`` pair of unequal lengths.
    """
    objective = CostBenefitMatrix(true_positive=10,
                                  true_negative=-1,
                                  false_positive=-7,
                                  false_negative=-2)
    mismatched_pairs = (
        (pd.Series([1]), pd.Series([0, 0])),
        (pd.Series([0, 0]), pd.Series([1, 2, 0])),
    )
    for actual, predicted in mismatched_pairs:
        with pytest.raises(ValueError,
                           match="Inputs have mismatched dimensions"):
            objective.score(actual, predicted)
Beispiel #8
0
def test_cbm_objective_automl(optimize_thresholds, X_y_binary):
    """Run a two-iteration AutoML search with the objective and check for NaNs.

    The best pipeline is refit on the same data; its predictions, predicted
    probabilities and Cost Benefit Matrix score must all be free of NaN.
    """
    features, target = X_y_binary
    objective = CostBenefitMatrix(true_positive=10,
                                  true_negative=-1,
                                  false_positive=-7,
                                  false_negative=-2)
    search = AutoMLSearch(X_train=features,
                          y_train=target,
                          problem_type='binary',
                          objective=objective,
                          max_iterations=2,
                          optimize_thresholds=optimize_thresholds)
    search.search()

    best = search.best_pipeline
    best.fit(features, target)

    predicted = best.predict(features, objective)
    assert not np.isnan(predicted.to_series()).values.any()

    probabilities = best.predict_proba(features).to_dataframe()
    assert not np.isnan(probabilities).values.any()

    scores = best.score(features, target, [objective])
    assert not np.isnan(scores['Cost Benefit Matrix'])
Beispiel #9
0
def test_cbm_input_contains_nan(X_y_binary):
    """Check that ``score`` raises ``ValueError`` when either input holds a NaN.

    The ``X_y_binary`` fixture is requested but not used in the body.
    Each case is ``(y_true, y_predicted, expected_message)``.
    """
    objective = CostBenefitMatrix(true_positive=10,
                                  true_negative=-1,
                                  false_positive=-7,
                                  false_negative=-2)
    nan_cases = (
        (pd.Series([1, 2, 1]), pd.Series([np.nan, 0, 0]),
         "y_predicted contains NaN or infinity"),
        (pd.Series([np.nan, 0, 0]), pd.Series([1, 2, 0]),
         "y_true contains NaN or infinity"),
    )
    for actual, predicted, expected_message in nan_cases:
        with pytest.raises(ValueError, match=expected_message):
            objective.score(actual, predicted)
Beispiel #10
0
def test_cbm_input_contains_inf(capsys):
    """Check that ``score`` raises ``ValueError`` when either input holds infinity.

    The first case passes NumPy arrays and the second pandas Series. The
    ``capsys`` fixture is requested but not used in the body.
    Each case is ``(y_true, y_predicted, expected_message)``.
    """
    objective = CostBenefitMatrix(true_positive=10,
                                  true_negative=-1,
                                  false_positive=-7,
                                  false_negative=-2)
    inf_cases = (
        (np.array([1, 0, 0]), np.array([np.inf, 0, 0]),
         "y_predicted contains NaN or infinity"),
        (pd.Series([np.inf, 0, 0]), pd.Series([1, 0, 0]),
         "y_true contains NaN or infinity"),
    )
    for actual, predicted, expected_message in inf_cases:
        with pytest.raises(ValueError, match=expected_message):
            objective.score(actual, predicted)
Beispiel #11
0
def test_cbm_binary_more_than_two_unique_values():
    """Check that ``score`` raises ``ValueError`` for inputs with three distinct labels.

    Each case is ``(y_true, y_predicted, expected_message)``.
    """
    objective = CostBenefitMatrix(true_positive=10,
                                  true_negative=-1,
                                  false_positive=-7,
                                  false_negative=-2)
    three_valued = [0, 1, 2]
    two_valued = [1, 0, 1]
    cases = (
        (pd.Series(two_valued), pd.Series(three_valued),
         "y_predicted contains more than two unique values"),
        (pd.Series(three_valued), pd.Series(two_valued),
         "y_true contains more than two unique values"),
    )
    for actual, predicted, expected_message in cases:
        with pytest.raises(ValueError, match=expected_message):
            objective.score(actual, predicted)