Ejemplo n.º 1
0
def test_baseline_binary_mode(data_type, make_data_type):
    """Check the "mode" strategy on a binary target for each input data type.

    The target holds 10 three times and 11 once, so every prediction is
    expected to be 10, with probability 1.0 for class 10 and 0.0 for class 11.
    Feature importances are expected to be zero for every column.
    """
    feature_columns = {
        'one': [1, 2, 3, 4],
        'two': [2, 3, 4, 5],
        'three': [1, 2, 3, 4],
    }
    X = make_data_type(data_type, pd.DataFrame(feature_columns))
    y = make_data_type(data_type, pd.Series([10, 11, 10, 10]))

    baseline = BaselineClassifier(strategy="mode")
    # fit() is expected to hand back a BaselineClassifier instance.
    assert isinstance(baseline.fit(X, y), BaselineClassifier)
    assert baseline.classes_ == [10, 11]

    n_rows = X.shape[0]
    n_features = X.shape[1]

    # Every row should receive the most frequent label.
    actual_labels = baseline.predict(X).to_series()
    wanted_labels = pd.Series(np.full(n_rows, 10), dtype="Int64")
    assert_series_equal(wanted_labels, actual_labels)

    proba = baseline.predict_proba(X)
    assert proba.shape == (n_rows, 2)
    wanted_proba = pd.DataFrame({10: [1.] * 4, 11: [0.] * 4})
    assert_frame_equal(wanted_proba, proba.to_dataframe())

    np.testing.assert_allclose(baseline.feature_importance,
                               np.zeros(n_features))
Ejemplo n.º 2
0
def test_baseline_binary_random_weighted(X_y_binary):
    """Check the "random_weighted" strategy on a binary target.

    Predictions are compared against draws made with ``get_random_state(0)``
    using the observed class frequencies as weights, and every row of the
    predicted probabilities is expected to equal those class frequencies.
    Feature importances are expected to be zero for every column.
    """
    X, y = X_y_binary
    values, counts = np.unique(y, return_counts=True)
    percent_freq = counts.astype(float) / len(y)
    # Frequencies are floats, so compare the sum with a tolerance rather than
    # relying on exact equality with 1.0.
    np.testing.assert_allclose(percent_freq.sum(), 1.0)

    clf = BaselineClassifier(strategy="random_weighted", random_state=0)
    clf.fit(X, y)

    assert clf.classes_ == [0, 1]
    # Reuse the unique labels computed above instead of calling np.unique again.
    expected_predictions = pd.Series(
        get_random_state(0).choice(values, len(X), p=percent_freq),
        dtype="Int64")
    predictions = clf.predict(X)
    assert_series_equal(expected_predictions, predictions.to_series())

    predicted_proba = clf.predict_proba(X)
    assert predicted_proba.shape == (len(X), 2)
    # One copy of the class-frequency vector per sample.
    expected_predictions_proba = pd.DataFrame(
        np.tile(percent_freq, (len(X), 1)))
    assert_frame_equal(expected_predictions_proba,
                       predicted_proba.to_dataframe())

    np.testing.assert_allclose(clf.feature_importance,
                               np.zeros(X.shape[1]))
Ejemplo n.º 3
0
def test_baseline_multiclass_mode():
    """Check the "mode" strategy on a three-class target.

    Label 11 appears twice while 10 and 12 appear once each, so every
    prediction is expected to be 11, with probability 1.0 for class 11 and
    0.0 for the other two classes. Feature importances are expected to be
    zero for every column.
    """
    feature_columns = {
        'one': [1, 2, 3, 4],
        'two': [2, 3, 4, 5],
        'three': [1, 2, 3, 4],
    }
    X = pd.DataFrame(feature_columns)
    y = pd.Series([10, 12, 11, 11])

    baseline = BaselineClassifier(strategy="mode")
    baseline.fit(X, y)
    assert baseline.classes_ == [10, 11, 12]

    n_rows = len(X)

    actual_labels = baseline.predict(X)
    wanted_labels = pd.Series([11] * n_rows, dtype="Int64")
    assert_series_equal(wanted_labels, actual_labels.to_series())

    proba = baseline.predict_proba(X)
    assert proba.shape == (n_rows, 3)
    wanted_proba = pd.DataFrame({
        10: [0.] * 4,
        11: [1.] * 4,
        12: [0.] * 4,
    })
    assert_frame_equal(wanted_proba, proba.to_dataframe())

    np.testing.assert_allclose(baseline.feature_importance,
                               np.zeros(X.shape[1]))
def test_baseline_no_mode():
    """Check the default strategy when two labels tie for most frequent.

    Labels 0 and 1 both occur twice in the target; the test expects label 0
    to be predicted for every row, with probability 1.0 in the first column
    and 0.0 in the other two. Feature importances are expected to be zero
    for every column.
    """
    # NOTE(review): X has a single row with five columns while y has five
    # labels -- confirm the row-count mismatch is intentional.
    X = pd.DataFrame([[1, 2, 3, 0, 1]])
    y = pd.Series([1, 0, 2, 0, 1])

    baseline = BaselineClassifier()
    baseline.fit(X, y)
    assert baseline.classes_ == [0, 1, 2]

    n_rows = len(X)

    np.testing.assert_allclose(baseline.predict(X),
                               np.zeros(n_rows, dtype=int))

    proba = baseline.predict_proba(X)
    assert proba.shape == (n_rows, 3)
    # All probability mass on the first class for every row.
    wanted_proba = np.tile([1.0, 0.0, 0.0], (n_rows, 1))
    np.testing.assert_allclose(proba, wanted_proba)

    np.testing.assert_allclose(baseline.feature_importance,
                               np.zeros(X.shape[1]))
Ejemplo n.º 5
0
def test_baseline_multiclass_random(X_y_multi):
    """Check the "random" strategy on a three-class target.

    Predictions are compared against unweighted draws made with
    ``get_random_state(0)`` over the unique labels, and every predicted
    probability is expected to be one third. Feature importances are
    expected to be zero for every column.
    """
    X, y = X_y_multi
    labels = np.unique(y)
    n_rows = len(X)

    baseline = BaselineClassifier(strategy="random", random_seed=0)
    baseline.fit(X, y)
    assert baseline.classes_ == [0, 1, 2]

    drawn = get_random_state(0).choice(np.unique(y), n_rows)
    wanted_labels = pd.Series(drawn, dtype="Int64")
    actual_labels = baseline.predict(X)
    assert_series_equal(wanted_labels, actual_labels.to_series())

    proba = baseline.predict_proba(X)
    assert proba.shape == (n_rows, 3)
    # One column per unique label, each holding a probability of 1/3.
    wanted_proba = pd.DataFrame(np.full((n_rows, len(labels)), 1. / 3))
    assert_frame_equal(wanted_proba, proba.to_dataframe())

    np.testing.assert_allclose(baseline.feature_importance,
                               np.zeros(X.shape[1]))
Ejemplo n.º 6
0
def test_baseline_no_mode():
    """Check the default strategy when two labels tie for most frequent.

    Labels 0 and 1 both occur twice in the target; the test expects label 0
    to be predicted for every row, with probability 1.0 in the first column
    and 0.0 in the other two. Feature importances are expected to be zero
    for every column.
    """
    # NOTE(review): X has a single row with five columns while y has five
    # labels -- confirm the row-count mismatch is intentional.
    X = pd.DataFrame([[1, 2, 3, 0, 1]])
    y = pd.Series([1, 0, 2, 0, 1])

    baseline = BaselineClassifier()
    baseline.fit(X, y)
    assert baseline.classes_ == [0, 1, 2]

    n_rows = len(X)

    wanted_labels = pd.Series([0] * n_rows, dtype="Int64")
    actual_labels = baseline.predict(X)
    assert_series_equal(wanted_labels, actual_labels.to_series())

    proba = baseline.predict_proba(X)
    assert proba.shape == (n_rows, 3)
    # All probability mass on the first class for every row.
    wanted_proba = pd.DataFrame(np.tile([1.0, 0.0, 0.0], (n_rows, 1)))
    assert_frame_equal(wanted_proba, proba.to_dataframe())

    np.testing.assert_allclose(baseline.feature_importance,
                               np.zeros(X.shape[1]))
def test_baseline_binary_random(X_y_binary):
    """Check the "random" strategy on a binary target.

    Predictions are compared against unweighted draws made with
    ``get_random_state(0)`` over the unique labels, and every predicted
    probability is expected to be 0.5. Feature importances are expected to
    be zero for every column.
    """
    X, y = X_y_binary
    labels = np.unique(y)
    n_rows = len(X)

    baseline = BaselineClassifier(strategy="random", random_state=0)
    baseline.fit(X, y)
    assert baseline.classes_ == [0, 1]

    actual_labels = baseline.predict(X)
    wanted_labels = get_random_state(0).choice(np.unique(y), n_rows)
    np.testing.assert_allclose(actual_labels, wanted_labels)

    proba = baseline.predict_proba(X)
    assert proba.shape == (n_rows, 2)
    # One column per unique label, each holding a probability of 0.5.
    np.testing.assert_allclose(proba, np.full((n_rows, len(labels)), 0.5))

    np.testing.assert_allclose(baseline.feature_importance,
                               np.zeros(X.shape[1]))
def test_baseline_binary_random_weighted(X_y_binary):
    """Check the "random_weighted" strategy on a binary target.

    Predictions are compared against draws made with ``get_random_state(0)``
    using the observed class frequencies as weights, and every row of the
    predicted probabilities is expected to equal those class frequencies.
    Feature importances are expected to be zero for every column.
    """
    X, y = X_y_binary
    values, counts = np.unique(y, return_counts=True)
    percent_freq = counts.astype(float) / len(y)
    # Frequencies are floats, so compare the sum with a tolerance rather than
    # relying on exact equality with 1.0.
    np.testing.assert_allclose(percent_freq.sum(), 1.0)

    clf = BaselineClassifier(strategy="random_weighted", random_state=0)
    clf.fit(X, y)
    assert clf.classes_ == [0, 1]

    # Reuse the unique labels computed above instead of calling np.unique again.
    np.testing.assert_allclose(
        clf.predict(X),
        get_random_state(0).choice(values, len(X), p=percent_freq))

    predicted_proba = clf.predict_proba(X)
    assert predicted_proba.shape == (len(X), 2)
    # One copy of the class-frequency vector per sample.
    np.testing.assert_allclose(
        predicted_proba,
        np.tile(percent_freq, (len(X), 1)))

    np.testing.assert_allclose(clf.feature_importance,
                               np.zeros(X.shape[1]))
def test_baseline_binary_mode(data_type, X_y_binary):
    """Check the "mode" strategy on a binary target, optionally via woodwork.

    When ``data_type`` is 'ww' the inputs are wrapped in ``ww.DataTable`` and
    ``ww.DataColumn`` before fitting. The target holds 10 three times and 11
    once, so every prediction is expected to be 10, with probability 1.0 for
    class 10 and 0.0 for class 11. Feature importances are expected to be
    zero for every column.

    The ``X_y_binary`` fixture is requested but not used in the body.
    """
    feature_columns = {
        'one': [1, 2, 3, 4],
        'two': [2, 3, 4, 5],
        'three': [1, 2, 3, 4],
    }
    X = pd.DataFrame(feature_columns)
    y = pd.Series([10, 11, 10, 10])
    if data_type == 'ww':
        X = ww.DataTable(X)
        y = ww.DataColumn(y)

    baseline = BaselineClassifier(strategy="mode")
    # fit() is expected to hand back a BaselineClassifier instance.
    assert isinstance(baseline.fit(X, y), BaselineClassifier)
    assert baseline.classes_ == [10, 11]

    n_rows = X.shape[0]

    # Every row should receive the most frequent label.
    np.testing.assert_allclose(baseline.predict(X), np.full(n_rows, 10))

    proba = baseline.predict_proba(X)
    assert proba.shape == (n_rows, 2)
    wanted_proba = pd.DataFrame({10: [1.] * 4, 11: [0.] * 4})
    pd.testing.assert_frame_equal(wanted_proba, proba)

    np.testing.assert_allclose(baseline.feature_importance,
                               np.zeros(X.shape[1]))