예제 #1
0
def test_easy_preprocessor_transform():
    """Smoke test: EasyPreprocessor works as the first step of a pipeline.

    Fits ``EasyPreprocessor`` followed by ``LogisticRegression(C=0.1)`` on a
    stratified training split of the cleaned titanic data, then predicts on
    both the training and the validation split.
    """
    cleaned = clean(load_titanic())
    features = cleaned.drop("survived", axis=1)
    target = cleaned.survived
    X_train, X_val, y_train, y_val = train_test_split(
        features, target, stratify=target, random_state=42)

    model = make_pipeline(EasyPreprocessor(), LogisticRegression(C=0.1))
    model.fit(X_train, y_train)

    # Only checks that prediction runs; the outputs are not inspected.
    for split in (X_train, X_val):
        model.predict(split)
예제 #2
0
def test_loading():
    """Smoke test: each dataset loader returns data of the expected shape."""
    expected_shapes = (
        (load_ames, (2930, 82)),
        (load_titanic, (1309, 14)),
        (load_adult, (32561, 15)),
    )
    for loader, shape in expected_shapes:
        assert loader().shape == shape

    # The titanic csv must also be readable directly via data_path.
    pd.read_csv(data_path("titanic.csv"))
예제 #3
0
def test_explain_smoke_titanic():
    """Smoke test: ``explain`` runs on three differently built titanic models."""
    cleaned = clean(load_titanic())

    # 1) SimpleClassifier fitted on the full frame with a named target column.
    simple = SimpleClassifier().fit(cleaned, target_col='survived')
    explain(simple)

    # 2) Decision tree fitted on features preprocessed by hand.
    features, target = cleaned.drop("survived", axis=1), cleaned.survived
    preprocessor = EasyPreprocessor()
    transformed = preprocessor.fit_transform(features)
    fitted_tree = DecisionTreeClassifier().fit(transformed, target)
    explain(fitted_tree, feature_names=preprocessor.get_feature_names())

    # 3) Pipeline whose first step supplies the feature names.
    pipeline = make_pipeline(EasyPreprocessor(), LogisticRegression())
    pipeline.fit(features, target)
    explain(pipeline, feature_names=pipeline[0].get_feature_names())
예제 #4
0
def test_explain_titanic_val(model):
    """Run ``explain`` on a fitted pipeline with and without validation data.

    Parameters
    ----------
    model
        Final estimator placed after ``EasyPreprocessor`` in the pipeline.
        NOTE(review): presumably supplied by a parametrize decorator or a
        fixture that is not visible here -- confirm.
    """
    # TODO: add multi-class
    # TODO: add regression
    cleaned = clean(load_titanic())
    features = cleaned.drop("survived", axis=1)
    target = cleaned.survived
    X_train, X_val, y_train, y_val = train_test_split(
        features, target, stratify=target, random_state=42)

    fitted = make_pipeline(EasyPreprocessor(), model)
    fitted.fit(X_train, y_train)

    names = features.columns
    # Without a validation set.
    explain(fitted, feature_names=names)
    # With a validation set.
    explain(fitted, X_val, y_val, feature_names=names)
예제 #5
0
def test_dirty_float_target_regression():
    """``clean`` and ``plot`` warn when a dirty-float target is discarded.

    Builds a frame whose ``target`` column comes from ``make_dirty_float`` and
    checks that both functions emit the expected ``UserWarning``. Finally
    makes sure ``plot`` still runs on a target that is not a dirty float.
    """
    clean_frame = load_titanic()

    frame = pd.DataFrame({'one': np.repeat(np.arange(50), 2)})
    frame['target'] = make_dirty_float()

    # pytest.warns treats this as a regular expression.
    expected_warning = ("Discarding dirty_float targets that "
                        "cannot be converted to float.")
    for func in (clean, plot):
        with pytest.warns(UserWarning, match=expected_warning):
            func(frame, target_col="target")

    # Check that it also works for targets that are not dirty floats.
    plot(clean_frame, 'survived')
예제 #6
0
파일: test_models.py 프로젝트: pdhinwa/dabl
def test_any_classifier_titanic(monkeypatch):
    """Fit ``AnyClassifier`` on titanic with a patched estimator list.

    ``AnyClassifier._get_estimators`` is replaced by
    ``mock_get_estimators_logreg`` for the duration of the test.
    """
    monkeypatch.setattr(
        AnyClassifier,
        '_get_estimators',
        mock_get_estimators_logreg,
    )
    frame = load_titanic()
    classifier = AnyClassifier()
    classifier.fit(frame, target_col='survived')
예제 #7
0
파일: test_models.py 프로젝트: pdhinwa/dabl
def test_simple_classifier_titanic():
    """Smoke test: fit ``SimpleClassifier`` on titanic, then predict."""
    frame = load_titanic()
    classifier = SimpleClassifier()
    classifier.fit(frame, target_col='survived')

    # Predict on the same rows with the target column removed.
    features_only = frame.drop('survived', axis=1)
    classifier.predict(features_only)
예제 #8
0
def test_simple_classifier_titanic():
    """Smoke test: fit ``SimpleClassifier`` on every tenth titanic row."""
    every_tenth = slice(None, None, 10)
    subsample = load_titanic()[every_tenth]
    classifier = SimpleClassifier()
    classifier.fit(subsample, target_col='survived')
예제 #9
0
파일: plot_mosaic.py 프로젝트: thecobb/dabl
"""
Mosaic Plot Example
==========================================
"""
import matplotlib.pyplot as plt
from dabl.datasets import load_titanic
from dabl.plot import mosaic_plot

# Load the titanic dataset via dabl.datasets.
data = load_titanic()

# Mosaic plot for frequencies of Titanic passengers grouped
# by gender ('sex' column) and survival status ('survived' column)

mosaic_plot(data, 'sex', 'survived')
# Display the resulting matplotlib figure.
plt.show()