def test_easy_preprocessor_transform():
    """Smoke test: EasyPreprocessor inside a pipeline can fit and predict.

    Fits ``EasyPreprocessor`` followed by ``LogisticRegression(C=0.1)`` on a
    stratified training split of the cleaned titanic data, then predicts on
    both the training and the validation features.
    """
    cleaned = clean(load_titanic())
    features = cleaned.drop("survived", axis=1)
    target = cleaned.survived

    # The validation target is not needed; only predictions are exercised.
    features_train, features_val, target_train, _ = train_test_split(
        features, target, stratify=target, random_state=42)

    model = make_pipeline(EasyPreprocessor(), LogisticRegression(C=0.1))
    model.fit(features_train, target_train)

    # Predict on seen data first, then on held-out data.
    for split in (features_train, features_val):
        model.predict(split)
def test_loading():
    """Smoke test: bundled datasets load with their expected shapes."""
    expected_shapes = (
        (load_ames, (2930, 82)),
        (load_titanic, (1309, 14)),
        (load_adult, (32561, 15)),
    )
    for loader, shape in expected_shapes:
        assert loader().shape == shape

    # The raw csv must also be readable through the data_path helper.
    csv_path = data_path("titanic.csv")
    pd.read_csv(csv_path)
def test_explain_smoke_titanic():
    """Smoke test: ``explain`` runs on three kinds of fitted models.

    Covers a ``SimpleClassifier``, a bare ``DecisionTreeClassifier`` trained
    on preprocessed features, and a preprocessing + ``LogisticRegression``
    pipeline, all on the cleaned titanic data.
    """
    cleaned = clean(load_titanic())

    # 1) SimpleClassifier fitted directly on the cleaned frame.
    simple = SimpleClassifier().fit(cleaned, target_col='survived')
    explain(simple)

    features = cleaned.drop("survived", axis=1)
    target = cleaned.survived

    # 2) Plain decision tree on manually preprocessed features; the feature
    #    names are taken from the fitted preprocessor.
    preprocessor = EasyPreprocessor()
    transformed = preprocessor.fit_transform(features)
    tree = DecisionTreeClassifier().fit(transformed, target)
    explain(tree, feature_names=preprocessor.get_feature_names())

    # 3) Pipeline; the feature names come from its first step.
    pipeline = make_pipeline(EasyPreprocessor(), LogisticRegression())
    pipeline.fit(features, target)
    first_step = pipeline[0]
    explain(pipeline, feature_names=first_step.get_feature_names())
def test_explain_titanic_val(model):
    """Smoke test: ``explain`` on a pipeline with and without validation data.

    ``model`` is used as the final pipeline step after ``EasyPreprocessor``
    (presumably supplied by a parametrization outside this block).
    """
    # TODO: add multi-class
    # TODO: add regression
    cleaned = clean(load_titanic())
    features = cleaned.drop("survived", axis=1)
    target = cleaned.survived

    split = train_test_split(
        features, target, stratify=target, random_state=42)
    features_train, features_val, target_train, target_val = split

    fitted = make_pipeline(EasyPreprocessor(), model)
    fitted.fit(features_train, target_train)

    column_names = features.columns
    # without validation set
    explain(fitted, feature_names=column_names)
    # with validation set
    explain(fitted, features_val, target_val, feature_names=column_names)
def test_dirty_float_target_regression():
    """Check that a dirty-float target triggers a warning in clean and plot.

    Both ``clean`` and ``plot`` are expected to emit a ``UserWarning`` about
    discarded targets; ``plot`` is then also run on the titanic data, whose
    target is not a dirty float.
    """
    titanic_data = load_titanic()

    frame = pd.DataFrame({'one': np.repeat(np.arange(50), 2)})
    frame['target'] = make_dirty_float()

    # The same warning pattern is expected from both entry points.
    expected_warning = ("Discarding dirty_float targets that "
                        "cannot be converted to float.")
    for entry_point in (clean, plot):
        with pytest.warns(UserWarning, match=expected_warning):
            entry_point(frame, target_col="target")

    # check if works for non dirty_float targets
    plot(titanic_data, 'survived')
def test_any_classifier_titanic(monkeypatch):
    """Smoke test: AnyClassifier fits on titanic with a patched estimator list.

    ``AnyClassifier._get_estimators`` is replaced by
    ``mock_get_estimators_logreg`` for the duration of the test.
    """
    monkeypatch.setattr(
        AnyClassifier, '_get_estimators', mock_get_estimators_logreg)

    passengers = load_titanic()
    classifier = AnyClassifier()
    classifier.fit(passengers, target_col='survived')
def test_simple_classifier_titanic():
    """Smoke test: SimpleClassifier fits on titanic and predicts on it."""
    # NOTE(review): another test with this exact name is visible nearby; if
    # both live in the same module, the later definition replaces this one
    # and this test is never collected -- confirm.
    passengers = load_titanic()
    classifier = SimpleClassifier()
    classifier.fit(passengers, target_col='survived')

    # Predict on the training features with the target column removed.
    features = passengers.drop('survived', axis=1)
    classifier.predict(features)
def test_simple_classifier_titanic():
    """Smoke test: SimpleClassifier fits on every tenth titanic row."""
    # NOTE(review): another test with this exact name is visible nearby; if
    # both live in the same module, this definition shadows the earlier
    # one -- confirm.
    every_tenth = slice(None, None, 10)
    subsample = load_titanic()[every_tenth]

    classifier = SimpleClassifier()
    classifier.fit(subsample, target_col='survived')
""" Mosaic Plot Example ========================================== """ import matplotlib.pyplot as plt from dabl.datasets import load_titanic from dabl.plot import mosaic_plot data = load_titanic() # Mosaic plot for frequencies of Titanic passengers grouped # by gender and survival status mosaic_plot(data, 'sex', 'survived') plt.show()