def test_numpy_integration(self):
    """
    Test LearningCurve on the mushroom dataset with NumPy arrays.

    The features are one-hot encoded and converted to a dense array, a
    GaussianNB learning curve is fit with 4-fold stratified
    cross-validation, and the finalized visualizer is handed to
    ``assert_images_similar``.
    """
    data = load_mushroom(return_dataset=True)
    X, y = data.to_numpy()

    X = OneHotEncoder().fit_transform(X).toarray()

    # No ``random_state`` on the splitter: StratifiedKFold only consults it
    # when ``shuffle=True``, and scikit-learn >= 0.24 raises ValueError when
    # it is set while shuffling is off. Unshuffled folds are deterministic,
    # so the splits are the same as the seed-ignoring behavior produced.
    cv = StratifiedKFold(n_splits=4)
    oz = LearningCurve(GaussianNB(), cv=cv, random_state=23)

    oz.fit(X, y)
    oz.finalize()

    self.assert_images_similar(oz)
def test_numpy_integration(self):
    """
    Test CVScores on the mushroom dataset with NumPy arrays.

    The features are one-hot encoded and converted to a dense array, a
    BernoulliNB model is scored with 2-fold stratified cross-validation,
    and the finalized visualizer is handed to ``assert_images_similar``
    with ``tol=2.0``.
    """
    data = load_mushroom(return_dataset=True)
    X, y = data.to_numpy()

    X = OneHotEncoder().fit_transform(X).toarray()

    # No ``random_state`` on the splitter: StratifiedKFold only consults it
    # when ``shuffle=True``, and scikit-learn >= 0.24 raises ValueError when
    # it is set while shuffling is off. Unshuffled folds are deterministic,
    # so the splits are the same as the seed-ignoring behavior produced.
    cv = StratifiedKFold(n_splits=2)
    oz = CVScores(BernoulliNB(), cv=cv)

    oz.fit(X, y)
    oz.finalize()

    self.assert_images_similar(oz, tol=2.0)
def test_numpy_integration(self):
    """
    Test ValidationCurve on the mushroom dataset with NumPy arrays.

    The features are one-hot encoded and converted to a dense array, then
    BernoulliNB's ``alpha`` is swept over six evenly spaced values in
    [0.1, 3.0] with 2-fold stratified cross-validation. The finalized
    visualizer is handed to ``assert_images_similar``.
    """
    data = load_mushroom(return_dataset=True)
    X, y = data.to_numpy()

    X = OneHotEncoder().fit_transform(X).toarray()

    # No ``random_state`` on the splitter: StratifiedKFold only consults it
    # when ``shuffle=True``, and scikit-learn >= 0.24 raises ValueError when
    # it is set while shuffling is off. Unshuffled folds are deterministic,
    # so the splits are the same as the seed-ignoring behavior produced.
    cv = StratifiedKFold(n_splits=2)
    pr = np.linspace(0.1, 3.0, 6)
    oz = ValidationCurve(
        BernoulliNB(), cv=cv, param_range=pr, param_name="alpha"
    )

    oz.fit(X, y)
    oz.finalize()

    self.assert_images_similar(oz)
def test_pandas_integration(self):
    """
    Test LearningCurve on the mushroom dataset with pandas inputs.

    The features are dummy-encoded with ``pd.get_dummies`` and the inputs
    are checked to still be a DataFrame and a Series. A GaussianNB learning
    curve is then fit with 4-fold stratified cross-validation and the
    finalized visualizer is handed to ``assert_images_similar``.
    """
    data = load_mushroom(return_dataset=True)
    X, y = data.to_pandas()

    X = pd.get_dummies(X)

    # Guard against the loader or encoder silently returning other types.
    assert isinstance(X, pd.DataFrame)
    assert isinstance(y, pd.Series)

    # No ``random_state`` on the splitter: StratifiedKFold only consults it
    # when ``shuffle=True``, and scikit-learn >= 0.24 raises ValueError when
    # it is set while shuffling is off. Unshuffled folds are deterministic,
    # so the splits are the same as the seed-ignoring behavior produced.
    cv = StratifiedKFold(n_splits=4)
    oz = LearningCurve(GaussianNB(), cv=cv, random_state=23)

    oz.fit(X, y)
    oz.finalize()

    self.assert_images_similar(oz)
def test_pandas_integration(self):
    """
    Test CVScores on the mushroom dataset with pandas inputs.

    The features are dummy-encoded with ``pd.get_dummies`` and the inputs
    are checked to still be a DataFrame and a Series. A BernoulliNB model
    is then scored with 2-fold stratified cross-validation and the
    finalized visualizer is handed to ``assert_images_similar`` with
    ``tol=2.0``.
    """
    data = load_mushroom(return_dataset=True)
    X, y = data.to_pandas()

    X = pd.get_dummies(X)

    # Guard against the loader or encoder silently returning other types.
    assert isinstance(X, pd.DataFrame)
    assert isinstance(y, pd.Series)

    # No ``random_state`` on the splitter: StratifiedKFold only consults it
    # when ``shuffle=True``, and scikit-learn >= 0.24 raises ValueError when
    # it is set while shuffling is off. Unshuffled folds are deterministic,
    # so the splits are the same as the seed-ignoring behavior produced.
    cv = StratifiedKFold(n_splits=2)
    oz = CVScores(BernoulliNB(), cv=cv)

    oz.fit(X, y)
    oz.finalize()

    self.assert_images_similar(oz, tol=2.0)
def test_pandas_integration(self):
    """
    Test ValidationCurve on the mushroom dataset with pandas inputs.

    The features are dummy-encoded with ``pd.get_dummies`` and the inputs
    are checked to still be a DataFrame and a Series. BernoulliNB's
    ``alpha`` is then swept over six evenly spaced values in [0.1, 3.0]
    with 2-fold stratified cross-validation, and the finalized visualizer
    is handed to ``assert_images_similar``.
    """
    data = load_mushroom(return_dataset=True)
    X, y = data.to_pandas()

    X = pd.get_dummies(X)

    # Guard against the loader or encoder silently returning other types.
    assert isinstance(X, pd.DataFrame)
    assert isinstance(y, pd.Series)

    # No ``random_state`` on the splitter: StratifiedKFold only consults it
    # when ``shuffle=True``, and scikit-learn >= 0.24 raises ValueError when
    # it is set while shuffling is off. Unshuffled folds are deterministic,
    # so the splits are the same as the seed-ignoring behavior produced.
    cv = StratifiedKFold(n_splits=2)
    pr = np.linspace(0.1, 3.0, 6)
    oz = ValidationCurve(
        BernoulliNB(), cv=cv, param_range=pr, param_name="alpha"
    )

    oz.fit(X, y)
    oz.finalize()

    self.assert_images_similar(oz)
def get_mushroom_data():
    """
    Load the mushroom dataset together with its distinct target values.

    Returns
    -------
    tuple
        ``(X, y, labels)`` where ``X`` and ``y`` are the pair returned by
        ``load_mushroom()`` and ``labels`` is ``y.unique().tolist()``.
    """
    features, target = load_mushroom()
    distinct_labels = target.unique().tolist()
    return features, target, distinct_labels
} def visualize_model(X, y, estimator, path, **kwargs): """ Test various estimators. """ y = LabelEncoder().fit_transform(y) model = Pipeline([("one_hot_encoder", OneHotEncoder()), ("estimator", estimator)]) _, ax = plt.subplots() # Instantiate the classification model and visualizer visualizer = ClassificationReport(model, classes=["edible", "poisonous"], cmap="YlGn", size=(600, 360), ax=ax, **kwargs) visualizer.fit(X, y) visualizer.score(X, y) visualizer.poof(outpath=path) if __name__ == "__main__": X, y = load_mushroom() for clf in ESTIMATORS.values(): visualize_model(X, y, clf["model"], clf["path"])
# -*- coding: utf-8 -*-
"""
Class balance of the Yellowbrick mushroom dataset.

Loads the mushroom dataset as a DataFrame, separates the ``target`` column
from the features, label-encodes the target and one-hot encodes the
features, then passes the encoded target to Yellowbrick's ``class_balance``.
"""

# All imports are grouped here so they no longer sit between executable
# statements; the original relative order is preserved.
import matplotlib.pyplot as plt
import pandas as pd
from yellowbrick.datasets import load_mushroom
import seaborn as sn
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from yellowbrick.target import class_balance

dataset = load_mushroom(return_dataset=True)
df = dataset.to_dataframe()
# NOTE(review): the result is discarded, so nothing is shown when this file
# runs as a script; presumably left over from interactive use.
df.head()

# Split the frame into features and target.
X = df.drop(columns=['target'])
y = df['target']

print('\nDataset Mushroom\n')

# Preprocessing
le = LabelEncoder()
ohe = OneHotEncoder(handle_unknown='ignore')

y_scale = le.fit_transform(y)
# NOTE(review): X_scale is computed but not used by anything below.
X_scale = ohe.fit_transform(X)

class_balance(y_scale, labels=['edible', 'poisonous'])