def test_wines(self):
    """Compare a per-colour learner with a single global logistic regression.

    Three features of the wines dataset (``alcohol``, ``volatile_acidity``,
    ``density``) are used to predict ``quality``. A
    ``SkBaseLearnerCategory`` keyed on the ``color`` column is fitted on
    the features plus the colour column, and a plain
    ``LogisticRegression`` is fitted on the features alone. The test
    checks that:

    * the ``'red'`` and ``'white'`` sub-models end up with different
      coefficients,
    * both approaches reach an accuracy above 0.45 on the test split,
    * the per-colour learner scores more than 99% of the global model's
      accuracy.
    """
    df = load_wines_dataset()
    # Selecting the three columns directly makes a prior drop of
    # 'quality' and 'color' unnecessary.
    X = df[['alcohol', 'volatile_acidity', 'density']]
    y = df['quality']
    color = df['color']

    # NOTE(review): no random_state is given, so the split (and therefore
    # the accuracies asserted below) changes from run to run.
    X_train, X_test, y_train, y_test, color_train, color_test = \
        train_test_split(X, y, color)

    # The categorical learner needs the colour column next to the features.
    model = SkBaseLearnerCategory(
        "color", LogisticRegression(solver="liblinear"))
    new_x_train = pandas.concat([X_train, color_train], axis=1)
    model.fit(new_x_train, y_train)
    new_x_test = pandas.concat([X_test, color_test], axis=1)
    acc1 = accuracy_score(y_test, model.predict(new_x_test))

    # The equality assertion is expected to fail: the two sub-models must
    # not share the same coefficients.
    with self.assertRaises(AssertionError):
        self.assertEqualDataFrame(
            model.models['red'].coef_, model.models['white'].coef_)

    # Baseline: one model for all wines, without the colour information.
    clr = LogisticRegression(solver="liblinear")
    clr.fit(X_train, y_train)
    acc2 = accuracy_score(y_test, clr.predict(X_test))

    self.assertGreater(acc1, 0.45)
    self.assertGreater(acc2, 0.45)
    self.assertGreater(acc1, acc2 * 0.99)
recense tous les prétraitements que la librairie :epkg:`scikit-learn` implémente. .. contents:: :local: """ ######################################### # Un jeu de données from sklearn.model_selection import train_test_split from sklearn.preprocessing import Normalizer from sklearn.preprocessing import normalize from papierstat.datasets import load_wines_dataset df = load_wines_dataset() X = df.drop(['quality', 'color'], axis=1) y = df['quality'] print(X.head()) ######################################### # Normalisation naïve # ------------------- X_norm = normalize(X) print(X_norm[:5]) ######################################### # Normalisation supervisée # ------------------------ #
def test_wines_local(self):
    """Load the wines dataset with ``download=False`` and check its shape."""
    expected_shape = (6497, 13)
    wines = load_wines_dataset(download=False, shuffle=True)
    self.assertEqual(wines.shape, expected_shape)