def test_wines(self):
        """Compare a per-color learner with a single logistic regression.

        Fits ``SkBaseLearnerCategory`` keyed on the ``color`` column and a
        plain ``LogisticRegression`` on three features of the wines dataset,
        then checks that:

        * comparing the coefficients stored under ``models['red']`` and
          ``models['white']`` with ``assertEqualDataFrame`` fails,
        * both accuracies exceed 0.45,
        * the per-color learner reaches at least 99% of the accuracy of the
          single model.
        """
        df = load_wines_dataset()

        # Only three features are used; 'color' is split alongside them so it
        # can be re-attached to the train/test feature frames below.
        X = df[['alcohol', 'volatile_acidity', 'density']]
        y = df['quality']
        color = df['color']

        # NOTE(review): no random_state is passed, so the split presumably
        # differs between runs -- confirm the thresholds below tolerate that.
        X_train, X_test, y_train, y_test, color_train, color_test = train_test_split(
            X, y, color)

        # The category learner receives the features plus the 'color' column.
        model = SkBaseLearnerCategory("color",
                                      LogisticRegression(solver="liblinear"))
        new_x_train = pandas.concat([X_train, color_train], axis=1)
        model.fit(new_x_train, y_train)
        new_x_test = pandas.concat([X_test, color_test], axis=1)
        acc1 = accuracy_score(y_test, model.predict(new_x_test))

        # The equality check on the two sub-models' coefficients must fail.
        # NOTE(review): coef_ is presumably a numpy array rather than a
        # DataFrame -- confirm assertEqualDataFrame does not raise merely
        # because of the argument type, which would make this check vacuous.
        with self.assertRaises(AssertionError):
            self.assertEqualDataFrame(model.models['red'].coef_,
                                      model.models['white'].coef_)

        # Baseline: one logistic regression trained without the color column.
        clr = LogisticRegression(solver="liblinear")
        clr.fit(X_train, y_train)
        acc2 = accuracy_score(y_test, clr.predict(X_test))
        self.assertGreater(acc1, 0.45)
        self.assertGreater(acc2, 0.45)
        self.assertGreater(acc1, acc2 * 0.99)
Example #2
0
recense tous les prétraitements que la librairie
:epkg:`scikit-learn` implémente.


.. contents::
    :local:
"""

#########################################
# A dataset

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import normalize
from papierstat.datasets import load_wines_dataset
df = load_wines_dataset()
# X keeps every column except 'quality' and 'color'; y is the 'quality' column.
X = df.drop(['quality', 'color'], axis=1)
y = df['quality']

print(X.head())

#########################################
# Naive normalization
# -------------------
# Apply scikit-learn's ``normalize`` with its default arguments and show the
# first five rows of the result.
X_norm = normalize(X)
print(X_norm[:5])

#########################################
# Supervised normalization
# ------------------------
#
Example #3
0
 def test_wines_local(self):
     """Load the wines dataset with ``download=False`` and check its shape."""
     expected_shape = (6497, 13)
     wines = load_wines_dataset(download=False, shuffle=True)
     self.assertEqual(wines.shape, expected_shape)