def test_wines(self):
        df = load_wines_dataset()

        X = df.drop(['quality', 'color'], axis=1)
        X = X[['alcohol', 'volatile_acidity', 'density']]
        y = df['quality']
        color = df['color']

        X_train, X_test, y_train, y_test, color_train, color_test = train_test_split(
            X, y, color)

        model = SkBaseLearnerCategory("color",
                                      LogisticRegression(solver="liblinear"))
        new_x_train = pandas.concat([X_train, color_train], axis=1)
        model.fit(new_x_train, y_train)
        new_x_test = pandas.concat([X_test, color_test], axis=1)
        acc1 = accuracy_score(y_test, model.predict(new_x_test))

        try:
            self.assertEqualDataFrame(model.models['red'].coef_,
                                      model.models['white'].coef_)
            ok = False
        except AssertionError as e:
            ok = True
        self.assertTrue(ok)

        clr = LogisticRegression(solver="liblinear")
        clr.fit(X_train, y_train)
        acc2 = accuracy_score(y_test, clr.predict(X_test))
        self.assertGreater(acc1, 0.45)
        self.assertGreater(acc2, 0.45)
        self.assertGreater(acc1, acc2 * 0.99)
 def test_fit_cat_array(self):
     df = pandas.DataFrame(
         dict(y=[0, 1, 0, 1, 0, 1, 0, 1],
              X1=[0.5, 0.6, 0.52, 0.62, 0.5, 0.6, 0.51, 0.61],
              X2=[0.5, 0.6, 0.7, 0.5, 1.5, 1.6, 1.7, 1.8],
              cat=[9, 10, 9, 10, 9, 10, 9, 10]))
     X = df.drop('y', axis=1)
     y = df['y']
     model = SkBaseLearnerCategory('cat', DecisionTreeClassifier())
     model.fit(X, y)
     pred = model.predict(X)
     self.assertGreater(len(pred), 0)
     self.assertEqualArray(y, pred)
    def test_fit_predict(self):
        df = pandas.DataFrame(
            dict(y=[0, 1, 0, 1, 0, 1, 0, 1],
                 X1=[0.5, 0.6, 0.52, 0.62, 0.5, 0.6, 0.51, 0.61],
                 X2=[0.5, 0.6, 0.7, 0.5, 1.5, 1.6, 1.7, 1.8],
                 cat=[
                     'red', 'red', 'blue', 'blue', 'red', 'red', 'red', 'blue'
                 ]))
        X = df.drop('y', axis=1)
        y = df['y']
        model = SkBaseLearnerCategory('cat', DecisionTreeClassifier())
        model.fit(X, y)

        pred = model.predict(X)
        self.assertGreater(len(pred), 0)
        self.assertEqualArray(y, pred)

        pred = model.score(X, y)
        self.assertGreater(pred, 0)

        pred = model.predict_proba(X)
        self.assertGreater(len(pred), 0)
        self.assertEqualArray(y, pred[:, 1])
        self.assertRaise(lambda: model.decision_function(X),
                         NotImplementedError)
    def test_pickle(self):
        df = pandas.DataFrame(
            dict(y=[0, 1, 0, 1, 0, 1, 0, 1],
                 X1=[0.5, 0.6, 0.52, 0.62, 0.5, 0.6, 0.51, 0.61],
                 X2=[0.5, 0.6, 0.7, 0.5, 1.5, 1.6, 1.7, 1.8],
                 cat=[
                     'red', 'red', 'blue', 'blue', 'red', 'red', 'red', 'blue'
                 ]))
        X = df.drop('y', axis=1)
        y = df['y']
        model = SkBaseLearnerCategory('cat', DecisionTreeClassifier())
        model.fit(X, y)

        pred = model.predict(X)

        st = BytesIO()
        pickle.dump(model, st)
        st = BytesIO(st.getvalue())
        rec = pickle.load(st)
        pred2 = rec.predict(X)
        self.assertEqualArray(pred, pred2)
    def test_grid(self):
        df = pandas.DataFrame(
            dict(y=[0, 1, 0, 1, 0, 1, 0, 1],
                 X1=[0.5, 0.6, 0.52, 0.62, 0.5, 0.6, 0.51, 0.61],
                 X2=[0.5, 0.6, 0.7, 0.5, 1.5, 1.6, 1.7, 1.8],
                 cat=[
                     'red', 'red', 'blue', 'blue', 'red', 'red', 'red', 'blue'
                 ]))
        X = df.drop('y', axis=1)
        y = df['y']
        model = SkBaseLearnerCategory('cat', DecisionTreeClassifier())
        res = model.get_params(True)
        del res['model']
        for c in ['model__ccp_alpha', 'model__presort']:
            if c in res:
                del res[c]

        self.assertEqual(
            res, {
                'colnameind': 'cat',
                'model__class_weight': None,
                'model__criterion': 'gini',
                'model__max_depth': None,
                'model__max_features': None,
                'model__max_leaf_nodes': None,
                'model__min_impurity_decrease': 0.0,
                'model__min_impurity_split': None,
                'model__min_samples_leaf': 1,
                'model__min_samples_split': 2,
                'model__min_weight_fraction_leaf': 0.0,
                'model__random_state': None,
                'model__splitter': 'best'
            })

        parameters = {'model__max_depth': [2, 3]}
        clf = GridSearchCV(model, parameters, cv=3)
        clf.fit(X, y)

        pred = clf.predict(X)
        self.assertEqualArray(y, pred)