Example #1
# imports and setup assumed for this excerpt (not shown in the original)
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

from combo.models.classifier_comb import SimpleClassifierAggregator
from combo.models.classifier_stacking import Stacking

random_state = 42  # seed value assumed; used below for the meta classifier

# initialize a group of base classifiers (the head of this list was cut off
# in the excerpt and is reconstructed from the estimators listed below)
base_classifiers = [
    LogisticRegression(),
    GaussianNB(),
    SVC(probability=True),
    KNeighborsClassifier()
]

# Define some combination methods to be compared
classifiers = {
    'Logistic Regression':
    LogisticRegression(),
    'Gaussian NB':
    GaussianNB(),
    'Support Vector Machine':
    SVC(probability=True),
    'k Nearest Neighbors':
    KNeighborsClassifier(),
    'Simple Average':
    SimpleClassifierAggregator(base_estimators=base_classifiers,
                               method='average'),
    'Simple Maximization':
    SimpleClassifierAggregator(base_estimators=base_classifiers,
                               method='maximization'),
    'Stacking':
    Stacking(base_estimators=base_classifiers, shuffle_data=True),
    'Stacking_RF':
    Stacking(base_estimators=base_classifiers,
             shuffle_data=True,
             meta_clf=RandomForestClassifier(random_state=random_state))
}

# Show all classifiers
for i, clf in enumerate(classifiers.keys()):
    print('Model', i + 1, clf)
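
The dict above only constructs the models; the sketch below shows one way they could be fit and compared, assuming the breast-cancer split used in Example #2. The split, loop, and formatting are illustrative additions, not part of the original listing.

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# illustrative train/test split, mirroring Example #2 below
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=random_state)

# single estimators and combiners share the same fit/predict API
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    print('%-22s accuracy: %.4f'
          % (name, accuracy_score(y_test, clf.predict(X_test))))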
Example #2
# imports assumed for this test excerpt (not shown in the original); the
# assert_* helpers follow the older numpy.testing / sklearn.utils.testing style
import unittest

import numpy as np
from numpy.testing import assert_allclose
from numpy.testing import assert_equal
from sklearn.utils.testing import assert_true
from sklearn.utils.testing import assert_greater
from sklearn.utils.testing import assert_greater_equal
from sklearn.utils.testing import assert_less_equal

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

from combo.models.classifier_comb import SimpleClassifierAggregator
from combo.utils.data import evaluate_print


class TestAverage(unittest.TestCase):
    def setUp(self):
        self.roc_floor = 0.9
        self.accuracy_floor = 0.9

        random_state = 42
        X, y = load_breast_cancer(return_X_y=True)

        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(X, y, test_size=0.4, random_state=random_state)

        classifiers = [DecisionTreeClassifier(random_state=random_state),
                       LogisticRegression(random_state=random_state),
                       KNeighborsClassifier(),
                       RandomForestClassifier(random_state=random_state),
                       GradientBoostingClassifier(random_state=random_state)]

        self.clf = SimpleClassifierAggregator(classifiers, method='average')
        self.clf.fit(self.X_train, self.y_train)

    def test_parameters(self):
        assert_true(hasattr(self.clf, 'base_estimators') and
                    self.clf.base_estimators is not None)

    def test_train_scores(self):
        y_train_predicted = self.clf.predict(self.X_train)
        assert_equal(len(y_train_predicted), self.X_train.shape[0])

        # check performance
        assert_greater(accuracy_score(self.y_train, y_train_predicted),
                       self.accuracy_floor)

    def test_prediction_scores(self):
        y_test_predicted = self.clf.predict(self.X_test)
        assert_equal(len(y_test_predicted), self.X_test.shape[0])

        # check performance
        assert_greater(accuracy_score(self.y_test, y_test_predicted),
                       self.accuracy_floor)

        # test utility function
        evaluate_print('averaging', self.y_test, y_test_predicted)

    def test_prediction_proba(self):
        y_test_predicted = self.clf.predict_proba(self.X_test)
        assert_greater_equal(y_test_predicted.min(), 0)
        assert_less_equal(y_test_predicted.max(), 1)

        # check performance
        assert_greater(roc_auc_score(self.y_test, y_test_predicted[:, 1]),
                       self.roc_floor)

        # check shape integrity
        n_classes = len(np.unique(self.y_train))
        assert_equal(y_test_predicted.shape, (self.X_test.shape[0], n_classes))

        # check probability sum is 1
        y_test_predicted_sum = np.sum(y_test_predicted, axis=1)
        assert_allclose(np.ones([self.X_test.shape[0], ]),
                        y_test_predicted_sum)

    def tearDown(self):
        pass


class TestWeightedAverage(unittest.TestCase):
    def setUp(self):
        self.roc_floor = 0.9
        self.accuracy_floor = 0.9

        random_state = 42
        X, y = load_breast_cancer(return_X_y=True)

        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(X, y, test_size=0.4, random_state=random_state)

        clf_weights = np.array([0.1, 0.4, 0.1, 0.2, 0.2])

        classifiers = [
            DecisionTreeClassifier(random_state=random_state),
            LogisticRegression(random_state=random_state),
            KNeighborsClassifier(),
            RandomForestClassifier(random_state=random_state),
            GradientBoostingClassifier(random_state=random_state)
        ]

        self.clf = SimpleClassifierAggregator(classifiers,
                                              method='average',
                                              weights=clf_weights)

        self.clf.fit(self.X_train, self.y_train)

    def test_weights(self):
        assert_equal(np.sum(self.clf.weights), self.clf.n_base_estimators_)

    def test_parameters(self):
        assert (hasattr(self.clf, 'base_estimators')
                and self.clf.base_estimators is not None)

        # print clf details
        print(self.clf)

        # set parameters
        self.clf.set_params()

    def test_train_scores(self):
        y_train_predicted = self.clf.predict(self.X_train)
        assert_equal(len(y_train_predicted), self.X_train.shape[0])

        # check performance
        assert (accuracy_score(self.y_train, y_train_predicted) >=
                self.accuracy_floor)

    def test_prediction_scores(self):
        y_test_predicted = self.clf.predict(self.X_test)
        assert_equal(len(y_test_predicted), self.X_test.shape[0])

        # check performance
        assert (accuracy_score(self.y_test, y_test_predicted) >=
                self.accuracy_floor)

    def test_prediction_proba(self):
        y_test_predicted = self.clf.predict_proba(self.X_test)
        assert (y_test_predicted.min() >= 0)
        assert (y_test_predicted.max() <= 1)

        # check performance
        assert (roc_auc_score(self.y_test, y_test_predicted[:, 1]) >=
                self.roc_floor)

        # check shape integrity
        n_classes = len(np.unique(self.y_train))
        assert_equal(y_test_predicted.shape, (self.X_test.shape[0], n_classes))

        # check probability sum is 1
        y_test_predicted_sum = np.sum(y_test_predicted, axis=1)
        assert_allclose(np.ones([self.X_test.shape[0], ]),
                        y_test_predicted_sum)

    def tearDown(self):
        pass
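
The test_weights check above implies that SimpleClassifierAggregator rescales user-supplied weights so they sum to the number of base estimators before averaging. The NumPy sketch below illustrates that implied behavior; it is an illustration, not the library's own implementation.

import numpy as np

# raw weights as passed in TestWeightedAverage
raw_weights = np.array([0.1, 0.4, 0.1, 0.2, 0.2])
n_estimators = len(raw_weights)

# rescale so the weights sum to n_estimators, matching test_weights
weights = raw_weights * n_estimators / raw_weights.sum()
assert np.isclose(weights.sum(), n_estimators)

# toy per-estimator probabilities: shape (n_estimators, n_samples, n_classes)
rng = np.random.RandomState(42)
probas = rng.rand(n_estimators, 3, 2)
probas /= probas.sum(axis=2, keepdims=True)

# weighted average over estimators; each row still sums to 1
weighted_avg = np.tensordot(weights, probas, axes=1) / weights.sum()
print(weighted_avg.sum(axis=1))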
    # fit and score the remaining individual classifier (a random forest,
    # per the label below)
    clf.fit(X_train, y_train)
    evaluate_print('Random Forest        |', y_test, clf.predict(X_test))

    print()

    # initialize a group of classifiers
    classifiers = [
        DecisionTreeClassifier(random_state=random_state),
        LogisticRegression(random_state=random_state),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=random_state),
        GradientBoostingClassifier(random_state=random_state)
    ]

    # combine by averaging
    clf = SimpleClassifierAggregator(classifiers, method='average')
    clf.fit(X_train, y_train)
    y_test_predicted = clf.predict(X_test)
    evaluate_print('Combination by avg   |', y_test, y_test_predicted)

    # combine by weighted averaging
    clf_weights = np.array([0.1, 0.4, 0.1, 0.2, 0.2])
    clf = SimpleClassifierAggregator(classifiers,
                                     method='average',
                                     weights=clf_weights)
    clf.fit(X_train, y_train)
    y_test_predicted = clf.predict(X_test)
    evaluate_print('Combination by w_avg |', y_test, y_test_predicted)

    # combine by maximization
    clf = SimpleClassifierAggregator(classifiers, method='maximization')
    clf.fit(X_train, y_train)
    y_test_predicted = clf.predict(X_test)
    evaluate_print('Combination by max   |', y_test, y_test_predicted)
    # ROC AUC of the maximization combiner (rebuilt around a print statement
    # that was truncated in the excerpt)
    print('Combination by max  |',
          np.round(roc_auc_score(y_test,
                                 clf.predict_proba(X_test)[:, 1]),
                   decimals=4))

    print()

    # initialize a group of classifiers
    classifiers = [
        DecisionTreeClassifier(random_state=random_state),
        LogisticRegression(random_state=random_state),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=random_state),
        GradientBoostingClassifier(random_state=random_state)
    ]

    # combine by averaging
    clf = SimpleClassifierAggregator(classifiers, method='average')
    clf.fit(X_train, y_train)
    y_test_predicted = clf.predict_proba(X_test)
    print(
        'Combination by avg  |',
        np.round(roc_auc_score(y_test,
                               clf.predict_proba(X_test)[:, 1]),
                 decimals=4))

    # combine by weighted averaging
    clf_weights = np.array([0.1, 0.4, 0.1, 0.2, 0.2])
    clf = SimpleClassifierAggregator(classifiers,
                                     method='average',
                                     weights=clf_weights)
    clf.fit(X_train, y_train)
    y_test_predicted = clf.predict_proba(X_test)
    print(
        'Combination by w_avg|',