Beispiel #1
0
    def test_grid_search_precomputed_kernel(self):
        """Check that grid search accepts a precomputed kernel matrix.

        A linear-kernel Gram matrix is built from the first 180 generated
        samples and used to fit an ``SVC(kernel='precomputed')`` through
        ``ATGridSearchCV`` (pointed at ``self.live_server_url``). The
        remaining samples are then predicted from their kernel against the
        training samples. Finally, passing the kernel as a plain nested list
        must raise ``ValueError``.
        """
        n_train = 180
        features, labels = make_classification(n_samples=200,
                                               n_features=100,
                                               random_state=0)
        train_features = features[:n_train]
        train_labels = labels[:n_train]

        # Linear kernel: Gram matrix of the training samples with themselves.
        gram_train = np.dot(train_features, train_features.T)

        search = ATGridSearchCV(SVC(kernel='precomputed'),
                                {'C': [0.1, 1.0]},
                                webserver_url=self.live_server_url)
        # fit() hands back a handle that wait() consumes -- presumably it
        # blocks until the remote search has finished (confirm in wait()).
        wait(search.fit(gram_train, train_labels))

        assert_true(search.best_score_ >= 0)

        # Kernel between the held-out samples and the training samples.
        gram_test = np.dot(features[n_train:], train_features.T)
        predictions = search.predict(gram_test)

        accuracy = np.mean(predictions == labels[n_train:])
        assert_true(accuracy >= 0)

        # A precomputed kernel that is neither array-like nor sparse (here a
        # nested Python list) has to be rejected.
        assert_raises(ValueError, search.fit, gram_train.tolist(),
                      train_labels)
Beispiel #2
0
    def test_grid_search_sparse_scoring(self):
        """Check that dense and sparse inputs give the same search result.

        Three searches over ``C`` in ``[0.1, 1.0]`` are fitted on the first
        180 samples: ``"f1"`` scoring on the dense data, ``"f1"`` scoring on
        the CSR version of the same data, and a custom negated-F1 loss scorer
        on the CSR data. All three must select the same ``C`` and yield the
        same predictions for the remaining samples.
        """
        dense_X, y_ = make_classification(n_samples=200,
                                          n_features=100,
                                          random_state=0)

        def search_and_predict(estimator, data, scoring):
            # Fit a search on the first 180 rows, then return the held-out
            # predictions together with the selected C.
            search = ATGridSearchCV(estimator, {'C': [0.1, 1.0]},
                                    scoring=scoring,
                                    webserver_url=self.live_server_url)
            wait(search.fit(data[:180], y_[:180]))
            held_out_pred = search.predict(data[180:])
            return held_out_pred, search.best_estimator_.C

        # Dense input, built-in "f1" scoring.
        dense_pred, dense_C = search_and_predict(LinearSVC(), dense_X, "f1")

        # Same data in CSR form, same scoring.
        sparse_X = sp.csr_matrix(dense_X)
        sparse_clf = LinearSVC()
        sparse_pred, sparse_C = search_and_predict(sparse_clf, sparse_X,
                                                   "f1")

        assert_array_equal(dense_pred, sparse_pred)
        assert_equal(dense_C, sparse_C)

        # TODO: the smoke test comparing f1_score on the training split with
        # the search's own score() is currently disabled.

        # Loss-style scorer, where a greater value is worse.
        def f1_loss(expected, predicted):
            return -f1_score(expected, predicted)

        loss_scorer = make_scorer(f1_loss, greater_is_better=False)
        # Reuses the estimator instance already passed to the sparse "f1"
        # search above.
        loss_pred, loss_C = search_and_predict(sparse_clf, sparse_X,
                                               loss_scorer)

        assert_equal(dense_C, loss_C)
        assert_array_equal(dense_pred, loss_pred)