def test_CCA_PLSCannonical(self, algo):
        """Compare ``algo`` fitted through a ModelFrame with the one from ``cd``.

        ``algo`` is the attribute name of the estimator class, looked up both
        on ``train.cross_decomposition`` and on ``cd``. Both estimators are
        fitted on the same training half of a synthetic data set, and the
        first component of their weights and transformed outputs must agree.
        """
        n = 500
        half = n // 2

        with tm.RNGContext(1):
            # Two latent variables, each one repeated in two columns.
            l1 = np.random.normal(size=n)
            l2 = np.random.normal(size=n)
            latents = np.array([l1, l1, l2, l2]).T

            # Noise for X is drawn before noise for Y.
            x_noise = np.random.normal(size=4 * n).reshape((n, 4))
            X = latents + x_noise
            y_noise = np.random.normal(size=4 * n).reshape((n, 4))
            Y = latents + y_noise

        X_train, X_test = X[:half], X[half:]
        Y_train, Y_test = Y[:half], Y[half:]

        train = pdml.ModelFrame(X_train, target=Y_train)
        test = pdml.ModelFrame(X_test, target=Y_test)

        # A 2-D target must be kept as multiple target columns.
        self.assertTrue(train.has_target())
        for frame, data, target in ((train, X_train, Y_train),
                                    (test, X_test, Y_test)):
            tm.assert_numpy_array_equal(frame.data.values, data)
            tm.assert_numpy_array_equal(frame.target.values, target)

        target_columns = pd.MultiIndex.from_tuples(
            [('.target', i) for i in range(4)])
        tm.assert_index_equal(train.target_name, target_columns)
        self.assertEqual(train.data.shape, X_train.shape)
        self.assertEqual(train.target.shape, Y_train.shape)

        wrapped = getattr(train.cross_decomposition, algo)(n_components=2)
        reference = getattr(cd, algo)(n_components=2)

        train.fit(wrapped)
        reference.fit(X_train, Y_train)

        # Only the first component is compared: the 2nd columns are
        # different on travis-CI.
        for attr in ('x_weights_', 'y_weights_'):
            self.assert_numpy_array_almost_equal(getattr(wrapped, attr)[:, 0],
                                                 getattr(reference, attr)[:, 0])

        results = [train.transform(wrapped), test.transform(wrapped)]
        expecteds = [reference.transform(X_train, Y_train),
                     reference.transform(X_test, Y_test)]

        for transformed in results:
            self.assertIsInstance(transformed, pdml.ModelFrame)

        for transformed, expected_pair in zip(results, expecteds):
            # expected_pair[0] is compared with the data part and
            # expected_pair[1] with the target part.
            self.assert_numpy_array_almost_equal(
                transformed.data.values[:, 0], expected_pair[0][:, 0])
            self.assert_numpy_array_almost_equal(
                transformed.target.values[:, 0], expected_pair[1][:, 0])
예제 #2
0
    def test_grid_search(self):
        """``model_selection.describe`` must mirror ``cv_results_`` for an SVC search."""
        rbf_grid = {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                    'C': [1, 10, 100]}
        linear_grid = {'kernel': ['linear'], 'C': [1, 10, 100]}

        digits = pdml.ModelFrame(datasets.load_digits())
        search = digits.model_selection.GridSearchCV(
            digits.svm.SVC(C=1), [rbf_grid, linear_grid], cv=5)

        # Fit under a fixed seed.
        with tm.RNGContext(1):
            digits.fit(search)

        summary = digits.model_selection.describe(search)
        reference = pd.DataFrame(search.cv_results_)

        self.assertIsInstance(summary, pdml.ModelFrame)
        tm.assert_frame_equal(summary, reference)
예제 #3
0
    def test_grid_search(self):
        """``model_selection.describe`` must mirror ``cv_results_`` for an XGBClassifier search."""
        param_grid = {'max_depth': [3, 4], 'n_estimators': [50, 100]}

        digits = pdml.ModelFrame(datasets.load_digits())
        search = digits.model_selection.GridSearchCV(
            digits.xgb.XGBClassifier(), [param_grid], cv=5)

        # Fit under a fixed seed.
        with tm.RNGContext(1):
            digits.fit(search)

        summary = digits.model_selection.describe(search)
        reference = pd.DataFrame(search.cv_results_)

        self.assertIsInstance(summary, pdml.ModelFrame)
        tm.assert_frame_equal(summary, reference)
예제 #4
0
    def test_grid_search(self):
        """``grid_search.describe`` must return the recorded scores for an XGBClassifier search."""
        param_grid = {'max_depth': [3, 4], 'n_estimators': [50, 100]}

        digits = pdml.ModelFrame(datasets.load_digits())
        search = digits.grid_search.GridSearchCV(
            digits.xgb.XGBClassifier(), [param_grid], cv=5)

        # Fit under a fixed seed.
        with tm.RNGContext(1):
            digits.fit(search)

        summary = digits.grid_search.describe(search)

        # Expected scores, one row per (max_depth, n_estimators) combination.
        column_order = ['mean', 'std', 'max_depth', 'n_estimators']
        expected_values = {
            'mean': [0.89705064, 0.91764051, 0.91263216, 0.91930996],
            'std': [0.03244061, 0.03259985, 0.02764891, 0.0266436],
            'max_depth': [3, 3, 4, 4],
            'n_estimators': [50, 100, 50, 100],
        }
        reference = pd.DataFrame(expected_values, columns=column_order)

        self.assertIsInstance(summary, pdml.ModelFrame)
        self.assert_frame_equal(summary, reference)
예제 #5
0
    def test_grid_search(self):
        """``grid_search.describe`` must return the recorded scores for an SVC search.

        Two parameter grids are searched (RBF kernel over ``gamma`` and ``C``,
        linear kernel over ``C`` only) with 5-fold cross-validation on the
        digits data set. The described result is compared with hard-coded
        mean/std scores.
        """
        tuned_parameters = [{
            'kernel': ['rbf'],
            'gamma': [1e-3, 1e-4],
            'C': [1, 10, 100]
        }, {
            'kernel': ['linear'],
            'C': [1, 10, 100]
        }]

        df = pdml.ModelFrame(datasets.load_digits())
        cv = df.grid_search.GridSearchCV(df.svm.SVC(C=1),
                                         tuned_parameters,
                                         cv=5)

        # Fit under a fixed seed.
        with tm.RNGContext(1):
            df.fit(cv)

        result = df.grid_search.describe(cv)
        # Six RBF rows (C x gamma) followed by three linear rows; the linear
        # grid has no ``gamma``, hence NaN in that column.
        expected = pd.DataFrame(
            {
                'mean': [
                    0.97161937, 0.9476906, 0.97273233, 0.95937674, 0.97273233,
                    0.96271564, 0.94936004, 0.94936004, 0.94936004
                ],
                'std': [
                    0.01546977, 0.0221161, 0.01406514, 0.02295168, 0.01406514,
                    0.01779749, 0.01911084, 0.01911084, 0.01911084
                ],
                'C': [1, 1, 10, 10, 100, 100, 1, 10, 100],
                'gamma': [
                    0.001, 0.0001, 0.001, 0.0001, 0.001, 0.0001, np.nan,
                    np.nan, np.nan
                ],
                'kernel': ['rbf'] * 6 + ['linear'] * 3
            },
            columns=['mean', 'std', 'C', 'gamma', 'kernel'])
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(result, pdml.ModelFrame)
        self.assert_frame_equal(result, expected)