def test_CCA_PLSCannonical(self, algo):
    """Compare the ModelFrame cross-decomposition estimator with ``cd``.

    Builds synthetic X / Y matrices that share two latent variables, wraps
    the train and test halves in ``pdml.ModelFrame`` objects with
    multi-column targets, and checks that fitting / transforming through
    the frame gives the same first component as using the estimator from
    ``cd`` directly.

    Parameters
    ----------
    algo : str
        Attribute name of the estimator class; it is looked up on both
        ``train.cross_decomposition`` and ``cd``.
    """
    n = 500
    half = n // 2

    with tm.RNGContext(1):
        # 2 latents vars:
        l1 = np.random.normal(size=n)
        l2 = np.random.normal(size=n)

        # Each latent variable is duplicated, giving four columns.
        latents = np.array([l1, l1, l2, l2]).T
        # Draw the X noise before the Y noise so the random stream is
        # consumed in a fixed order.
        noise_x = np.random.normal(size=4 * n).reshape((n, 4))
        X = latents + noise_x
        noise_y = np.random.normal(size=4 * n).reshape((n, 4))
        Y = latents + noise_y

    X_train, X_test = X[:half], X[half:]
    Y_train, Y_test = Y[:half], Y[half:]

    train = pdml.ModelFrame(X_train, target=Y_train)
    test = pdml.ModelFrame(X_test, target=Y_test)

    # check multi target columns
    self.assertTrue(train.has_target())
    for frame, data, target in ((train, X_train, Y_train),
                                (test, X_test, Y_test)):
        tm.assert_numpy_array_equal(frame.data.values, data)
        tm.assert_numpy_array_equal(frame.target.values, target)

    expected = pd.MultiIndex.from_tuples(
        [('.target', i) for i in range(4)])
    tm.assert_index_equal(train.target_name, expected)

    self.assertEqual(train.data.shape, X_train.shape)
    self.assertEqual(train.target.shape, Y_train.shape)

    frame_estimator = getattr(train.cross_decomposition, algo)
    reference_estimator = getattr(cd, algo)
    mod1 = frame_estimator(n_components=2)
    mod2 = reference_estimator(n_components=2)

    train.fit(mod1)
    mod2.fit(X_train, Y_train)

    # 2nd cols are different on travis-CI
    for weights in ('x_weights_', 'y_weights_'):
        self.assert_numpy_array_almost_equal(getattr(mod1, weights)[:, 0],
                                             getattr(mod2, weights)[:, 0])

    result_tr = train.transform(mod1)
    result_test = test.transform(mod1)

    expected_tr = mod2.transform(X_train, Y_train)
    expected_test = mod2.transform(X_test, Y_test)

    for result in (result_tr, result_test):
        self.assertIsInstance(result, pdml.ModelFrame)

    # Only the first component is compared, for the same reason as the
    # weights above.
    for result, reference in ((result_tr, expected_tr),
                              (result_test, expected_test)):
        self.assert_numpy_array_almost_equal(result.data.values[:, 0],
                                             reference[0][:, 0])
        self.assert_numpy_array_almost_equal(result.target.values[:, 0],
                                             reference[1][:, 0])
def test_grid_search(self):
    """Grid-search an SVC on the digits data and check ``describe``.

    The frame returned by ``df.model_selection.describe(cv)`` must be a
    ``pdml.ModelFrame`` equal to ``pd.DataFrame(cv.cv_results_)``.
    """
    rbf_grid = {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                'C': [1, 10, 100]}
    linear_grid = {'kernel': ['linear'], 'C': [1, 10, 100]}
    tuned_parameters = [rbf_grid, linear_grid]

    df = pdml.ModelFrame(datasets.load_digits())
    base_estimator = df.svm.SVC(C=1)
    cv = df.model_selection.GridSearchCV(base_estimator, tuned_parameters,
                                         cv=5)

    # Fit under a fixed seed.
    with tm.RNGContext(1):
        df.fit(cv)

    result = df.model_selection.describe(cv)
    expected = pd.DataFrame(cv.cv_results_)

    self.assertIsInstance(result, pdml.ModelFrame)
    tm.assert_frame_equal(result, expected)
def test_grid_search(self):
    """Grid-search an XGBClassifier on the digits data and check ``describe``.

    The frame returned by ``df.model_selection.describe(cv)`` must be a
    ``pdml.ModelFrame`` equal to ``pd.DataFrame(cv.cv_results_)``.
    """
    param_grid = {'max_depth': [3, 4], 'n_estimators': [50, 100]}
    tuned_parameters = [param_grid]

    df = pdml.ModelFrame(datasets.load_digits())
    base_estimator = df.xgb.XGBClassifier()
    cv = df.model_selection.GridSearchCV(base_estimator, tuned_parameters,
                                         cv=5)

    # Fit under a fixed seed.
    with tm.RNGContext(1):
        df.fit(cv)

    result = df.model_selection.describe(cv)
    expected = pd.DataFrame(cv.cv_results_)

    self.assertIsInstance(result, pdml.ModelFrame)
    tm.assert_frame_equal(result, expected)
def test_grid_search(self):
    """Grid-search an XGBClassifier through the ``grid_search`` accessor.

    ``df.grid_search.describe(cv)`` must return a ``pdml.ModelFrame`` that
    matches a hard-coded table of mean / std scores, one row per
    ``max_depth`` / ``n_estimators`` combination.
    """
    param_grid = {'max_depth': [3, 4], 'n_estimators': [50, 100]}
    tuned_parameters = [param_grid]

    df = pdml.ModelFrame(datasets.load_digits())
    base_estimator = df.xgb.XGBClassifier()
    cv = df.grid_search.GridSearchCV(base_estimator, tuned_parameters, cv=5)

    # Fit under a fixed seed.
    with tm.RNGContext(1):
        df.fit(cv)

    result = df.grid_search.describe(cv)

    # Reference scores recorded for the seeded run above.
    column_order = ['mean', 'std', 'max_depth', 'n_estimators']
    expected_values = {
        'mean': [0.89705064, 0.91764051, 0.91263216, 0.91930996],
        'std': [0.03244061, 0.03259985, 0.02764891, 0.0266436],
        'max_depth': [3, 3, 4, 4],
        'n_estimators': [50, 100, 50, 100],
    }
    expected = pd.DataFrame(expected_values, columns=column_order)

    self.assertIsInstance(result, pdml.ModelFrame)
    self.assert_frame_equal(result, expected)
def test_grid_search(self):
    """Grid-search an SVC through the ``grid_search`` accessor.

    Runs a 5-fold ``GridSearchCV`` over an rbf grid and a linear grid on
    the digits data, then checks that ``df.grid_search.describe(cv)``
    returns a ``pdml.ModelFrame`` matching a hard-coded table of mean / std
    scores, one row per parameter combination.
    """
    tuned_parameters = [
        {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100]},
        {'kernel': ['linear'], 'C': [1, 10, 100]},
    ]

    df = pdml.ModelFrame(datasets.load_digits())
    cv = df.grid_search.GridSearchCV(df.svm.SVC(C=1), tuned_parameters, cv=5)

    # Fit under a fixed seed.
    with tm.RNGContext(1):
        df.fit(cv)

    result = df.grid_search.describe(cv)

    # Reference scores recorded for the seeded run above. The linear grid
    # has no ``gamma`` entry, so its three rows expect NaN in that column.
    expected = pd.DataFrame(
        {
            'mean': [
                0.97161937, 0.9476906, 0.97273233, 0.95937674, 0.97273233,
                0.96271564, 0.94936004, 0.94936004, 0.94936004
            ],
            'std': [
                0.01546977, 0.0221161, 0.01406514, 0.02295168, 0.01406514,
                0.01779749, 0.01911084, 0.01911084, 0.01911084
            ],
            'C': [1, 1, 10, 10, 100, 100, 1, 10, 100],
            'gamma': [
                0.001, 0.0001, 0.001, 0.0001, 0.001, 0.0001,
                np.nan, np.nan, np.nan
            ],
            'kernel': ['rbf'] * 6 + ['linear'] * 3
        },
        columns=['mean', 'std', 'C', 'gamma', 'kernel'])

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)), which only says "False is not true".
    self.assertIsInstance(result, pdml.ModelFrame)
    self.assert_frame_equal(result, expected)