def test_fit_plus_transform(self):
    """Check that chaining ``fit`` and ``transform`` yields the named columns.

    A two-component PCA is wrapped with ``trans`` using ``None`` as the
    second argument and ``['pc1', 'pc2']`` as the third; the transformed
    result is expected to expose exactly those two column names.
    """
    iris, _ = _load_iris()
    expected_columns = {'pc1', 'pc2'}

    pca = pd_decomposition.PCA(n_components=2)
    decomp = trans(pca, None, ['pc1', 'pc2'])

    fitted = decomp.fit(iris)
    tr = fitted.transform(iris)

    # Only membership matters here, not column order.
    self.assertEqual(set(tr.columns), expected_columns)
def test_direct_single(self):
    """Check that a pickled-and-restored PCA transforms like the original.

    The estimator is serialized before it is ever fitted, so both the
    original and the restored copy start from the same unfitted state.
    """
    iris, features = _load_iris()

    original = pd_decomposition.PCA()
    # Round-trip through pickle while the estimator is still unfitted.
    restored = pickle.loads(pickle.dumps(original))

    # Same call order as before: the restored copy is fitted first.
    from_restored = restored.fit_transform(iris[features])
    from_original = original.fit_transform(iris[features])

    self.assertTrue(from_restored.equals(from_original))
def test_cv(self):
    """Smoke-test a grid search over a PCA | LogisticRegression pipeline.

    The search object is always constructed; the actual fit only runs
    when the module-level ``_level`` is at least 1.
    """
    digits, features = _load_digits()

    pipeline_clf = (
        pd_decomposition.PCA() | pd_linear_model.LogisticRegression())
    param_grid = {
        'pca__n_components': [20, 40, 64],
        'logisticregression__C': np.logspace(-4, 4, 3),
    }
    estimator = PDGridSearchCV(pipeline_clf, param_grid)

    # Below level 1 the test stops after construction and never fits.
    if not _level < 1:
        estimator.fit(digits[features], digits.digit)
def test_direct_pipe_adapter(self):
    """Smoke-test that a piped PCA | LinearRegression survives pickling.

    Nothing is asserted about the restored object; the test passes as
    long as serializing and deserializing raise no exception.
    """
    piped = pd_decomposition.PCA() | pd_linear_model.LinearRegression()

    payload = pickle.dumps(piped)
    unpickled_clf = pickle.loads(payload)
_iris, _features = _load_iris() _dataset_names.append('iris') _Xs.append(_iris[_features]) _ys.append(_iris['class']) _Xs.append(_iris[_features]) _ys.append(_iris['class'] == _iris['class'].values[0]) _iris = _iris.copy() _iris.index = ['i%d' % i for i in range(len(_iris))] _dataset_names.append('iris_str_index') _Xs.append(_iris[_features]) _ys.append(_iris['class']) _estimators = [] _estimators.append( (preprocessing.StandardScaler(), pd_preprocessing.StandardScaler(), True)) _estimators.append((decomposition.PCA(), pd_decomposition.PCA(), True)) _estimators.append((linear_model.LinearRegression(), frame(pd_linear_model.LinearRegression()), True)) _estimators.append((linear_model.LinearRegression(), pd_linear_model.LinearRegression(), True)) _estimators.append( (pipeline.make_pipeline(decomposition.PCA(), linear_model.LinearRegression()), pd_decomposition.PCA() | pd_linear_model.LinearRegression(), True)) _estimators.append( (pipeline.make_pipeline(feature_selection.SelectKBest(k=2), decomposition.PCA(), linear_model.LinearRegression()), pd_feature_selection.SelectKBest(k=2) | pd_decomposition.PCA() | pd_linear_model.LinearRegression(), True)) _estimators.append(