def test_make_classification(self):
    """Fit a ``PipelineCache`` once and inspect the cache it fills.

    A PCA + logistic-regression ``PipelineCache`` bound to the cache named
    ``'cache__'`` is fitted on the data returned by
    ``make_classification(random_state=42)``. The cache must then hold
    exactly one entry, and its key must contain the substrings
    ``"[('X',"`` and ``"('copy', 'True')"``.
    """
    # NOTE(review): a second ``test_make_classification`` is visible later in
    # this file; if both live in the same class, the later one replaces this
    # one and this body never runs -- confirm and rename if so.
    X, y = make_classification(random_state=42)
    pipe = PipelineCache(
        [('pca', PCA(2)), ('lr', LogisticRegression())], 'cache__')
    pipe.fit(X, y)
    cache = MLCache.get_cache('cache__')
    try:
        self.assertEqual(len(cache), 1)
        key = list(cache.keys())[0]
        self.assertIn("[('X',", key)
        self.assertIn("('copy', 'True')", key)
    finally:
        # Remove the named cache even when an assertion above fails, so the
        # failure does not leave 'cache__' registered for later tests.
        MLCache.remove_cache('cache__')
def test_grid_search_model(self):
    """Grid-search over interchangeable first steps, with and without cache.

    The same parameter grid (``PCA(2)`` or ``SVD(2)`` as the ``'pca'`` step,
    crossed with ``lr__fit_intercept``) is searched twice: once on a plain
    ``Pipeline`` and once on a ``PipelineCache`` bound to ``'cache__3'``.
    The test checks the number of cached entries, the content of the first
    cache key, and that both searches select the same ``best_params_``.
    """
    X, y = make_classification(random_state=42)
    param_grid = [
        {'pca': [PCA(2)], 'lr__fit_intercept': [False, True]},
        {'pca': [SVD(2)], 'lr__fit_intercept': [False, True]},
    ]

    # Reference search on the uncached pipeline.
    pipe = Pipeline([('pca', 'passthrough'), ('lr', LogisticRegression())])
    grid0 = GridSearchCV(pipe, param_grid, error_score='raise')
    grid0.fit(X, y)

    # Same search on the caching pipeline.
    pipe = PipelineCache(
        [('pca', 'passthrough'), ('lr', LogisticRegression())], 'cache__3')
    grid = GridSearchCV(pipe, param_grid, error_score='raise')
    grid.fit(X, y)

    cache = MLCache.get_cache('cache__3')
    try:
        # 0.22 increases the number of cached results
        self.assertIn(len(cache), (7, 11))
        key = list(cache.keys())[0]
        self.assertIn("[('X',", key)
        self.assertIn("('copy', 'True')", key)
    finally:
        # Remove the named cache even when an assertion above fails, so the
        # failure does not leave 'cache__3' registered for later tests.
        MLCache.remove_cache('cache__3')
    self.assertEqual(grid0.best_params_, grid.best_params_)
def test_make_classification(self):
    """Compare ``PipelineCache`` with ``Pipeline`` and inspect the cache.

    Both pipelines use the same PCA + logistic-regression steps. When the
    plain ``Pipeline`` exposes ``_check_fit_params``, its result is compared
    with the one returned by ``PipelineCache``. Both pipelines are then
    fitted, and the cache named ``'cache__'`` must hold exactly one entry
    whose key contains ``"[('X',"`` and ``"('copy', 'True')"``.
    """
    # NOTE(review): an earlier ``test_make_classification`` is visible in this
    # file; if both live in the same class, this definition replaces it.
    X, y = make_classification(random_state=42)
    pipe0 = Pipeline([('pca', PCA(2)), ('lr', LogisticRegression())])
    pipe = PipelineCache(
        [('pca', PCA(2)), ('lr', LogisticRegression())], 'cache__')

    # The private helper is only compared when the plain Pipeline has it.
    if hasattr(pipe0, '_check_fit_params'):
        pars0 = pipe0._check_fit_params()  # pylint: disable=W0212,E1101
        pars1 = pipe._check_fit_params()  # pylint: disable=W0212,E1101
        self.assertEqual(pars0, pars1)

    pipe0.fit(X, y)
    pipe.fit(X, y)
    cache = MLCache.get_cache('cache__')
    try:
        self.assertEqual(len(cache), 1)
        key = list(cache.keys())[0]
        self.assertIn("[('X',", key)
        self.assertIn("('copy', 'True')", key)
    finally:
        # Remove the named cache even when an assertion above fails, so the
        # failure does not leave 'cache__' registered for later tests.
        MLCache.remove_cache('cache__')
def test_grid_search_1(self):
    """Grid-search PCA and logistic-regression settings, with and without cache.

    The same grid (``pca__n_components``, ``pca__whiten``,
    ``lr__fit_intercept``) is searched with ``n_jobs=1`` on a plain
    ``Pipeline`` and on a ``PipelineCache`` bound to ``'cache__1'``. The test
    checks the number of cached entries, the content of the first cache key,
    and that both searches select the same ``best_params_``.
    """
    X, y = make_classification(random_state=42)
    param_grid = {
        'pca__n_components': [2, 3],
        'pca__whiten': [True, False],
        'lr__fit_intercept': [True, False],
    }

    # Reference search on the uncached pipeline.
    pipe = Pipeline([('pca', PCA(2)), ('lr', LogisticRegression())])
    grid0 = GridSearchCV(pipe, param_grid, error_score='raise', n_jobs=1)
    grid0.fit(X, y)

    # Same search on the caching pipeline.
    pipe = PipelineCache(
        [('pca', PCA(2)), ('lr', LogisticRegression())], 'cache__1')
    grid = GridSearchCV(pipe, param_grid, error_score='raise', n_jobs=1)
    grid.fit(X, y)

    cache = MLCache.get_cache('cache__1')
    try:
        # 0.22 increases the number of cached results
        self.assertIn(len(cache), (13, 21))
        key = list(cache.keys())[0]
        self.assertIn("[('X',", key)
        self.assertIn("('copy', 'True')", key)
    finally:
        # Remove the named cache even when an assertion above fails, so the
        # failure does not leave 'cache__1' registered for later tests.
        MLCache.remove_cache('cache__1')
    self.assertEqual(grid0.best_params_, grid.best_params_)