def dont_test_with_gridsearchcv2_auto(self):
    """Compare GridSearchCV accuracy for two flavours of a PCA >> LR pipeline.

    Two scikit-learn ``Pipeline`` objects are built with identical step
    names (``pca.name()`` and ``lr.name()``): one whose steps are the
    module-level ``PCA`` / ``LogisticRegression`` (presumably the lale
    operators, going by the variable names), and one whose steps are the
    estimators imported directly from scikit-learn.  Both are searched with
    the same two randomly sampled parameter grids and the accuracy of each
    fitted search on the iris training data is asserted to be equal.

    NOTE(review): the ``dont_`` prefix means the name does not start with
    ``test``, so a default unittest loader will presumably not collect it.
    """
    from sklearn.datasets import load_iris
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.linear_model import LogisticRegression as SklearnLR
    from sklearn.metrics import accuracy_score, make_scorer
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import Pipeline

    lr = LogisticRegression(random_state=42)
    pca = PCA(random_state=42, svd_solver="arpack")
    trainable = pca >> lr

    all_parameters = get_grid_search_parameter_grids(trainable, num_samples=1)
    # otherwise the test takes too long
    parameters = random.sample(all_parameters, 2)

    # Both searches use the same data, so it is loaded only once.
    iris = load_iris()

    def grid_search_accuracy(pca_step, lr_step):
        """Fit a 2-fold grid search over the given steps; return its accuracy."""
        pipeline = Pipeline([(pca.name(), pca_step), (lr.name(), lr_step)])
        # Warnings raised while fitting/predicting are deliberately silenced.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            clf = GridSearchCV(
                pipeline,
                parameters,
                cv=2,
                scoring=make_scorer(accuracy_score),
            )
            clf.fit(iris.data, iris.target)
            predicted = clf.predict(iris.data)
            return accuracy_score(iris.target, predicted)

    accuracy_with_lale_operators = grid_search_accuracy(
        PCA(random_state=42, svd_solver="arpack"),
        LogisticRegression(random_state=42),
    )
    accuracy_with_scikit_operators = grid_search_accuracy(
        SklearnPCA(random_state=42, svd_solver="arpack"),
        SklearnLR(random_state=42),
    )
    self.assertEqual(accuracy_with_lale_operators, accuracy_with_scikit_operators)
def test_with_gridsearchcv3_auto(self):
    """Smoke-test GridSearchCV over a Nystroem -> LogisticRegression pipeline.

    The parameter grids come from ``get_grid_search_parameter_grids`` applied
    to ``Nystroem() >> lr``; two of them are picked at random and searched
    with 2-fold cross-validation on the iris data.  Nothing is asserted: the
    test passes as long as fitting and predicting do not raise.
    """
    from sklearn.datasets import load_iris
    from sklearn.metrics import accuracy_score, make_scorer
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import Pipeline

    lr = LogisticRegression()
    steps = [
        (Nystroem().name(), Nystroem()),
        (lr.name(), LogisticRegression()),
    ]
    scikit_pipeline = Pipeline(steps)

    planned_pipeline = Nystroem() >> lr
    candidate_grids = get_grid_search_parameter_grids(
        planned_pipeline, num_samples=1
    )
    # Searching every generated grid would make the test too slow, so only
    # two randomly chosen grids are kept.
    chosen_grids = random.sample(candidate_grids, 2)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        scorer = make_scorer(accuracy_score)
        search = GridSearchCV(scikit_pipeline, chosen_grids, cv=2, scoring=scorer)
        dataset = load_iris()
        features, labels = dataset.data, dataset.target
        search.fit(features, labels)
        # The prediction is discarded; the call only has to succeed.
        search.predict(features)