def setUpClass(cls):
    """Load the datasets used by the tests once and cache them on the class.

    Every dataset is stored as a dict with the keys ``"X"`` (features) and
    ``"y"`` (labels). For loaders that return a train/test split, only the
    training part is kept.
    """
    # NOTE(review): unittest expects setUpClass to be a classmethod; the
    # decorator is not visible in this excerpt -- confirm it is present.

    def _as_xy(features, labels):
        # Uniform container shape shared by all cached datasets.
        return {"X": features, "y": labels}

    from sklearn.datasets import load_iris

    iris_bunch = load_iris()
    cls._irisArr = _as_xy(iris_bunch.data, iris_bunch.target)

    from lale.datasets import sklearn_to_pandas

    # The loader is looked up by name inside the loop so that each one is
    # resolved only when it is about to be called.
    for attr_name, loader_name in (
        ("_irisDf", "load_iris_df"),
        ("_digits", "digits_df"),
        ("_housing", "california_housing_df"),
    ):
        (features, labels), (_test_X, _test_y) = getattr(
            sklearn_to_pandas, loader_name
        )()
        setattr(cls, attr_name, _as_xy(features, labels))

    from lale.datasets import openml

    (features, labels), (_test_X, _test_y) = openml.fetch(
        "credit-g", "classification", preprocess=False
    )
    cls._creditG = _as_xy(features, labels)

    from lale.datasets import load_movie_review

    features, labels = load_movie_review()
    cls._movies = _as_xy(features, labels)

    from lale.datasets.uci.uci_datasets import fetch_drugscom

    features, labels, _test_X, _test_y = fetch_drugscom()
    cls._drugRev = _as_xy(features, labels)
def test_preprocessing_union(self):
    """Smoke-test fitting a pipeline with two parallel preprocessing branches.

    Columns of type ``number`` go through ``Normalizer`` and all other
    columns through ``OneHotEncoder``; the two results are concatenated and
    passed to a random forest. The planned pipeline is fitted on the
    ``credit-g`` training data via ``Hyperopt`` with a single evaluation.
    The test makes no assertions: it passes when ``fit`` does not raise.
    """
    from lale.datasets import openml

    (features, labels), (_test_X, _test_y) = openml.fetch(
        "credit-g", "classification", preprocess=False
    )

    from lale.lib.lale import ConcatFeatures, Project
    from lale.lib.sklearn import (
        Normalizer,
        OneHotEncoder,
        RandomForestClassifier,
    )

    # One branch per column kind, selected by JSON-schema-style type filters.
    numeric_branch = Project(columns={"type": "number"}) >> Normalizer
    categorical_branch = (
        Project(columns={"not": {"type": "number"}})
        >> OneHotEncoder(sparse=False)
    )

    # Run both branches side by side, then join their outputs.
    union = numeric_branch & categorical_branch
    planned = union >> ConcatFeatures >> RandomForestClassifier

    from lale.lib.lale import Hyperopt

    optimizer = Hyperopt(estimator=planned, max_evals=1)
    # The fitted result is not inspected; completing without error is the check.
    optimizer.fit(features, labels)