Example #1
0
    def setUpClass(cls):
        """Load the datasets used by the tests and cache them on the class.

        Every dataset is stored as a dict with the keys ``"X"`` and ``"y"``
        under one of these class attributes: ``_irisArr``, ``_irisDf``,
        ``_digits``, ``_housing``, ``_creditG``, ``_movies``, ``_drugRev``.
        Where a loader also returns a test split, only the training part
        is kept.

        NOTE(review): unittest calls ``setUpClass`` on the class itself, so
        a ``@classmethod`` decorator is expected; it is not visible in this
        excerpt -- confirm it is present above the definition.
        """

        def as_xy(features, labels):
            # Uniform container shape shared by all cached datasets.
            return {"X": features, "y": labels}

        # Iris as returned by scikit-learn (attributes .data / .target).
        from sklearn.datasets import load_iris

        iris_bunch = load_iris()
        cls._irisArr = as_xy(iris_bunch.data, iris_bunch.target)

        # Datasets exposed through lale's sklearn_to_pandas helpers.
        from lale.datasets import sklearn_to_pandas

        (features, labels), (_, _) = sklearn_to_pandas.load_iris_df()
        cls._irisDf = as_xy(features, labels)

        (features, labels), (_, _) = sklearn_to_pandas.digits_df()
        cls._digits = as_xy(features, labels)

        (features, labels), (_, _) = sklearn_to_pandas.california_housing_df()
        cls._housing = as_xy(features, labels)

        # OpenML "credit-g", fetched with preprocess=False.
        from lale.datasets import openml

        credit_split = openml.fetch(
            "credit-g", "classification", preprocess=False
        )
        (features, labels), (_, _) = credit_split
        cls._creditG = as_xy(features, labels)

        # Movie reviews: the loader returns a single (X, y) pair.
        from lale.datasets import load_movie_review

        features, labels = load_movie_review()
        cls._movies = as_xy(features, labels)

        # Drugs.com reviews: the loader returns four values; keep the first two.
        from lale.datasets.uci.uci_datasets import fetch_drugscom

        features, labels, _, _ = fetch_drugscom()
        cls._drugRev = as_xy(features, labels)
Example #2
0
 def test_text_and_structured(self):
     """Run a one-evaluation Hyperopt search on text plus numeric features.

     The pipeline vectorizes the ``review`` column with TF-IDF (at most
     100 features), projects the numeric columns alongside it,
     concatenates both, and then chooses between ``LinearRegression`` and
     ``RandomForestRegressor``. Hyperopt is run with ``max_evals=1`` and
     ``scoring='r2'``. The body makes no assertions on the result.
     """
     from lale.datasets.uci.uci_datasets import fetch_drugscom
     from sklearn.model_selection import train_test_split

     all_X, all_y, _, _ = fetch_drugscom()
     # Keep only 1% of the training rows so the search runs quickly.
     sample_X, _, sample_y, _ = train_test_split(
         all_X, all_y, train_size=0.01, random_state=42
     )

     from lale.lib.lale import Project, ConcatFeatures as Cat
     from lale.lib.sklearn import (
         TfidfVectorizer as Tfidf,
         LinearRegression as LinReg,
         RandomForestRegressor as Forest,
     )

     # Two parallel feature branches: free text and numeric columns.
     text_branch = Project(columns=['review']) >> Tfidf(max_features=100)
     numeric_branch = Project(columns={'type': 'number'})
     combined_features = (text_branch & numeric_branch) >> Cat
     search_space = combined_features >> (LinReg | Forest)

     from lale.lib.lale import Hyperopt

     optimizer = Hyperopt(estimator=search_space, max_evals=1, scoring='r2')
     optimizer.fit(sample_X, sample_y)