예제 #1
0
    def setUpClass(cls):
        """Load all datasets shared by the tests into class attributes.

        Schema validation is temporarily enabled while loading so the
        datasets get schema-checked; the caller's global setting is
        restored afterwards even if a loader raises.
        """
        from sklearn.datasets import load_iris

        # Read the *current* flag from lale.settings rather than a possibly
        # stale module-level snapshot taken at import time.
        from lale.settings import disable_data_schema_validation

        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)
        try:
            irisArr = load_iris()
            cls._irisArr = {"X": irisArr.data, "y": irisArr.target}
            from lale.datasets import sklearn_to_pandas

            (train_X, train_y), (test_X, test_y) = sklearn_to_pandas.load_iris_df()
            cls._irisDf = {"X": train_X, "y": train_y}
            (train_X, train_y), (test_X, test_y) = sklearn_to_pandas.digits_df()
            cls._digits = {"X": train_X, "y": train_y}
            (
                (train_X, train_y),
                (test_X, test_y),
            ) = sklearn_to_pandas.california_housing_df()
            cls._housing = {"X": train_X, "y": train_y}
            from lale.datasets import openml

            (train_X, train_y), (test_X, test_y) = openml.fetch(
                "credit-g", "classification", preprocess=False
            )
            cls._creditG = {"X": train_X, "y": train_y}
            from lale.datasets import load_movie_review

            train_X, train_y = load_movie_review()
            cls._movies = {"X": train_X, "y": train_y}
            from lale.datasets.uci.uci_datasets import fetch_drugscom

            train_X, train_y, test_X, test_y = fetch_drugscom()
            cls._drugRev = {"X": train_X, "y": train_y}
        finally:
            # Restore the previous setting even when a dataset fails to load.
            set_disable_data_schema_validation(existing_flag)
예제 #2
0
    def test_enable_schema_validation_individual_op(self):
        """With data-schema validation enabled, fitting a PCA whose fit-input
        and transform-output schemas were customized to expect strings must
        raise ValueError on numeric data.
        """
        from lale.settings import disable_data_schema_validation

        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)
        try:
            import lale.schemas as schemas
            from lale.lib.sklearn import PCA

            # Fit input deliberately demands string arrays, which the numeric
            # self.X_train cannot satisfy.
            pca_input = schemas.Object(
                X=schemas.AnyOf(
                    [
                        schemas.Array(schemas.Array(schemas.String())),
                        schemas.Array(schemas.String()),
                    ]
                )
            )
            foo = PCA.customize_schema(input_fit=pca_input)

            pca_output = schemas.Object(
                X=schemas.AnyOf(
                    [
                        schemas.Array(schemas.Array(schemas.String())),
                        schemas.Array(schemas.String()),
                    ]
                )
            )
            foo = foo.customize_schema(output_transform=pca_output)

            abc = foo()
            with self.assertRaises(ValueError):
                trained_pca = abc.fit(self.X_train)
                trained_pca.transform(self.X_test)
        finally:
            # Restore the global flag even if the assertions fail.
            set_disable_data_schema_validation(existing_flag)
예제 #3
0
    def test_transform_schema_Concat_irisDf(self):
        """ConcatFeatures.transform_schema should sum the column counts and
        join the item types of its input schemas.
        """
        from lale.datasets.data_schemas import to_schema
        from lale.settings import disable_data_schema_validation

        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)
        try:
            data_X, data_y = self._irisDf["X"], self._irisDf["y"]
            s_in_X, s_in_y = to_schema(data_X), to_schema(data_y)

            def check(s_actual, n_expected, s_expected):
                # Each output row must have exactly n_expected items of the
                # expected item schema.
                assert s_actual["items"]["minItems"] == n_expected, str(s_actual)
                assert s_actual["items"]["maxItems"] == n_expected, str(s_actual)
                assert s_actual["items"]["items"] == s_expected, str(s_actual)

            s_out_X = ConcatFeatures.transform_schema({"items": [s_in_X]})
            check(s_out_X, 4, {"type": "number"})
            s_out_y = ConcatFeatures.transform_schema({"items": [s_in_y]})
            check(s_out_y, 1, {"description": "target", "type": "integer"})
            s_out_XX = ConcatFeatures.transform_schema({"items": [s_in_X, s_in_X]})
            check(s_out_XX, 8, {"type": "number"})
            s_out_yy = ConcatFeatures.transform_schema({"items": [s_in_y, s_in_y]})
            check(s_out_yy, 2, {"type": "integer"})
            s_out_Xy = ConcatFeatures.transform_schema({"items": [s_in_X, s_in_y]})
            check(s_out_Xy, 5, {"type": "number"})
            s_out_XXX = ConcatFeatures.transform_schema(
                {"items": [s_in_X, s_in_X, s_in_X]}
            )
            check(s_out_XXX, 12, {"type": "number"})
        finally:
            # Restore the global flag even if an assertion fails.
            set_disable_data_schema_validation(existing_flag)
예제 #4
0
    def __exit__(self, exc_type, exc_value, traceback):
        """Restore both schema-validation flags saved by ``__enter__``.

        The exception arguments are ignored and nothing is returned, so any
        in-flight exception propagates. Parameters are renamed to the
        conventional protocol order; the original names (``value, type,
        traceback``) shadowed the builtin ``type`` and were mis-ordered.
        The ``with`` machinery calls this method positionally, so the
        rename is safe for callers.
        """
        from lale.settings import (
            set_disable_data_schema_validation,
            set_disable_hyperparams_schema_validation,
        )

        set_disable_data_schema_validation(self.existing_data_schema_validation_flag)
        set_disable_hyperparams_schema_validation(
            self.existing_hyperparams_schema_validation_flag
        )
예제 #5
0
    def __enter__(self):
        """Save the current schema-validation flags and enable validation.

        Returns self so the manager can be used as
        ``with EnableSchemaValidation() as ctx:``; the implicit ``None``
        return of the original was useless to ``as`` targets, and plain
        ``with`` statements are unaffected by this change.
        """
        from lale.settings import (
            disable_data_schema_validation,
            disable_hyperparams_schema_validation,
            set_disable_data_schema_validation,
            set_disable_hyperparams_schema_validation,
        )

        self.existing_data_schema_validation_flag = disable_data_schema_validation
        self.existing_hyperparams_schema_validation_flag = (
            disable_hyperparams_schema_validation
        )
        set_disable_data_schema_validation(False)
        set_disable_hyperparams_schema_validation(False)
        return self
예제 #6
0
    def test_transform_schema_higher_order(self):
        """A higher-order IdentityWrapper around LogisticRegression must
        yield the same transformed schema as the wrapped operator itself.
        """
        from lale.datasets.data_schemas import to_schema
        from lale.settings import disable_data_schema_validation

        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)
        try:
            inner = LogisticRegression
            outer = IdentityWrapper(op=LogisticRegression)
            input_schema = to_schema(self._digits["X"])
            transformed_inner = inner.transform_schema(input_schema)
            transformed_outer = outer.transform_schema(input_schema)
            self.maxDiff = None
            self.assertEqual(transformed_inner, transformed_outer)
        finally:
            # Restore the global flag even if the assertion fails.
            set_disable_data_schema_validation(existing_flag)
예제 #7
0
    def test_lr_with_all_datasets(self):
        """LogisticRegression.validate_schema should accept the numeric
        datasets and raise ValueError on the text / non-preprocessed ones.
        """
        from lale.settings import disable_data_schema_validation

        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)
        try:
            should_succeed = ["irisArr", "irisDf", "digits", "housing"]
            should_fail = ["creditG", "movies", "drugRev"]
            for name in should_succeed:
                dataset = getattr(self, f"_{name}")
                LogisticRegression.validate_schema(**dataset)
            for name in should_fail:
                dataset = getattr(self, f"_{name}")
                with self.assertRaises(ValueError):
                    LogisticRegression.validate_schema(**dataset)
        finally:
            # Restore the global flag even if validation fails unexpectedly.
            set_disable_data_schema_validation(existing_flag)
예제 #8
0
    def test_transform_schema_NoOp(self):
        """NoOp.transform_schema must return its input schema unchanged —
        the very same object — for every dataset.
        """
        from lale.datasets.data_schemas import to_schema
        from lale.settings import disable_data_schema_validation

        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)
        try:
            datasets = [
                self._irisArr,
                self._irisDf,
                self._digits,
                self._housing,
                self._creditG,
                self._movies,
                self._drugRev,
            ]
            for ds in datasets:
                s_input = to_schema(ds["X"])
                s_output = NoOp.transform_schema(s_input)
                self.assertIs(s_input, s_output)
        finally:
            # Restore the global flag even if an assertion fails.
            set_disable_data_schema_validation(existing_flag)
예제 #9
0
    def test_trained_individual_op_freeze_trainable(self):
        """freeze_trainable on a trained op should pin every hyperparameter
        while leaving the trained state unfrozen.
        """
        from lale.lib.sklearn import KNeighborsClassifier
        from lale.operators import TrainedIndividualOp
        from lale.settings import disable_data_schema_validation

        existing_schema_validation_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)
        try:
            trainable = KNeighborsClassifier(n_neighbors=1)
            X = np.array([[0.0], [1.0], [2.0]])
            y_old = np.array([0.0, 0.0, 1.0])
            liquid = trainable.fit(X, y_old)
            self.assertIsInstance(liquid, TrainedIndividualOp)
            self.assertFalse(liquid.is_frozen_trainable())
            self.assertIn("algorithm", liquid.free_hyperparams())
            frozen = liquid.freeze_trainable()
            self.assertIsInstance(frozen, TrainedIndividualOp)
            self.assertTrue(frozen.is_frozen_trainable())
            self.assertFalse(frozen.is_frozen_trained())
            self.assertEqual(len(frozen.free_hyperparams()), 0)
        finally:
            # Restore the global flag even if an assertion fails.
            set_disable_data_schema_validation(existing_schema_validation_flag)
예제 #10
0
    def test_individual_op_freeze_trained(self):
        """An op frozen with freeze_trained must keep its learned state even
        when fit again on different labels.
        """
        from lale.lib.sklearn import KNeighborsClassifier
        from lale.settings import disable_data_schema_validation

        existing_schema_validation_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)
        try:
            trainable = KNeighborsClassifier(n_neighbors=1)
            X = np.array([[0.0], [1.0], [2.0]])
            y_old = np.array([0.0, 0.0, 1.0])
            y_new = np.array([1.0, 0.0, 0.0])
            liquid_old = trainable.fit(X, y_old)
            self.assertEqual(list(liquid_old.predict(X)), list(y_old))
            liquid_new = liquid_old.fit(X, y_new)
            self.assertEqual(list(liquid_new.predict(X)), list(y_new))
            frozen_old = trainable.fit(X, y_old).freeze_trained()
            self.assertFalse(liquid_old.is_frozen_trained())
            self.assertTrue(frozen_old.is_frozen_trained())
            self.assertEqual(list(frozen_old.predict(X)), list(y_old))
            # Re-fitting a frozen-trained op keeps the old model: predictions
            # still match y_old, not y_new.
            frozen_new = frozen_old.fit(X, y_new)
            self.assertEqual(list(frozen_new.predict(X)), list(y_old))
        finally:
            # Restore the global flag even if an assertion fails.
            set_disable_data_schema_validation(existing_schema_validation_flag)
예제 #11
0
    def test_transform_schema_choice(self):
        """transform_schema of an operator choice (NMF | LogisticRegression)
        should be a 2-d array-of-numbers schema.
        """
        from lale.datasets.data_schemas import to_schema
        from lale.settings import disable_data_schema_validation

        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)
        try:
            choice = NMF | LogisticRegression
            input_schema = to_schema(self._digits["X"])
            transformed_schema = choice.transform_schema(input_schema)
            transformed_expected = {
                "type": "array",
                "items": {"type": "array", "items": {"type": "number"}},
            }
            self.maxDiff = None
            self.assertEqual(transformed_schema, transformed_expected)
        finally:
            # Restore the global flag even if the assertion fails.
            set_disable_data_schema_validation(existing_flag)
예제 #12
0
    def test_disable_schema_validation_pipeline(self):
        """With data-schema validation disabled, a pipeline whose fit-input
        schema was customized to demand strings must still fit and predict
        on numeric data without raising.
        """
        from lale.settings import disable_data_schema_validation

        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(True)
        try:
            import lale.schemas as schemas
            from lale.lib.sklearn import PCA, LogisticRegression

            lr_input = schemas.Object(
                required=["X", "y"],
                X=schemas.AnyOf(
                    [
                        schemas.Array(schemas.Array(schemas.String())),
                        schemas.Array(schemas.String()),
                    ]
                ),
                y=schemas.Array(schemas.String()),
            )

            foo = LogisticRegression.customize_schema(input_fit=lr_input)
            abc = foo()
            pipeline = PCA() >> abc
            trained_pipeline = pipeline.fit(self.X_train, self.y_train)
            trained_pipeline.predict(self.X_test)
        finally:
            # Restore the global flag even if fit/predict raises.
            set_disable_data_schema_validation(existing_flag)
예제 #13
0
    def test_project_with_all_datasets(self):
        """Project.validate_schema should accept the tabular datasets and
        raise ValueError only on the plain-text movies dataset.
        """
        from lale.settings import disable_data_schema_validation

        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)
        try:
            should_succeed = [
                "irisArr",
                "irisDf",
                "digits",
                "housing",
                "creditG",
                "drugRev",
            ]
            should_fail = ["movies"]
            for name in should_succeed:
                dataset = getattr(self, f"_{name}")
                lale.lib.lale.Project.validate_schema(**dataset)
            for name in should_fail:
                dataset = getattr(self, f"_{name}")
                with self.assertRaises(ValueError):
                    lale.lib.lale.Project.validate_schema(**dataset)
        finally:
            # Restore the global flag even if validation fails unexpectedly.
            set_disable_data_schema_validation(existing_flag)
예제 #14
0
    def test_positive(self):
        """On sklearn > 1.0, Ridge's ``positive=True`` must only validate
        with compatible solvers; invalid solver/positive combinations raise
        ValidationError at construction time.
        """
        import sklearn

        from lale.settings import (
            disable_data_schema_validation,
            set_disable_data_schema_validation,
        )

        # Save and restore the global flag: the original version enabled
        # validation and never restored it, leaking state into later tests.
        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)
        try:
            # NOTE(review): lexicographic version compare — fine for "1.x"
            # vs "1.0" but fragile in general (e.g. "1.10" < "1.2").
            if sklearn.__version__ > "1.0":
                reg = Ridge(solver="lbfgs", positive=True)
                reg.fit(self.X_train, self.y_train)

                with self.assertRaises(ValidationError):
                    reg = Ridge(solver="saga", positive=True)

                reg = Ridge(solver="auto", positive=True)
                reg.fit(self.X_train, self.y_train)

                with self.assertRaises(ValidationError):
                    reg = Ridge(solver="lbfgs", positive=False)

                reg = Ridge(solver="auto", positive=False)
                reg.fit(self.X_train, self.y_train)
        finally:
            set_disable_data_schema_validation(existing_flag)
예제 #15
0
    def test_transform_schema_pipeline(self):
        """transform_schema of NMF >> LogisticRegression should yield
        LogisticRegression's probability-output schema.
        """
        from lale.datasets.data_schemas import to_schema
        from lale.settings import disable_data_schema_validation

        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)
        try:
            pipeline = NMF >> LogisticRegression
            input_schema = to_schema(self._digits["X"])
            transformed_schema = pipeline.transform_schema(input_schema)
            transformed_expected = {
                "description":
                "Probability of the sample for each class in the model.",
                "type": "array",
                "items": {"type": "array", "items": {"type": "number"}},
            }
            self.maxDiff = None
            self.assertEqual(transformed_schema, transformed_expected)
        finally:
            # Restore the global flag even if the assertion fails.
            set_disable_data_schema_validation(existing_flag)
예제 #16
0
    SVC,
    IsolationForest,
    KNeighborsClassifier,
    LogisticRegression,
    MLPClassifier,
    Nystroem,
    PassiveAggressiveClassifier,
    RidgeClassifier,
    SGDClassifier,
    SimpleImputer,
    VotingClassifier,
)
from lale.search.lale_grid_search_cv import get_grid_search_parameter_grids
from lale.settings import set_disable_data_schema_validation

# Module-level side effect: enable data-schema validation for every test below.
set_disable_data_schema_validation(False)


class TestClassification(unittest.TestCase):
    """Classifier tests sharing a fresh Iris train/test split per test."""

    def setUp(self):
        from sklearn.model_selection import train_test_split

        iris = load_iris()
        # train_test_split returns [X_train, X_test, y_train, y_test].
        split = train_test_split(iris.data, iris.target)
        self.X_train, self.X_test, self.y_train, self.y_test = split


def create_function_test_classifier(clf_name):
    def test_classifier(self):
        X_train, y_train = self.X_train, self.y_train
        import importlib
예제 #17
0
    def test_keep_non_numbers(self):
        """Projecting credit-g onto non-number columns should keep exactly
        the 13 categorical (enum) columns, as reflected in the output schema.

        NOTE(review): the restore at the end is skipped if an assertion
        fails — consider try/finally. Row count 670 presumably matches the
        credit-g train split loaded by setUpClass; verify against the loader.
        """
        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)

        from lale.datasets.data_schemas import to_schema
        from lale.lib.lale import Project

        train_X = self._creditG["X"]
        # Keep every column whose schema is NOT {"type": "number"}.
        trainable = Project(columns={"not": {"type": "number"}})
        trained = trainable.fit(train_X)
        transformed = trained.transform(train_X)
        transformed_schema = to_schema(transformed)
        transformed_expected = {
            "type": "array",
            "minItems": 670,
            "maxItems": 670,
            "items": {
                "type":
                "array",
                "minItems":
                13,
                "maxItems":
                13,
                "items": [
                    {
                        "description": "checking_status",
                        "enum": ["<0", "0<=X<200", ">=200", "no checking"],
                    },
                    {
                        "description":
                        "credit_history",
                        "enum": [
                            "no credits/all paid",
                            "all paid",
                            "existing paid",
                            "delayed previously",
                            "critical/other existing credit",
                        ],
                    },
                    {
                        "description":
                        "purpose",
                        "enum": [
                            "new car",
                            "used car",
                            "furniture/equipment",
                            "radio/tv",
                            "domestic appliance",
                            "repairs",
                            "education",
                            "vacation",
                            "retraining",
                            "business",
                            "other",
                        ],
                    },
                    {
                        "description":
                        "savings_status",
                        "enum": [
                            "<100",
                            "100<=X<500",
                            "500<=X<1000",
                            ">=1000",
                            "no known savings",
                        ],
                    },
                    {
                        "description": "employment",
                        "enum":
                        ["unemployed", "<1", "1<=X<4", "4<=X<7", ">=7"],
                    },
                    {
                        "description":
                        "personal_status",
                        "enum": [
                            "male div/sep",
                            "female div/dep/mar",
                            "male single",
                            "male mar/wid",
                            "female single",
                        ],
                    },
                    {
                        "description": "other_parties",
                        "enum": ["none", "co applicant", "guarantor"],
                    },
                    {
                        "description":
                        "property_magnitude",
                        "enum": [
                            "real estate",
                            "life insurance",
                            "car",
                            "no known property",
                        ],
                    },
                    {
                        "description": "other_payment_plans",
                        "enum": ["bank", "stores", "none"],
                    },
                    {
                        "description": "housing",
                        "enum": ["rent", "own", "for free"]
                    },
                    {
                        "description":
                        "job",
                        "enum": [
                            "unemp/unskilled non res",
                            "unskilled resident",
                            "skilled",
                            "high qualif/self emp/mgmt",
                        ],
                    },
                    {
                        "description": "own_telephone",
                        "enum": ["none", "yes"]
                    },
                    {
                        "description": "foreign_worker",
                        "enum": ["yes", "no"]
                    },
                ],
            },
        }
        self.maxDiff = None
        self.assertEqual(transformed_schema, transformed_expected)
        set_disable_data_schema_validation(existing_flag)