def setUpClass(cls):
    """Load every dataset fixture used by the schema-validation tests, once per class.

    Temporarily forces data-schema validation ON while loading, then restores
    the previous global flag. The try/finally guarantees the flag is restored
    even if one of the (network-backed) dataset loads raises.
    """
    from sklearn.datasets import load_iris

    existing_flag = disable_data_schema_validation
    set_disable_data_schema_validation(False)
    try:
        irisArr = load_iris()
        cls._irisArr = {"X": irisArr.data, "y": irisArr.target}

        from lale.datasets import sklearn_to_pandas

        (train_X, train_y), (test_X, test_y) = sklearn_to_pandas.load_iris_df()
        cls._irisDf = {"X": train_X, "y": train_y}
        (train_X, train_y), (test_X, test_y) = sklearn_to_pandas.digits_df()
        cls._digits = {"X": train_X, "y": train_y}
        (train_X, train_y), (test_X, test_y) = (
            sklearn_to_pandas.california_housing_df()
        )
        cls._housing = {"X": train_X, "y": train_y}

        from lale.datasets import openml

        (train_X, train_y), (test_X, test_y) = openml.fetch(
            "credit-g", "classification", preprocess=False
        )
        cls._creditG = {"X": train_X, "y": train_y}

        from lale.datasets import load_movie_review

        train_X, train_y = load_movie_review()
        cls._movies = {"X": train_X, "y": train_y}

        from lale.datasets.uci.uci_datasets import fetch_drugscom

        train_X, train_y, test_X, test_y = fetch_drugscom()
        cls._drugRev = {"X": train_X, "y": train_y}
    finally:
        # Restore the global flag even if a dataset load fails, so a broken
        # fixture does not leave validation forced on for unrelated tests.
        set_disable_data_schema_validation(existing_flag)
def test_enable_schema_validation_individual_op(self):
    """With data-schema validation enabled, fitting a PCA whose customized
    input schema demands strings must raise ValueError on numeric data.
    """
    existing_flag = disable_data_schema_validation
    set_disable_data_schema_validation(False)
    try:
        import lale.schemas as schemas
        from lale.lib.sklearn import PCA

        # Customized fit-input schema: X must be a 1-d or 2-d array of strings.
        pca_input = schemas.Object(
            X=schemas.AnyOf(
                [
                    schemas.Array(schemas.Array(schemas.String())),
                    schemas.Array(schemas.String()),
                ]
            )
        )
        foo = PCA.customize_schema(input_fit=pca_input)
        pca_output = schemas.Object(
            X=schemas.AnyOf(
                [
                    schemas.Array(schemas.Array(schemas.String())),
                    schemas.Array(schemas.String()),
                ]
            )
        )
        foo = foo.customize_schema(output_transform=pca_output)
        abc = foo()
        with self.assertRaises(ValueError):
            trained_pca = abc.fit(self.X_train)
            trained_pca.transform(self.X_test)
    finally:
        # Restore the global flag even if the assertions above fail, so this
        # test cannot leak forced-on validation into the rest of the suite.
        set_disable_data_schema_validation(existing_flag)
def test_transform_schema_Concat_irisDf(self):
    """ConcatFeatures.transform_schema must concatenate the column schemas of
    the iris dataframe (X: 4 numbers, y: 1 integer) in every combination.
    """
    from lale.datasets.data_schemas import to_schema

    existing_flag = disable_data_schema_validation
    set_disable_data_schema_validation(False)
    try:
        data_X, data_y = self._irisDf["X"], self._irisDf["y"]
        s_in_X, s_in_y = to_schema(data_X), to_schema(data_y)

        def check(s_actual, n_expected, s_expected):
            # Every row must have exactly n_expected items of schema s_expected.
            assert s_actual["items"]["minItems"] == n_expected, str(s_actual)
            assert s_actual["items"]["maxItems"] == n_expected, str(s_actual)
            assert s_actual["items"]["items"] == s_expected, str(s_actual)

        s_out_X = ConcatFeatures.transform_schema({"items": [s_in_X]})
        check(s_out_X, 4, {"type": "number"})
        s_out_y = ConcatFeatures.transform_schema({"items": [s_in_y]})
        check(s_out_y, 1, {"description": "target", "type": "integer"})
        s_out_XX = ConcatFeatures.transform_schema({"items": [s_in_X, s_in_X]})
        check(s_out_XX, 8, {"type": "number"})
        s_out_yy = ConcatFeatures.transform_schema({"items": [s_in_y, s_in_y]})
        check(s_out_yy, 2, {"type": "integer"})
        # Mixing numbers and integers widens the item type to "number".
        s_out_Xy = ConcatFeatures.transform_schema({"items": [s_in_X, s_in_y]})
        check(s_out_Xy, 5, {"type": "number"})
        s_out_XXX = ConcatFeatures.transform_schema(
            {"items": [s_in_X, s_in_X, s_in_X]}
        )
        check(s_out_XXX, 12, {"type": "number"})
    finally:
        # Restore the global flag even when an assertion fails mid-test.
        set_disable_data_schema_validation(existing_flag)
def __exit__(self, exc_type, exc_value, traceback):
    """Restore both schema-validation flags saved by ``__enter__``.

    The parameters were previously named ``(value, type, traceback)``, which
    mislabeled the actual positional call order
    ``__exit__(exc_type, exc_value, traceback)`` (so ``value`` received the
    exception *type*) and shadowed the builtin ``type``. They are renamed to
    the conventional order; the with-statement calls positionally, so callers
    are unaffected. Exception info is ignored, and the implicit ``None``
    return means exceptions are never suppressed.
    """
    from lale.settings import (
        set_disable_data_schema_validation,
        set_disable_hyperparams_schema_validation,
    )

    set_disable_data_schema_validation(self.existing_data_schema_validation_flag)
    set_disable_hyperparams_schema_validation(
        self.existing_hyperparams_schema_validation_flag
    )
def __enter__(self):
    """Save both schema-validation flags, then force validation ON.

    Returns:
        self, so the context manager can be used with ``with ... as cm:``
        (previously the implicit ``None`` return made the ``as`` target
        useless; plain ``with`` usage is unaffected).
    """
    from lale.settings import (
        disable_data_schema_validation,
        disable_hyperparams_schema_validation,
        set_disable_data_schema_validation,
        set_disable_hyperparams_schema_validation,
    )

    # Snapshot the current global flags so __exit__ can restore them.
    self.existing_data_schema_validation_flag = disable_data_schema_validation
    self.existing_hyperparams_schema_validation_flag = (
        disable_hyperparams_schema_validation
    )
    set_disable_data_schema_validation(False)
    set_disable_hyperparams_schema_validation(False)
    return self
def test_transform_schema_higher_order(self):
    """IdentityWrapper must delegate transform_schema to the wrapped operator,
    producing exactly the same output schema as the operator itself.
    """
    from lale.datasets.data_schemas import to_schema

    existing_flag = disable_data_schema_validation
    set_disable_data_schema_validation(False)
    try:
        inner = LogisticRegression
        outer = IdentityWrapper(op=LogisticRegression)
        input_schema = to_schema(self._digits["X"])
        transformed_inner = inner.transform_schema(input_schema)
        transformed_outer = outer.transform_schema(input_schema)
        self.maxDiff = None
        self.assertEqual(transformed_inner, transformed_outer)
    finally:
        # Restore the global flag even when the assertion fails.
        set_disable_data_schema_validation(existing_flag)
def test_lr_with_all_datasets(self):
    """LogisticRegression.validate_schema accepts the numeric datasets and
    raises ValueError for the non-numeric ones.
    """
    existing_flag = disable_data_schema_validation
    set_disable_data_schema_validation(False)
    try:
        should_succeed = ["irisArr", "irisDf", "digits", "housing"]
        should_fail = ["creditG", "movies", "drugRev"]
        for name in should_succeed:
            dataset = getattr(self, f"_{name}")
            LogisticRegression.validate_schema(**dataset)
        for name in should_fail:
            dataset = getattr(self, f"_{name}")
            with self.assertRaises(ValueError):
                LogisticRegression.validate_schema(**dataset)
    finally:
        # Restore the global flag even when a validate_schema call fails
        # unexpectedly, so other tests are not affected.
        set_disable_data_schema_validation(existing_flag)
def test_transform_schema_NoOp(self):
    """NoOp.transform_schema must return its input schema unchanged — the
    very same object, checked with assertIs, for every dataset fixture.
    """
    from lale.datasets.data_schemas import to_schema

    existing_flag = disable_data_schema_validation
    set_disable_data_schema_validation(False)
    try:
        for ds in [
            self._irisArr,
            self._irisDf,
            self._digits,
            self._housing,
            self._creditG,
            self._movies,
            self._drugRev,
        ]:
            s_input = to_schema(ds["X"])
            s_output = NoOp.transform_schema(s_input)
            self.assertIs(s_input, s_output)
    finally:
        # Restore the global flag even when an assertion fails.
        set_disable_data_schema_validation(existing_flag)
def test_trained_individual_op_freeze_trainable(self):
    """freeze_trainable on a trained op pins all remaining free
    hyperparameters while keeping the op refittable: the result is
    frozen-trainable but NOT frozen-trained.
    """
    from lale.lib.sklearn import KNeighborsClassifier
    from lale.operators import TrainedIndividualOp

    existing_schema_validation_flag = disable_data_schema_validation
    set_disable_data_schema_validation(False)
    try:
        trainable = KNeighborsClassifier(n_neighbors=1)
        X = np.array([[0.0], [1.0], [2.0]])
        y_old = np.array([0.0, 0.0, 1.0])
        liquid = trainable.fit(X, y_old)
        self.assertIsInstance(liquid, TrainedIndividualOp)
        self.assertFalse(liquid.is_frozen_trainable())
        # Only n_neighbors was bound, so e.g. "algorithm" is still free.
        self.assertIn("algorithm", liquid.free_hyperparams())
        frozen = liquid.freeze_trainable()
        self.assertIsInstance(frozen, TrainedIndividualOp)
        self.assertTrue(frozen.is_frozen_trainable())
        self.assertFalse(frozen.is_frozen_trained())
        self.assertEqual(len(frozen.free_hyperparams()), 0)
    finally:
        # Restore the global flag even when an assertion fails.
        set_disable_data_schema_validation(existing_schema_validation_flag)
def test_individual_op_freeze_trained(self):
    """freeze_trained must make fit a no-op: refitting a frozen-trained op
    keeps the old learned state, while an unfrozen trained op retrains.
    """
    from lale.lib.sklearn import KNeighborsClassifier

    existing_schema_validation_flag = disable_data_schema_validation
    set_disable_data_schema_validation(False)
    try:
        trainable = KNeighborsClassifier(n_neighbors=1)
        X = np.array([[0.0], [1.0], [2.0]])
        y_old = np.array([0.0, 0.0, 1.0])
        y_new = np.array([1.0, 0.0, 0.0])
        liquid_old = trainable.fit(X, y_old)
        self.assertEqual(list(liquid_old.predict(X)), list(y_old))
        # Unfrozen: refitting with new labels changes the predictions.
        liquid_new = liquid_old.fit(X, y_new)
        self.assertEqual(list(liquid_new.predict(X)), list(y_new))
        frozen_old = trainable.fit(X, y_old).freeze_trained()
        self.assertFalse(liquid_old.is_frozen_trained())
        self.assertTrue(frozen_old.is_frozen_trained())
        self.assertEqual(list(frozen_old.predict(X)), list(y_old))
        # Frozen: new labels are ignored; predictions still match y_old.
        frozen_new = frozen_old.fit(X, y_new)
        self.assertEqual(list(frozen_new.predict(X)), list(y_old))
    finally:
        # Restore the global flag even when an assertion fails.
        set_disable_data_schema_validation(existing_schema_validation_flag)
def test_transform_schema_choice(self):
    """transform_schema over a choice (NMF | LogisticRegression) must yield
    the join of both branches: a 2-d array of numbers.
    """
    from lale.datasets.data_schemas import to_schema

    existing_flag = disable_data_schema_validation
    set_disable_data_schema_validation(False)
    try:
        choice = NMF | LogisticRegression
        input_schema = to_schema(self._digits["X"])
        transformed_schema = choice.transform_schema(input_schema)
        transformed_expected = {
            "type": "array",
            "items": {"type": "array", "items": {"type": "number"}},
        }
        self.maxDiff = None
        self.assertEqual(transformed_schema, transformed_expected)
    finally:
        # Restore the global flag even when the assertion fails.
        set_disable_data_schema_validation(existing_flag)
def test_disable_schema_validation_pipeline(self):
    """With data-schema validation DISABLED, a pipeline whose customized
    fit-input schema does not match the numeric data must still fit and
    predict without raising.
    """
    existing_flag = disable_data_schema_validation
    set_disable_data_schema_validation(True)
    try:
        import lale.schemas as schemas
        from lale.lib.sklearn import PCA, LogisticRegression

        # Deliberately wrong schema: demands string X/y for numeric data.
        lr_input = schemas.Object(
            required=["X", "y"],
            X=schemas.AnyOf(
                [
                    schemas.Array(schemas.Array(schemas.String())),
                    schemas.Array(schemas.String()),
                ]
            ),
            y=schemas.Array(schemas.String()),
        )
        foo = LogisticRegression.customize_schema(input_fit=lr_input)
        abc = foo()
        pipeline = PCA() >> abc
        trained_pipeline = pipeline.fit(self.X_train, self.y_train)
        trained_pipeline.predict(self.X_test)
    finally:
        # Restore the global flag even if fit/predict raises; otherwise a
        # failure here would leave validation disabled for the whole suite.
        set_disable_data_schema_validation(existing_flag)
def test_project_with_all_datasets(self):
    """Project.validate_schema accepts every tabular dataset fixture; the
    free-text movies dataset must be rejected with ValueError.
    """
    existing_flag = disable_data_schema_validation
    set_disable_data_schema_validation(False)
    try:
        should_succeed = [
            "irisArr",
            "irisDf",
            "digits",
            "housing",
            "creditG",
            "drugRev",
        ]
        should_fail = ["movies"]
        for name in should_succeed:
            dataset = getattr(self, f"_{name}")
            lale.lib.lale.Project.validate_schema(**dataset)
        for name in should_fail:
            dataset = getattr(self, f"_{name}")
            with self.assertRaises(ValueError):
                lale.lib.lale.Project.validate_schema(**dataset)
    finally:
        # Restore the global flag even when a validation call fails.
        set_disable_data_schema_validation(existing_flag)
def test_positive(self):
    """Ridge's `positive` hyperparameter is only compatible with certain
    solvers; invalid solver/positive combinations must raise
    ValidationError at configuration time (sklearn > 1.0 only).
    """
    import sklearn

    from lale.settings import (
        disable_data_schema_validation,
        set_disable_data_schema_validation,
    )

    # Previously this test forced validation on and never restored the
    # global flag; snapshot it and restore in finally.
    existing_flag = disable_data_schema_validation
    set_disable_data_schema_validation(False)
    try:
        # NOTE(review): lexicographic version compare — fine for "1.x" vs
        # "1.0", but fragile in general (e.g. "1.10" < "1.2" as strings).
        if sklearn.__version__ > "1.0":
            reg = Ridge(solver="lbfgs", positive=True)
            reg.fit(self.X_train, self.y_train)
            with self.assertRaises(ValidationError):
                reg = Ridge(solver="saga", positive=True)
            reg = Ridge(solver="auto", positive=True)
            reg.fit(self.X_train, self.y_train)
            with self.assertRaises(ValidationError):
                reg = Ridge(solver="lbfgs", positive=False)
            reg = Ridge(solver="auto", positive=False)
            reg.fit(self.X_train, self.y_train)
    finally:
        set_disable_data_schema_validation(existing_flag)
def test_transform_schema_pipeline(self):
    """transform_schema through the pipeline NMF >> LogisticRegression must
    yield LogisticRegression's probability-output schema.
    """
    from lale.datasets.data_schemas import to_schema

    existing_flag = disable_data_schema_validation
    set_disable_data_schema_validation(False)
    try:
        pipeline = NMF >> LogisticRegression
        input_schema = to_schema(self._digits["X"])
        transformed_schema = pipeline.transform_schema(input_schema)
        transformed_expected = {
            "description": "Probability of the sample for each class in the model.",
            "type": "array",
            "items": {"type": "array", "items": {"type": "number"}},
        }
        self.maxDiff = None
        self.assertEqual(transformed_schema, transformed_expected)
    finally:
        # Restore the global flag even when the assertion fails.
        set_disable_data_schema_validation(existing_flag)
SVC, IsolationForest, KNeighborsClassifier, LogisticRegression, MLPClassifier, Nystroem, PassiveAggressiveClassifier, RidgeClassifier, SGDClassifier, SimpleImputer, VotingClassifier, ) from lale.search.lale_grid_search_cv import get_grid_search_parameter_grids from lale.settings import set_disable_data_schema_validation set_disable_data_schema_validation(False) class TestClassification(unittest.TestCase): def setUp(self): from sklearn.model_selection import train_test_split data = load_iris() X, y = data.data, data.target self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y) def create_function_test_classifier(clf_name): def test_classifier(self): X_train, y_train = self.X_train, self.y_train import importlib
def test_keep_non_numbers(self):
    """Project with columns={"not": {"type": "number"}} must keep exactly the
    13 categorical (enum) columns of credit-g, with their schemas intact.
    """
    existing_flag = disable_data_schema_validation
    set_disable_data_schema_validation(False)
    try:
        from lale.datasets.data_schemas import to_schema
        from lale.lib.lale import Project

        train_X = self._creditG["X"]
        trainable = Project(columns={"not": {"type": "number"}})
        trained = trainable.fit(train_X)
        transformed = trained.transform(train_X)
        transformed_schema = to_schema(transformed)
        transformed_expected = {
            "type": "array",
            "minItems": 670,
            "maxItems": 670,
            "items": {
                "type": "array",
                "minItems": 13,
                "maxItems": 13,
                "items": [
                    {
                        "description": "checking_status",
                        "enum": ["<0", "0<=X<200", ">=200", "no checking"],
                    },
                    {
                        "description": "credit_history",
                        "enum": [
                            "no credits/all paid",
                            "all paid",
                            "existing paid",
                            "delayed previously",
                            "critical/other existing credit",
                        ],
                    },
                    {
                        "description": "purpose",
                        "enum": [
                            "new car",
                            "used car",
                            "furniture/equipment",
                            "radio/tv",
                            "domestic appliance",
                            "repairs",
                            "education",
                            "vacation",
                            "retraining",
                            "business",
                            "other",
                        ],
                    },
                    {
                        "description": "savings_status",
                        "enum": [
                            "<100",
                            "100<=X<500",
                            "500<=X<1000",
                            ">=1000",
                            "no known savings",
                        ],
                    },
                    {
                        "description": "employment",
                        "enum": ["unemployed", "<1", "1<=X<4", "4<=X<7", ">=7"],
                    },
                    {
                        "description": "personal_status",
                        "enum": [
                            "male div/sep",
                            "female div/dep/mar",
                            "male single",
                            "male mar/wid",
                            "female single",
                        ],
                    },
                    {
                        "description": "other_parties",
                        "enum": ["none", "co applicant", "guarantor"],
                    },
                    {
                        "description": "property_magnitude",
                        "enum": [
                            "real estate",
                            "life insurance",
                            "car",
                            "no known property",
                        ],
                    },
                    {
                        "description": "other_payment_plans",
                        "enum": ["bank", "stores", "none"],
                    },
                    {
                        "description": "housing",
                        "enum": ["rent", "own", "for free"],
                    },
                    {
                        "description": "job",
                        "enum": [
                            "unemp/unskilled non res",
                            "unskilled resident",
                            "skilled",
                            "high qualif/self emp/mgmt",
                        ],
                    },
                    {
                        "description": "own_telephone",
                        "enum": ["none", "yes"],
                    },
                    {
                        "description": "foreign_worker",
                        "enum": ["yes", "no"],
                    },
                ],
            },
        }
        self.maxDiff = None
        self.assertEqual(transformed_schema, transformed_expected)
    finally:
        # Restore the global flag even when the (large) comparison fails.
        set_disable_data_schema_validation(existing_flag)