Exemple #1
0
 def test_pandas_to_schema(self):
     # Infers schemas for the pandas iris fixture with to_schema, checks
     # that each inferred schema validates its own data, and compares the
     # inferred schemas against the expected literals built below.
     from lale.datasets.data_schemas import to_schema
     from lale.type_checking import validate_schema
     import pandas as pd
     features = self._irisDf['X']
     labels = self._irisDf['y']
     # The fixture data must not already carry an attached json_schema.
     assert isinstance(features, pd.DataFrame)
     assert not hasattr(features, 'json_schema')
     features_schema = to_schema(features)
     validate_schema(features, features_schema, subsample_array=False)
     assert isinstance(labels, pd.Series)
     assert not hasattr(labels, 'json_schema')
     labels_schema = to_schema(labels)
     validate_schema(labels, labels_schema, subsample_array=False)
     column_names = (
         'sepal length (cm)',
         'sepal width (cm)',
         'petal length (cm)',
         'petal width (cm)',
     )
     # One schema entry per column, in column order.
     expected_row = {
         'type': 'array',
         'minItems': 4,
         'maxItems': 4,
         'items': [
             {'description': column, 'type': 'number'}
             for column in column_names],
     }
     expected_features = {
         'type': 'array', 'minItems': 120, 'maxItems': 120,
         'items': expected_row}
     expected_labels = {
         'type': 'array', 'minItems': 120, 'maxItems': 120,
         'items': {'description': 'target', 'type': 'integer'}}
     # Show full diffs when a schema comparison fails.
     self.maxDiff = None
     self.assertEqual(features_schema, expected_features)
     self.assertEqual(labels_schema, expected_labels)
Exemple #2
0
    def test_keep_numbers(self):
        # Fits Project(columns={"type": "number"}) on the creditG features
        # and checks that the schema of the transformed data equals the
        # expected literal, which lists seven numeric columns.
        from lale.datasets.data_schemas import to_schema
        from lale.lib.lale import Project

        # Only the features are used; the target is not needed by this test.
        train_X = self._creditG["X"]
        trainable = Project(columns={"type": "number"})
        trained = trainable.fit(train_X)
        transformed = trained.transform(train_X)
        transformed_schema = to_schema(transformed)
        transformed_expected = {
            "type": "array",
            "minItems": 670,
            "maxItems": 670,
            "items": {
                "type": "array",
                "minItems": 7,
                "maxItems": 7,
                "items": [
                    {"description": "duration", "type": "number"},
                    {"description": "credit_amount", "type": "number"},
                    {"description": "installment_commitment", "type": "number"},
                    {"description": "residence_since", "type": "number"},
                    {"description": "age", "type": "number"},
                    {"description": "existing_credits", "type": "number"},
                    {"description": "num_dependents", "type": "number"},
                ],
            },
        }
        # Show full diffs when the schema comparison fails.
        self.maxDiff = None
        self.assertEqual(transformed_schema, transformed_expected)
Exemple #3
0
    def test_datasets_with_own_schemas(self):
        # For each named dataset fixture on self, infers schemas for X and y
        # with to_schema and checks that each schema validates its own data.
        from lale.datasets.data_schemas import to_schema
        from lale.type_checking import validate_schema

        fixture_names = (
            "irisArr",
            "irisDf",
            "digits",
            "housing",
            "creditG",
            "movies",
            "drugRev",
        )
        for fixture_name in fixture_names:
            # Fixtures are stored on self under an underscore-prefixed name.
            fixture = getattr(self, f"_{fixture_name}")
            features = fixture["X"]
            labels = fixture["y"]
            features_schema = to_schema(features)
            labels_schema = to_schema(labels)
            validate_schema(features, features_schema, subsample_array=False)
            validate_schema(labels, labels_schema, subsample_array=False)
Exemple #4
0
 def test_transform_schema_NoOp(self):
     # NoOp.transform_schema must return the very same schema object it
     # was given; checked for the X part of every dataset fixture.
     from lale.datasets.data_schemas import to_schema
     fixtures = (
         self._irisArr,
         self._irisDf,
         self._digits,
         self._housing,
         self._creditG,
         self._movies,
         self._drugRev,
     )
     for fixture in fixtures:
         schema_in = to_schema(fixture['X'])
         schema_out = NoOp.transform_schema(schema_in)
         # Identity, not just equality.
         self.assertIs(schema_in, schema_out)
Exemple #5
0
 def test_transform_schema_higher_order(self):
     # transform_schema on IdentityWrapper(op=LogisticRegression) must give
     # the same result as on LogisticRegression itself, for the digits X
     # schema.
     from lale.datasets.data_schemas import to_schema
     plain = LogisticRegression
     wrapped = IdentityWrapper(op=LogisticRegression)
     digits_schema = to_schema(self._digits['X'])
     from_plain = plain.transform_schema(digits_schema)
     from_wrapped = wrapped.transform_schema(digits_schema)
     # Show full diffs when the comparison fails.
     self.maxDiff = None
     self.assertEqual(from_plain, from_wrapped)
Exemple #6
0
 def test_transform_schema_choice(self):
     # The schema that the choice NMF | LogisticRegression produces for the
     # digits X schema must equal the expected literal below.
     from lale.datasets.data_schemas import to_schema
     either = NMF | LogisticRegression
     schema_in = to_schema(self._digits['X'])
     schema_out = either.transform_schema(schema_in)
     number_rows = {'type': 'array', 'items': {'type': 'number'}}
     expected = {
         '$schema': 'http://json-schema.org/draft-04/schema#',
         'type': 'array',
         'items': number_rows,
     }
     # Show full diffs when the comparison fails.
     self.maxDiff = None
     self.assertEqual(schema_out, expected)
Exemple #7
0
 def test_transform_schema_Concat_irisDf(self):
     # Feeds combinations of the iris X and y schemas to
     # ConcatFeatures.transform_schema and checks, for each result, the
     # row length (minItems/maxItems) and the per-element schema.
     from lale.datasets.data_schemas import to_schema
     features, labels = self._irisDf['X'], self._irisDf['y']
     x_schema, y_schema = to_schema(features), to_schema(labels)
     # Each scenario: (input schemas, expected row length, expected element).
     scenarios = [
         ([x_schema], 4, {'type': 'number'}),
         ([y_schema], 1, {'description': 'target', 'type': 'integer'}),
         ([x_schema, x_schema], 8, {'type': 'number'}),
         ([y_schema, y_schema], 2, {'type': 'integer'}),
         ([x_schema, y_schema], 5, {'type': 'number'}),
         ([x_schema, x_schema, x_schema], 12, {'type': 'number'}),
     ]
     for inputs, width, element in scenarios:
         result = ConcatFeatures.transform_schema({'items': inputs})
         # The failure message carries the whole actual schema.
         assert result['items']['minItems'] == width, str(result)
         assert result['items']['maxItems'] == width, str(result)
         assert result['items']['items'] == element, str(result)
Exemple #8
0
    def test_transform_schema_choice(self):
        # The schema that the choice NMF | LogisticRegression produces for
        # the digits X schema must equal the expected literal below.
        from lale.datasets.data_schemas import to_schema

        either = NMF | LogisticRegression
        schema_in = to_schema(self._digits["X"])
        schema_out = either.transform_schema(schema_in)
        number_rows = {"type": "array", "items": {"type": "number"}}
        expected = {"type": "array", "items": number_rows}
        # Show full diffs when the comparison fails.
        self.maxDiff = None
        self.assertEqual(schema_out, expected)
Exemple #9
0
 def test_ndarray_to_schema(self):
     # Infers schemas for the irisArr fixture with to_schema, checks that
     # each inferred schema validates its own data, and compares the
     # inferred schemas against the expected literals built below.
     from lale.datasets.data_schemas import to_schema
     from lale.type_checking import validate_schema
     features = self._irisArr['X']
     labels = self._irisArr['y']
     # The fixture data must not already carry an attached json_schema.
     assert not hasattr(features, 'json_schema')
     features_schema = to_schema(features)
     validate_schema(features, features_schema, subsample_array=False)
     assert not hasattr(labels, 'json_schema')
     labels_schema = to_schema(labels)
     validate_schema(labels, labels_schema, subsample_array=False)

     def fixed_length_array(length, element):
         # Schema of an array with exactly `length` items of `element`.
         return {
             'type': 'array',
             'minItems': length,
             'maxItems': length,
             'items': element,
         }

     expected_features = fixed_length_array(
         150, fixed_length_array(4, {'type': 'number'}))
     expected_labels = fixed_length_array(150, {'type': 'integer'})
     # Show full diffs when a schema comparison fails.
     self.maxDiff = None
     self.assertEqual(features_schema, expected_features)
     self.assertEqual(labels_schema, expected_labels)
Exemple #10
0
    def test_pandas_to_schema(self):
        # Infers schemas for the pandas iris fixture with to_schema, checks
        # that each inferred schema validates its own data, and compares the
        # inferred schemas against the expected literals built below.
        import pandas as pd

        from lale.datasets.data_schemas import to_schema
        from lale.type_checking import validate_schema

        features = self._irisDf["X"]
        labels = self._irisDf["y"]
        # The fixture data must not already carry an attached json_schema.
        assert isinstance(features, pd.DataFrame)
        assert not hasattr(features, "json_schema")
        features_schema = to_schema(features)
        validate_schema(features, features_schema, subsample_array=False)
        assert isinstance(labels, pd.Series)
        assert not hasattr(labels, "json_schema")
        labels_schema = to_schema(labels)
        validate_schema(labels, labels_schema, subsample_array=False)

        column_names = (
            "sepal length (cm)",
            "sepal width (cm)",
            "petal length (cm)",
            "petal width (cm)",
        )
        # One schema entry per column, in column order.
        column_schemas = [
            {"description": column, "type": "number"} for column in column_names
        ]
        expected_features = {
            "type": "array",
            "minItems": 120,
            "maxItems": 120,
            "items": {
                "type": "array",
                "minItems": 4,
                "maxItems": 4,
                "items": column_schemas,
            },
        }
        expected_labels = {
            "type": "array",
            "minItems": 120,
            "maxItems": 120,
            "items": {"description": "target", "type": "integer"},
        }
        # Show full diffs when a schema comparison fails.
        self.maxDiff = None
        self.assertEqual(features_schema, expected_features)
        self.assertEqual(labels_schema, expected_labels)
Exemple #11
0
    def test_transform_schema_pipeline(self):
        # The schema that the pipeline NMF >> LogisticRegression produces for
        # the digits X schema must equal the expected literal below.
        from lale.datasets.data_schemas import to_schema

        chained = NMF >> LogisticRegression
        schema_in = to_schema(self._digits["X"])
        schema_out = chained.transform_schema(schema_in)
        number_rows = {"type": "array", "items": {"type": "number"}}
        expected = {
            "description": "Probability of the sample for each class in the model.",
            "type": "array",
            "items": number_rows,
        }
        # Show full diffs when the comparison fails.
        self.maxDiff = None
        self.assertEqual(schema_out, expected)
Exemple #12
0
    def test_transform_schema_higher_order(self):
        # Inside an EnableSchemaValidation() context, transform_schema on
        # IdentityWrapper(op=LogisticRegression) must give the same result as
        # on LogisticRegression itself, for the digits X schema.
        with EnableSchemaValidation():
            from lale.datasets.data_schemas import to_schema

            plain = LogisticRegression
            wrapped = IdentityWrapper(op=LogisticRegression)
            digits_fixture = self._digits
            # Fail early with a clear assertion if the fixture is unset.
            assert digits_fixture is not None
            digits_schema = to_schema(digits_fixture["X"])
            from_plain = plain.transform_schema(digits_schema)
            from_wrapped = wrapped.transform_schema(digits_schema)
            # Show full diffs when the comparison fails.
            self.maxDiff = None
            self.assertEqual(from_plain, from_wrapped)
Exemple #13
0
 def test_transform_schema_pipeline(self):
     # The schema that the pipeline NMF >> LogisticRegression produces for
     # the digits X schema must equal the expected literal below.
     from lale.datasets.data_schemas import to_schema
     chained = NMF >> LogisticRegression
     schema_in = to_schema(self._digits['X'])
     schema_out = chained.transform_schema(schema_in)
     number_rows = {'type': 'array', 'items': {'type': 'number'}}
     expected = {
         '$schema': 'http://json-schema.org/draft-04/schema#',
         'description':
             'Probability of the sample for each class in the model.',
         'type': 'array',
         'items': number_rows,
     }
     # Show full diffs when the comparison fails.
     self.maxDiff = None
     self.assertEqual(schema_out, expected)
Exemple #14
0
    def test_transform_schema_Concat_irisDf(self):
        # Feeds combinations of the iris X and y schemas to
        # ConcatFeatures.transform_schema and checks, for each result, the
        # row length (minItems/maxItems) and the per-element schema.
        from lale.datasets.data_schemas import to_schema

        features, labels = self._irisDf["X"], self._irisDf["y"]
        x_schema, y_schema = to_schema(features), to_schema(labels)

        # Each scenario: (input schemas, expected row length, expected element).
        scenarios = [
            ([x_schema], 4, {"type": "number"}),
            ([y_schema], 1, {"description": "target", "type": "integer"}),
            ([x_schema, x_schema], 8, {"type": "number"}),
            ([y_schema, y_schema], 2, {"type": "integer"}),
            ([x_schema, y_schema], 5, {"type": "number"}),
            ([x_schema, x_schema, x_schema], 12, {"type": "number"}),
        ]
        for inputs, width, element in scenarios:
            result = ConcatFeatures.transform_schema({"items": inputs})
            # The failure message carries the whole actual schema.
            assert result["items"]["minItems"] == width, str(result)
            assert result["items"]["maxItems"] == width, str(result)
            assert result["items"]["items"] == element, str(result)
Exemple #15
0
    def test_transform_schema_higher_order(self):
        # With data schema validation switched on, transform_schema on
        # IdentityWrapper(op=LogisticRegression) must give the same result as
        # on LogisticRegression itself, for the digits X schema.
        from lale.datasets.data_schemas import to_schema

        # NOTE(review): this reads whatever `disable_data_schema_validation`
        # is bound to in this module's scope; confirm it reflects the current
        # setting rather than a value captured at import time.
        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)
        try:
            inner = LogisticRegression
            outer = IdentityWrapper(op=LogisticRegression)
            input_schema = to_schema(self._digits["X"])
            transformed_inner = inner.transform_schema(input_schema)
            transformed_outer = outer.transform_schema(input_schema)
            # Show full diffs when the comparison fails.
            self.maxDiff = None
            self.assertEqual(transformed_inner, transformed_outer)
        finally:
            # Restore the previous setting even if a call or assertion above
            # raises, so a failing run does not leak the changed flag.
            set_disable_data_schema_validation(existing_flag)
Exemple #16
0
    def test_ndarray_to_schema(self):
        # Infers schemas for the irisArr fixture with to_schema, checks each
        # one with validate_schema_directly, and compares the inferred
        # schemas against the expected literals below.
        from lale.datasets.data_schemas import to_schema
        from lale.type_checking import validate_schema_directly

        iris_fixture = self._irisArr
        # Fail early with a clear assertion if the fixture is unset.
        assert iris_fixture is not None
        features = iris_fixture["X"]
        labels = iris_fixture["y"]
        # The fixture data must not already carry an attached json_schema.
        assert not hasattr(features, "json_schema")
        features_schema = to_schema(features)
        validate_schema_directly(features, features_schema, subsample_array=False)
        assert not hasattr(labels, "json_schema")
        labels_schema = to_schema(labels)
        validate_schema_directly(labels, labels_schema, subsample_array=False)

        expected_row = {
            "type": "array",
            "minItems": 4,
            "maxItems": 4,
            "items": {"type": "number"},
        }
        expected_features = {
            "type": "array",
            "minItems": 150,
            "maxItems": 150,
            "items": expected_row,
        }
        expected_labels = {
            "type": "array",
            "minItems": 150,
            "maxItems": 150,
            "items": {"type": "integer"},
        }
        # Show full diffs when a schema comparison fails.
        self.maxDiff = None
        self.assertEqual(features_schema, expected_features)
        self.assertEqual(labels_schema, expected_labels)
Exemple #17
0
 def test_keep_non_numbers(self):
     # Fits Project(columns={'not': {'type': 'number'}}) on the creditG
     # features and checks that the schema of the transformed data equals
     # the expected literal, which lists thirteen enum-typed columns.
     from lale.datasets.data_schemas import to_schema
     from lale.lib.lale import Project
     # Only the features are used; the target is not needed by this test.
     train_X = self._creditG['X']
     trainable = Project(columns={'not': {'type': 'number'}})
     trained = trainable.fit(train_X)
     transformed = trained.transform(train_X)
     transformed_schema = to_schema(transformed)
     transformed_expected = {
         'type': 'array', 'minItems': 670, 'maxItems': 670,
         'items': {
             'type': 'array', 'minItems': 13, 'maxItems': 13,
             'items': [
                 {'description': 'checking_status', 'enum': [
                     '<0', '0<=X<200', '>=200', 'no checking']},
                 {'description': 'credit_history', 'enum': [
                     'no credits/all paid', 'all paid',
                     'existing paid', 'delayed previously',
                     'critical/other existing credit']},
                 {'description': 'purpose', 'enum': [
                     'new car', 'used car', 'furniture/equipment',
                     'radio/tv', 'domestic appliance', 'repairs',
                     'education', 'vacation', 'retraining', 'business',
                     'other']},
                 {'description': 'savings_status', 'enum': [
                     '<100', '100<=X<500', '500<=X<1000', '>=1000',
                     'no known savings']},
                 {'description': 'employment', 'enum': [
                     'unemployed', '<1', '1<=X<4', '4<=X<7', '>=7']},
                 {'description': 'personal_status', 'enum': [
                     'male div/sep', 'female div/dep/mar', 'male single',
                     'male mar/wid', 'female single']},
                 {'description': 'other_parties', 'enum': [
                     'none', 'co applicant', 'guarantor']},
                 {'description': 'property_magnitude', 'enum': [
                     'real estate', 'life insurance', 'car',
                     'no known property']},
                 {'description': 'other_payment_plans', 'enum': [
                     'bank', 'stores', 'none']},
                 {'description': 'housing', 'enum': [
                     'rent', 'own', 'for free']},
                 {'description': 'job', 'enum': [
                     'unemp/unskilled non res', 'unskilled resident',
                     'skilled', 'high qualif/self emp/mgmt']},
                 {'description': 'own_telephone', 'enum': ['none', 'yes']},
                 {'description': 'foreign_worker', 'enum': ['yes', 'no']}]}}
     # Show full diffs when the schema comparison fails.
     self.maxDiff = None
     self.assertEqual(transformed_schema, transformed_expected)
Exemple #18
0
    def test_transform_schema_NoOp(self):
        # Inside an EnableSchemaValidation() context, NoOp.transform_schema
        # must return the very same schema object it was given; checked for
        # the X part of every dataset fixture.
        with EnableSchemaValidation():
            from lale.datasets.data_schemas import to_schema

            fixtures = (
                self._irisArr,
                self._irisDf,
                self._digits,
                self._housing,
                self._creditG,
                self._movies,
                self._drugRev,
            )
            for fixture in fixtures:
                # Fail early with a clear assertion if a fixture is unset.
                assert fixture is not None
                schema_in = to_schema(fixture["X"])
                schema_out = NoOp.transform_schema(schema_in)
                # Identity, not just equality.
                self.assertIs(schema_in, schema_out)
Exemple #19
0
 def test_keep_numbers(self):
     # Fits Project(columns={'type': 'number'}) on the creditG features and
     # checks that the schema of the transformed data equals the expected
     # literal, which lists seven numeric columns.
     from lale.datasets.data_schemas import to_schema
     from lale.lib.lale import Project
     # Only the features are used; the target is not needed by this test.
     train_X = self._creditG['X']
     trainable = Project(columns={'type': 'number'})
     trained = trainable.fit(train_X)
     transformed = trained.transform(train_X)
     transformed_schema = to_schema(transformed)
     transformed_expected = {
         'type': 'array',
         'minItems': 670,
         'maxItems': 670,
         'items': {
             'type': 'array',
             'minItems': 7,
             'maxItems': 7,
             'items': [
                 {'description': 'duration', 'type': 'number'},
                 {'description': 'credit_amount', 'type': 'number'},
                 {'description': 'installment_commitment', 'type': 'number'},
                 {'description': 'residence_since', 'type': 'number'},
                 {'description': 'age', 'type': 'number'},
                 {'description': 'existing_credits', 'type': 'number'},
                 {'description': 'num_dependents', 'type': 'number'},
             ],
         },
     }
     # Show full diffs when the schema comparison fails.
     self.maxDiff = None
     self.assertEqual(transformed_schema, transformed_expected)
Exemple #20
0
    def test_transform_schema_NoOp(self):
        # With data schema validation switched on, NoOp.transform_schema must
        # return the very same schema object it was given; checked for the X
        # part of every dataset fixture.
        from lale.datasets.data_schemas import to_schema

        # NOTE(review): this reads whatever `disable_data_schema_validation`
        # is bound to in this module's scope; confirm it reflects the current
        # setting rather than a value captured at import time.
        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(False)
        try:
            for ds in [
                    self._irisArr,
                    self._irisDf,
                    self._digits,
                    self._housing,
                    self._creditG,
                    self._movies,
                    self._drugRev,
            ]:
                s_input = to_schema(ds["X"])
                s_output = NoOp.transform_schema(s_input)
                # Identity, not just equality.
                self.assertIs(s_input, s_output)
        finally:
            # Restore the previous setting even if a call or assertion above
            # raises, so a failing run does not leak the changed flag.
            set_disable_data_schema_validation(existing_flag)
Exemple #21
0
    def test_keep_non_numbers(self):
        # Fits Project(columns={"not": {"type": "number"}}) on the creditG
        # features and checks that the schema of the transformed data equals
        # the expected schema, which lists thirteen enum-typed columns.
        from lale.datasets.data_schemas import to_schema
        from lale.lib.lale import Project

        features = self._creditG["X"]
        projector = Project(columns={"not": {"type": "number"}})
        fitted = projector.fit(features)
        projected = fitted.transform(features)
        projected_schema = to_schema(projected)

        # (column description, allowed values) in expected column order.
        enum_columns = [
            ("checking_status", ["<0", "0<=X<200", ">=200", "no checking"]),
            (
                "credit_history",
                [
                    "no credits/all paid",
                    "all paid",
                    "existing paid",
                    "delayed previously",
                    "critical/other existing credit",
                ],
            ),
            (
                "purpose",
                [
                    "new car",
                    "used car",
                    "furniture/equipment",
                    "radio/tv",
                    "domestic appliance",
                    "repairs",
                    "education",
                    "vacation",
                    "retraining",
                    "business",
                    "other",
                ],
            ),
            (
                "savings_status",
                ["<100", "100<=X<500", "500<=X<1000", ">=1000", "no known savings"],
            ),
            ("employment", ["unemployed", "<1", "1<=X<4", "4<=X<7", ">=7"]),
            (
                "personal_status",
                [
                    "male div/sep",
                    "female div/dep/mar",
                    "male single",
                    "male mar/wid",
                    "female single",
                ],
            ),
            ("other_parties", ["none", "co applicant", "guarantor"]),
            (
                "property_magnitude",
                ["real estate", "life insurance", "car", "no known property"],
            ),
            ("other_payment_plans", ["bank", "stores", "none"]),
            ("housing", ["rent", "own", "for free"]),
            (
                "job",
                [
                    "unemp/unskilled non res",
                    "unskilled resident",
                    "skilled",
                    "high qualif/self emp/mgmt",
                ],
            ),
            ("own_telephone", ["none", "yes"]),
            ("foreign_worker", ["yes", "no"]),
        ]
        expected_schema = {
            "type": "array",
            "minItems": 670,
            "maxItems": 670,
            "items": {
                "type": "array",
                "minItems": 13,
                "maxItems": 13,
                "items": [
                    {"description": column, "enum": allowed}
                    for column, allowed in enum_columns
                ],
            },
        }
        # Show full diffs when the schema comparison fails.
        self.maxDiff = None
        self.assertEqual(projected_schema, expected_schema)
Exemple #22
0
    def test_arff_to_schema(self):
        # Takes the creditG fixture, whose X and y must already carry a
        # json_schema attribute, checks that the schemas returned by
        # to_schema validate their own data, and compares them against the
        # expected schemas built below.
        from lale.datasets.data_schemas import to_schema
        from lale.type_checking import validate_schema

        features = self._creditG["X"]
        labels = self._creditG["y"]
        assert hasattr(features, "json_schema")
        features_schema = to_schema(features)
        validate_schema(features, features_schema, subsample_array=False)
        assert hasattr(labels, "json_schema")
        labels_schema = to_schema(labels)
        validate_schema(labels, labels_schema, subsample_array=False)

        def numeric(column):
            # Expected schema entry for a numeric column.
            return {"description": column, "type": "number"}

        def categorical(column, *allowed):
            # Expected schema entry for an enum column; enum is a list.
            return {"description": column, "enum": list(allowed)}

        # Twenty columns, in expected column order.
        column_schemas = [
            categorical("checking_status", "<0", "0<=X<200", ">=200", "no checking"),
            numeric("duration"),
            categorical(
                "credit_history",
                "no credits/all paid",
                "all paid",
                "existing paid",
                "delayed previously",
                "critical/other existing credit",
            ),
            categorical(
                "purpose",
                "new car",
                "used car",
                "furniture/equipment",
                "radio/tv",
                "domestic appliance",
                "repairs",
                "education",
                "vacation",
                "retraining",
                "business",
                "other",
            ),
            numeric("credit_amount"),
            categorical(
                "savings_status",
                "<100",
                "100<=X<500",
                "500<=X<1000",
                ">=1000",
                "no known savings",
            ),
            categorical("employment", "unemployed", "<1", "1<=X<4", "4<=X<7", ">=7"),
            numeric("installment_commitment"),
            categorical(
                "personal_status",
                "male div/sep",
                "female div/dep/mar",
                "male single",
                "male mar/wid",
                "female single",
            ),
            categorical("other_parties", "none", "co applicant", "guarantor"),
            numeric("residence_since"),
            categorical(
                "property_magnitude",
                "real estate",
                "life insurance",
                "car",
                "no known property",
            ),
            numeric("age"),
            categorical("other_payment_plans", "bank", "stores", "none"),
            categorical("housing", "rent", "own", "for free"),
            numeric("existing_credits"),
            categorical(
                "job",
                "unemp/unskilled non res",
                "unskilled resident",
                "skilled",
                "high qualif/self emp/mgmt",
            ),
            numeric("num_dependents"),
            categorical("own_telephone", "none", "yes"),
            categorical("foreign_worker", "yes", "no"),
        ]
        expected_features = {
            "type": "array",
            "minItems": 670,
            "maxItems": 670,
            "items": {
                "type": "array",
                "minItems": 20,
                "maxItems": 20,
                "items": column_schemas,
            },
        }
        expected_labels = {
            "type": "array",
            "minItems": 670,
            "maxItems": 670,
            "items": categorical("class", "good", "bad"),
        }
        # Show full diffs when a schema comparison fails.
        self.maxDiff = None
        self.assertEqual(features_schema, expected_features)
        self.assertEqual(labels_schema, expected_labels)
Exemple #23
0
 def test_arff_to_schema(self):
     # Takes the creditG fixture, whose X and y must already carry a
     # json_schema attribute, checks that the schemas returned by to_schema
     # validate their own data, and compares them against the expected
     # schemas built below.
     from lale.datasets.data_schemas import to_schema
     from lale.type_checking import validate_schema
     features = self._creditG['X']
     labels = self._creditG['y']
     assert hasattr(features, 'json_schema')
     features_schema = to_schema(features)
     validate_schema(features, features_schema, subsample_array=False)
     assert hasattr(labels, 'json_schema')
     labels_schema = to_schema(labels)
     validate_schema(labels, labels_schema, subsample_array=False)

     def numeric(column):
         # Expected schema entry for a numeric column.
         return {'description': column, 'type': 'number'}

     def categorical(column, *allowed):
         # Expected schema entry for an enum column; enum is a list.
         return {'description': column, 'enum': list(allowed)}

     # Twenty columns, in expected column order.
     column_schemas = [
         categorical(
             'checking_status', '<0', '0<=X<200', '>=200', 'no checking'),
         numeric('duration'),
         categorical(
             'credit_history', 'no credits/all paid', 'all paid',
             'existing paid', 'delayed previously',
             'critical/other existing credit'),
         categorical(
             'purpose', 'new car', 'used car', 'furniture/equipment',
             'radio/tv', 'domestic appliance', 'repairs', 'education',
             'vacation', 'retraining', 'business', 'other'),
         numeric('credit_amount'),
         categorical(
             'savings_status', '<100', '100<=X<500', '500<=X<1000',
             '>=1000', 'no known savings'),
         categorical(
             'employment', 'unemployed', '<1', '1<=X<4', '4<=X<7', '>=7'),
         numeric('installment_commitment'),
         categorical(
             'personal_status', 'male div/sep', 'female div/dep/mar',
             'male single', 'male mar/wid', 'female single'),
         categorical('other_parties', 'none', 'co applicant', 'guarantor'),
         numeric('residence_since'),
         categorical(
             'property_magnitude', 'real estate', 'life insurance', 'car',
             'no known property'),
         numeric('age'),
         categorical('other_payment_plans', 'bank', 'stores', 'none'),
         categorical('housing', 'rent', 'own', 'for free'),
         numeric('existing_credits'),
         categorical(
             'job', 'unemp/unskilled non res', 'unskilled resident',
             'skilled', 'high qualif/self emp/mgmt'),
         numeric('num_dependents'),
         categorical('own_telephone', 'none', 'yes'),
         categorical('foreign_worker', 'yes', 'no'),
     ]
     expected_features = {
         'type': 'array', 'minItems': 670, 'maxItems': 670,
         'items': {
             'type': 'array', 'minItems': 20, 'maxItems': 20,
             'items': column_schemas}}
     # In this version the target enum holds the integers 0 and 1.
     expected_labels = {
         'type': 'array', 'minItems': 670, 'maxItems': 670,
         'items': categorical('class', 0, 1)}
     # Show full diffs when a schema comparison fails.
     self.maxDiff = None
     self.assertEqual(features_schema, expected_features)
     self.assertEqual(labels_schema, expected_labels)