    def test_check_column_exists_and_order(self, d, m, r):

        df = get_test_csv(cwd, d)
        meta = read_json(cwd, m)
        expected_result = read_json(cwd, r)

        l = Linter(df, meta)
        l.check_column_exists_and_order()
        result = l.vlog.as_dict()
        self.assertDictEqual(result, expected_result)
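    # NOTE (assumption): the d, m and r arguments to these tests are presumably
    # supplied by a parameterization decorator (e.g. parameterized.expand) that
    # names the test CSV, the metadata JSON and the expected-results JSON; the
    # decorators themselves are not shown here.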
    def test_check_regex(self, d, m, r):

        df = get_test_csv(cwd, d)
        meta = read_json(cwd, m)
        expected_result = read_json(cwd, r)

        l = Linter(df, meta)
        l.check_pattern()
        result = l.vlog.as_dict()
        self.assertDictEqual(result, expected_result)
    def test_check_enums(self, d, m, r):

        df = get_test_csv(cwd, d)
        meta = read_json(cwd, m)
        expected_result = read_json(cwd, r)

        l = Linter(df, meta)
        l.check_enums()
        result = l.vlog["mychar"]["check_enums"].as_dict()
        self.assertDictEqual(result, expected_result)
    def test_data_types(self, d, m, r):
        df = get_test_csv(cwd, d)
        meta = read_json(cwd, m)

        l = Linter(df, meta)

        l.check_types()
        result = l.success()
        self.assertEqual(result, r)
    def test_validate_meta_data(self):
        meta = read_json(cwd, "meta/test_meta_cols_valid.json")

        # Data is irrelevant here, but the Linter cannot be instantiated without it
        df = get_test_csv(cwd, "test_csv_data_valid")

        # Test no error is raised
        L = Linter(df, meta)


        # Invalid metadata (column name missing) should raise an error
        meta = read_json(cwd, "meta/test_invalid_meta_cols_missing_name.json")

        with self.assertRaises(ValidationError):
            L = Linter(df, meta)

        # Metadata with a misspelt 'columns' key should also raise an error
        meta = read_json(cwd, "meta/test_invalid_meta_columns_key_mispelt.json")

        with self.assertRaises(ValidationError):
            L = Linter(df, meta)
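        # NOTE (assumption): ValidationError here is presumed to be jsonschema's
        # ValidationError; the Linter appears to validate the supplied metadata
        # against a JSON schema when it is constructed.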
    def test_overall_success(self, d, m, r):
        df = get_test_csv(cwd, d)
        meta = read_json(cwd, m)

        l = Linter(df, meta)
        # success() should raise if called before any checks have run
        with self.assertRaises(Exception):
            l.success()

        l.check_all()
        result = l.success()
        self.assertEqual(result, r)
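        # success() summarises the validation log, so it raises until some checks
        # have run; after check_all() it returns the overall result, presumably a
        # boolean, which is compared against the expected value r.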
    def test_detailed_markdown(self, d, m):
        """
        This tests that a wide range of metadata/data combinations render correctly without erroring
        """

        df = get_test_csv(cwd, d)
        meta = read_json(cwd, m)

        l = Linter(df, meta)

        l.check_all()

        l.markdown_report()
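        # markdown_report() is only smoke-tested: no assertion is made on its
        # output, the aim (per the docstring) being that rendering does not raise.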
    def test_generate_metadata_from_pd_df(self):

        df = pd.read_csv(os.path.join(cwd, "data/test_csv_data_valid.csv"),
                         parse_dates=["mydatetime", "mydate"])

        expected_result = read_json(
            cwd, "expected_results/test_result_generated_metadata.json")
        result = generate_from_pd_df(df)

        self.assertDictEqual(result, expected_result)

        with pkg_resources.resource_stream(
                "data_linter", "data/metadata_jsonschema.json") as io:
            schema = json.load(io)

        jsonschema.validate(result, schema)
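        # jsonschema.validate raises a ValidationError if the generated metadata
        # does not conform to the schema bundled with the data_linter package, so
        # reaching the end of this test confirms the output is schema-valid.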
    def test_metadata_correctly_imposed_on_valid_data(self):

        df = get_test_csv(cwd, "test_csv_data_valid")

        meta_data = read_json(cwd, "meta/test_meta_cols_valid.json")

        meta_cols = meta_data["columns"]

        self.assertFalse(
            _pd_df_datatypes_match_metadata_data_types(df, meta_cols))

        # We expect that, after impose_metadata_types_on_pd_df is run, the datatypes conform to the metadata
        df = impose_metadata_types_on_pd_df(df, meta_data)

        self.assertTrue(
            _pd_df_datatypes_match_metadata_data_types(df, meta_cols))
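        # impose_metadata_types_on_pd_df casts the DataFrame's columns to the
        # types declared in the metadata; _pd_df_datatypes_match_metadata_data_types
        # is used as the before/after dtype check in these tests.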
    def test_metadata_impose_works_on_non_strings(self):

        # What happens if we read ints when the metadata expects strings?

        df = pd.read_parquet(os.path.join(cwd, "data", "test_parquet_data_valid.parquet"))

        meta_data = read_json(cwd, "meta/test_meta_cols_allstring.json")

        meta_cols = meta_data["columns"]

        self.assertFalse(
            _pd_df_datatypes_match_metadata_data_types(df, meta_cols))

        df = impose_metadata_types_on_pd_df(df, meta_data)

        self.assertTrue(
            _pd_df_datatypes_match_metadata_data_types(df, meta_cols))
    def test_metadata_impose_does_not_work_on_invalid_data(self):

        # What happens if we read in data that does NOT conform to the metadata
        df = get_test_csv(cwd, "test_csv_data_invalid_data")

        meta_data = read_json(cwd, "meta/test_meta_cols_valid.json")

        meta_cols = meta_data["columns"]

        self.assertFalse(
            _pd_df_datatypes_match_metadata_data_types(df, meta_cols))

        # We expect that, after impose_metadata_types_on_pd_df is run, the datatypes do NOT conform to the metadata
        df = impose_metadata_types_on_pd_df(df, meta_data)

        self.assertFalse(
            _pd_df_datatypes_match_metadata_data_types(df, meta_cols))
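        # NOTE (assumption): with invalid data the cast presumably fails or is
        # skipped rather than raising, which is why this test asserts that the
        # dtypes still do not match instead of expecting an exception.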
    def test_metadata_correctly_imposed_on_alreadytyped_date(self):

        # What happens if we read in data that is already correctly typed (parquet)?
        df = pd.read_parquet(os.path.join(cwd, "data", "test_parquet_data_valid.parquet"))

        meta_data = read_json(cwd, "meta/test_meta_cols_valid.json")

        meta_cols = meta_data["columns"]

        self.assertTrue(
            _pd_df_datatypes_match_metadata_data_types(df, meta_cols))

        # We expect that, after impose_metadata_types_on_pd_df is run, the datatypes conform to the metadata
        df = impose_metadata_types_on_pd_df(df, meta_data)

        self.assertTrue(
            _pd_df_datatypes_match_metadata_data_types(df, meta_cols))
    def test_data_types_ints(self):

        df = get_test_csv(cwd, "test_csv_data_ints")
        meta = read_json(cwd, "meta/test_meta_cols_ints.json")

        l = Linter(df, meta)

        l.check_types()

        actual = {
            k: v["check_data_type"]["success"]
            for k, v in l.vlog.as_dict().items()
        }
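        # Expected outcomes (assumption, inferred from the column names): columns
        # whose values are really floats or longs fail the declared int check,
        # while true int columns pass whether or not they contain nulls.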

        expected = {
            'int_with_float': False,
            'int_with_long': False,
            'int_with_null': True,
            'int_without_null': True
        }

        self.assertDictEqual(actual, expected)