def test_check_column_exists_and_order(self, d, m, r):
    """check_column_exists_and_order should produce a vlog matching the expected JSON."""
    frame = get_test_csv(cwd, d)
    metadata = read_json(cwd, m)
    expected = read_json(cwd, r)
    linter = Linter(frame, metadata)
    linter.check_column_exists_and_order()
    self.assertDictEqual(linter.vlog.as_dict(), expected)
def test_data_types(self, d, m, r):
    """check_types should yield the expected overall success flag for this data/metadata pair."""
    frame = get_test_csv(cwd, d)
    metadata = read_json(cwd, m)
    linter = Linter(frame, metadata)
    linter.check_types()
    self.assertEqual(linter.success(), r)
def test_check_regex(self, d, m, r):
    """check_pattern should produce a vlog matching the expected JSON."""
    frame = get_test_csv(cwd, d)
    metadata = read_json(cwd, m)
    expected = read_json(cwd, r)
    linter = Linter(frame, metadata)
    linter.check_pattern()
    self.assertDictEqual(linter.vlog.as_dict(), expected)
def test_check_enums(self, d, m, r):
    """check_enums should record the expected result for the 'mychar' column."""
    frame = get_test_csv(cwd, d)
    metadata = read_json(cwd, m)
    expected = read_json(cwd, r)
    linter = Linter(frame, metadata)
    linter.check_enums()
    actual = linter.vlog["mychar"]["check_enums"].as_dict()
    self.assertDictEqual(actual, expected)
def test_overall_success(self, d, m, r):
    """success() must raise before any checks run, then report the expected flag after check_all."""
    frame = get_test_csv(cwd, d)
    metadata = read_json(cwd, m)
    linter = Linter(frame, metadata)
    # Asking for the result before any check has run is an error
    with self.assertRaises(Exception):
        linter.success()
    linter.check_all()
    self.assertEqual(linter.success(), r)
def test_detailed_markdown(self, d, m):
    """
    Smoke test: a wide range of metadata/data combinations should render
    a markdown report without raising.
    """
    frame = get_test_csv(cwd, d)
    metadata = read_json(cwd, m)
    linter = Linter(frame, metadata)
    linter.check_all()
    linter.markdown_report()
def test_metadata_correctly_imposed_on_valid_data(self):
    """impose_metadata_types_on_pd_df should coerce valid data so dtypes match the metadata."""
    frame = get_test_csv(cwd, "test_csv_data_valid")
    meta_data = read_json(cwd, "meta/test_meta_cols_valid.json")
    meta_cols = meta_data["columns"]
    # Freshly-read CSV dtypes do not yet conform to the metadata
    self.assertFalse(
        _pd_df_datatypes_match_metadata_data_types(frame, meta_cols))
    # After imposing the metadata types, the dtypes should conform
    frame = impose_metadata_types_on_pd_df(frame, meta_data)
    self.assertTrue(
        _pd_df_datatypes_match_metadata_data_types(frame, meta_cols))
def test_metadata_impose_does_not_work_on_invalid_data(self):
    """Imposing metadata on non-conforming data should leave the dtypes non-conforming."""
    # Data that does NOT conform to the metadata
    frame = get_test_csv(cwd, "test_csv_data_invalid_data")
    meta_data = read_json(cwd, "meta/test_meta_cols_valid.json")
    meta_cols = meta_data["columns"]
    self.assertFalse(
        _pd_df_datatypes_match_metadata_data_types(frame, meta_cols))
    # Even after impose_metadata_types_on_pd_df, the dtypes still do NOT conform
    frame = impose_metadata_types_on_pd_df(frame, meta_data)
    self.assertFalse(
        _pd_df_datatypes_match_metadata_data_types(frame, meta_cols))
def test_validate_meta_data(self):
    """Linter construction validates metadata: valid metadata is accepted,
    invalid metadata raises ValidationError.
    """
    meta = read_json(cwd, "meta/test_meta_cols_valid.json")
    # Data is irrelevant here, but a Linter cannot be instantiated without it
    df = get_test_csv(cwd, "test_csv_data_valid")
    # Valid metadata: construction should not raise.
    # (Previously the result was bound to an unused, ambiguously-named local `L`;
    # the constructor call alone is the assertion.)
    Linter(df, meta)
    # Metadata with a missing column name must raise
    meta = read_json(cwd, "meta/test_invalid_meta_cols_missing_name.json")
    with self.assertRaises(ValidationError):
        Linter(df, meta)
    # Metadata with a misspelt "columns" key must raise
    meta = read_json(cwd, "meta/test_invalid_meta_columns_key_mispelt.json")
    with self.assertRaises(ValidationError):
        Linter(df, meta)
def test_data_types_ints(self):
    """check_types should flag integer columns holding floats/longs and accept true ints."""
    frame = get_test_csv(cwd, "test_csv_data_ints")
    metadata = read_json(cwd, "meta/test_meta_cols_ints.json")
    linter = Linter(frame, metadata)
    linter.check_types()
    log = linter.vlog.as_dict()
    actual = {
        column: entry["check_data_type"]["success"]
        for column, entry in log.items()
    }
    expected = {
        'int_with_float': False,
        'int_with_long': False,
        'int_with_null': True,
        'int_without_null': True,
    }
    self.assertDictEqual(actual, expected)