コード例 #1
0
def commandline_check_bq(datasetname: str, tablename: str, rulesfile: str,
                         verbose: bool) -> None:
    '''Run the checks described in a rules file against a BigQuery table.

    The rules file is loaded and applied to a ``dw.BqTableCheckSuite``
    created for ``datasetname``/``tablename``; the suite is then run and
    every collected error message is printed.

    Args:
        datasetname: Dataset name passed to ``dw.BqTableCheckSuite``.
        tablename: Table name passed to ``dw.BqTableCheckSuite``.
        rulesfile: Path of the YAML rules file to load.
        verbose: When true, print progress messages and a final summary.

    Exits the process with status:
        4 if the rules file is not found,
        5 if a ``dw.YamlParsingError`` is raised during setup,
        6 if any other exception is raised during setup,
        1 if at least one check failed.
    Returns normally when no check produced an error message.
    '''
    if verbose:
        print('Parsing rules file ... ', end='')
    try:
        rules = dw.load_yaml_file_to_dict(rulesfile)
        suite = dw.BqTableCheckSuite(datasetname, tablename)
        dw.apply_yamldict_to_checksuite(rules, suite)
    except FileNotFoundError:
        print(f'File {rulesfile} not found')
        sys.exit(4)
    except dw.YamlParsingError as parse_error:
        print(parse_error)
        sys.exit(5)
    except Exception as unexpected:
        # Anything else raised while building the suite is reported here.
        print(f'Unexpected YAML parsing error:\n{unexpected}')
        sys.exit(6)

    if verbose:
        print('done.\nRunning checks ', end='')
    suite.runchecks(verbose=verbose)

    failure_count = len(suite.error_messages)
    if failure_count == 0:
        # Success path: nothing to report unless the caller asked for it.
        if verbose:
            print(' done.\nAll checks passed.')
        return

    if verbose:
        print(f' done.\nChecks failed ({failure_count}):')
    for message in suite.error_messages:
        print(message)
    sys.exit(1)
コード例 #2
0
 def test_apply_yamldict_to_checksuite_valuechecks(self):
     '''Check the values that 'yamls/file1.yaml' sets on a check suite.

     Loads the rules file, applies it to a suite built from
     ``self.df_file1`` and compares the dataset-level settings and the
     attributes of both resulting columns with the expected values.
     '''
     checksuite = dw.PandasDatsetCheckSuite(self.df_file1)
     ymld = dw.load_yaml_file_to_dict('yamls/file1.yaml')
     dw.apply_yamldict_to_checksuite(ymld, checksuite)
     # Dataset checks
     self.assertFalse(checksuite.allow_duplicate_rows)
     self.assertTrue(checksuite.stop_on_fail)
     self.assertEqual(checksuite.row_count_max, 10)
     self.assertEqual(checksuite.row_count_min, 3)
     self.assertEqual(checksuite.row_count, 5)
     # Column checks
     self.assertEqual(len(checksuite.columns), 2)
     col1 = checksuite.columns[0]
     col2 = checksuite.columns[1]
     # col 1
     self.assertEqual(col1.name, 'A')
     self.assertEqual(col1.type, 'numeric')
     self.assertEqual(col1.allow_nulls, True)
     self.assertEqual(col1.min_val, 0)
     self.assertEqual(col1.max_val, 5)
     self.assertEqual(col1.count_distinct_max, 10)
     self.assertEqual(col1.count_distinct_min, 1)
     self.assertEqual(col1.count_distinct, 5)
     # col 2
     self.assertEqual(col2.name, 'C')
     self.assertEqual(col2.type, 'string')
     self.assertEqual(col2.allow_nulls, False)
     # The distinct-count rules must be read from col2 itself; checking
     # col1 here would leave col2's rules unverified.
     self.assertEqual(col2.count_distinct_max, 10)
     self.assertEqual(col2.count_distinct_min, 1)
     self.assertEqual(col2.count_distinct, 5)
コード例 #3
0
 def test_load_raw_yaml(self):
     '''Exercise ``dw.load_yaml_file_to_dict`` on three inputs.

     A path that does not exist must raise ``FileNotFoundError``,
     'yamls/file2.yaml' must raise ``dw.YamlParsingError`` and
     'yamls/file1.yaml' must load into a ``dict``.
     '''
     with self.assertRaises(FileNotFoundError):
         dw.load_yaml_file_to_dict('xxxx')

     with self.assertRaises(dw.YamlParsingError):
         dw.load_yaml_file_to_dict('yamls/file2.yaml')

     loaded = dw.load_yaml_file_to_dict('yamls/file1.yaml')
     self.assertTrue(isinstance(loaded, dict))
コード例 #4
0
 def test_apply_yamldict_to_checksuite_keyerrors(self):
     '''Check which rules files are rejected when applied to a suite.

     The files at ``file3path`` .. ``file7path`` must each make
     ``dw.apply_yamldict_to_checksuite`` raise ``dw.YamlParsingError``;
     the file at ``file1path`` must be applied without raising.
     '''
     suite = dw.PandasDatsetCheckSuite(self.df_file1)

     # The first rejected file is additionally checked to load as a dict.
     rules = dw.load_yaml_file_to_dict(self.file3path)
     self.assertTrue(isinstance(rules, dict))
     with self.assertRaises(dw.YamlParsingError):
         dw.apply_yamldict_to_checksuite(rules, suite)

     # The remaining rejected files share the same expectation.
     for rejected_path in (self.file4path, self.file5path,
                           self.file6path, self.file7path):
         rules = dw.load_yaml_file_to_dict(rejected_path)
         with self.assertRaises(dw.YamlParsingError):
             dw.apply_yamldict_to_checksuite(rules, suite)

     # The following should not raise an error - the test will
     # fail if it does.
     rules = dw.load_yaml_file_to_dict(self.file1path)
     dw.apply_yamldict_to_checksuite(rules, suite)
コード例 #5
0
 def test_apply_yamldict_to_checksuite_valuechecks(self):
     '''Verify the values that the file1 rules set on a check suite.

     Loads the rules from ``self.file1path``, applies them to a suite
     built from ``self.df_file1`` and compares the dataset-level settings
     and the attributes of the four resulting columns with the expected
     values.
     '''
     def expect_attrs(column, expected):
         # Attributes are compared one at a time, in the listed order, so
         # the first mismatch is the one that gets reported.
         for attr_name, wanted in expected:
             self.assertEqual(getattr(column, attr_name), wanted)

     suite = dw.PandasDatsetCheckSuite(self.df_file1)
     rules = dw.load_yaml_file_to_dict(self.file1path)
     dw.apply_yamldict_to_checksuite(rules, suite)

     # Dataset-level settings
     self.assertTrue(suite.allow_duplicate_rows)
     self.assertTrue(suite.stop_on_fail)
     self.assertEqual(suite.row_count_max, 10)
     self.assertEqual(suite.row_count_min, 3)
     self.assertEqual(suite.row_count, 5)

     # Column-level settings
     self.assertEqual(len(suite.columns), 4)
     first = suite.columns[0]
     second = suite.columns[1]
     third = suite.columns[2]
     fourth = suite.columns[3]

     # Column 'A'
     expect_attrs(first, [
         ('columnname', 'A'),
         ('type', 'numeric'),
         ('allow_duplicates', False),
         ('allow_nulls', True),
         ('allow_outliers', False),
         ('min_val', 0),
         ('max_val', 5),
         ('count_distinct_max', 10),
         ('count_distinct_min', 1),
         ('count_distinct', 5),
     ])

     # Column 'C'
     expect_attrs(second, [
         ('columnname', 'C'),
         ('type', 'string'),
         ('allow_blanks', False),
         ('allow_nulls', False),
         ('count_distinct_max', 10),
         ('count_distinct_min', 1),
         ('count_distinct', 5),
         ('regex_type', 'mandatory'),
         ('regex_rule', '[a-m]'),
     ])

     # Column 'I'
     expect_attrs(third, [
         ('columnname', 'I'),
         ('type', 'numeric'),
         ('allow_duplicates', True),
         ('allow_nulls', True),
         ('val', 1),
     ])

     # Column 'J'
     expect_attrs(fourth, [
         ('columnname', 'J'),
         ('type', 'datetime'),
         ('allow_nulls', False),
         ('dateformat', '%m/%d/%Y'),
     ])