def structure(data, format, fail_fast, row_limit, report_limit, output):
    """Run a Good Tables StructureProcessor.

    Args:
        data: the data source to validate (path, URL or stream).
        format: the data format (e.g. 'csv').
        fail_fast: stop processing on the first error when True.
        row_limit: maximum number of rows to process.
        report_limit: maximum number of report entries to collect.
        output: report output format; 'json' emits the full report,
            anything else emits a trimmed report.
    """
    processor = processors.StructureProcessor(
        format=format, fail_fast=fail_fast, row_limit=row_limit,
        report_limit=report_limit)
    valid, report, data = processor.run(data)
    valid_msg = 'Well done! The data is valid :)\n'.upper()
    # Fixed typo: a space was missing after the period ('Oops.The ...').
    invalid_msg = 'Oops. The data is invalid :(\n'.upper()
    if output == 'json':
        # JSON output carries the complete report.
        exclude = None
    else:
        # Trim internal/verbose fields from human-readable output.
        exclude = [
            'result_context', 'processor', 'row_name', 'result_category',
            'column_index', 'column_name', 'result_level'
        ]
    if valid:
        click.echo(click.style(valid_msg, fg='green'))
    else:
        click.echo(click.style(invalid_msg, fg='red'))
    click.echo(report.generate(output, exclude=exclude))
def test_standalone_empty_rows_are_not_duplicatable(self):
    """A file with multiple empty rows yields exactly 11 results."""
    source = os.path.join(self.data_dir, 'empty_rows_multiple.csv')
    processor = processors.StructureProcessor(fail_fast=False)
    result, report, data = processor.run(source)
    results = report.generate()['results']
    self.assertEqual(len(results), 11)
def test_standalone_hmt_bbsrc(self):
    """Processing a real-world HMT spend file returns data."""
    source = os.path.join(
        self.data_dir, 'hmt', '1011-bbsrc-25k-spend-return.csv')
    processor = processors.StructureProcessor()
    result, report, data = processor.run(source, encoding=None)
    self.assertTrue(data)
def test_standalone_ignore_defective_rows_false(self):
    """Defective rows make the run invalid by default."""
    source = os.path.join(self.data_dir, 'defective_rows.csv')
    with io.open(source) as stream:
        processor = processors.StructureProcessor()
        result, report, data = processor.run(stream)
        self.assertFalse(result)
def test_standalone_row_limit_in_range(self):
    """With row_limit=2 the errors beyond row 2 are never reached."""
    source = os.path.join(self.data_dir, 'row_limit_structure.csv')
    with io.open(source) as stream:
        processor = processors.StructureProcessor(row_limit=2)
        result, report, data = processor.run(stream)
        results = report.generate()['results']
        self.assertEqual(len(results), 0)
def test_standalone_ignore_empty_rows_true(self):
    """Empty rows do not invalidate the data when ignored."""
    source = os.path.join(self.data_dir, 'empty_rows.csv')
    with io.open(source) as stream:
        processor = processors.StructureProcessor(ignore_empty_rows=True)
        result, report, data = processor.run(stream)
        self.assertTrue(result)
def test_standalone_custom_empty_strings(self):
    """A custom empty-string marker ('-') flags matching rows as empty."""
    source = os.path.join(self.data_dir, 'empty_rows_custom.csv')
    with io.open(source) as stream:
        processor = processors.StructureProcessor(empty_strings=('-', ))
        result, report, data = processor.run(stream)
        self.assertFalse(result)
def test_standalone_fail_fast_false(self):
    """Without fail_fast, both structure errors are collected."""
    source = os.path.join(
        self.data_dir, 'fail_fast_two_structure_errors.csv')
    with io.open(source) as stream:
        processor = processors.StructureProcessor()
        result, report, data = processor.run(stream)
        results = report.generate()['results']
        self.assertEqual(len(results), 2)
def test_standalone_report_stream_none(self):
    """Passing report_stream=None is accepted and the run succeeds."""
    source = os.path.join(self.data_dir, 'valid.csv')
    with io.open(source) as stream:
        processor = processors.StructureProcessor(report_stream=None)
        result, report, data = processor.run(stream)
        self.assertTrue(result)
def test_standalone_report_stream_valid(self):
    """Every line written to a supplied report stream is valid JSON."""
    source = os.path.join(self.data_dir, 'valid.csv')
    report_stream = io.TextIOWrapper(io.BufferedRandom(io.BytesIO()))
    with io.open(source) as stream:
        processor = processors.StructureProcessor(
            report_stream=report_stream)
        result, report, data = processor.run(stream)
        self.assertEqual(len(report.generate()['results']), 0)
        # Rewind and verify each written line parses as JSON.
        report_stream.seek(0)
        for line in report_stream:
            self.assertTrue(json.loads(line.rstrip('\n')))
def test_structure(self):
    """The first data package resource passes structure validation."""
    # TODO: infer from data package format field (and default to csv)
    data_format = 'csv'
    processor = processors.StructureProcessor(
        format=data_format, fail_fast=False, row_limit=row_limit,
        report_limit=report_limit)
    resource_path = dp.metadata['resources'][0]['path']
    valid, report, data = processor.run(resource_path)
    # Trim internal fields from the text report used as failure message.
    exclude = [
        'result_context', 'processor', 'row_name', 'result_category',
        'column_index', 'column_name', 'result_level'
    ]
    out = report.generate('txt', exclude=exclude)
    self.assertTrue(valid, out)
def test_structure(self):
    """The first data package resource passes structure validation."""
    processor = processors.StructureProcessor(
        format='csv', fail_fast=False, row_limit=ROW_LIMIT,
        report_limit=REPORT_LIMIT)
    resource_path = dp.metadata['resources'][0]['path']
    valid, report, data = processor.run(resource_path)
    # Trim internal fields from the text report used as failure message.
    exclude = [
        'result_context', 'processor', 'row_name', 'result_category',
        'column_index', 'column_name', 'result_level'
    ]
    out = report.generate('txt', exclude=exclude)
    self.assertTrue(valid, out)
def test_standalone_row_limit_out_range(self):
    """An over-limit row_limit is clamped to ROW_LIMIT_MAX."""
    maximum = processors.StructureProcessor.ROW_LIMIT_MAX
    processor = processors.StructureProcessor(row_limit=(maximum + 1))
    self.assertEqual(processor.row_limit, maximum)
from goodtables import processors

# Validate the structure of a local CSV file and print a text report.
source = './data.csv'
processor = processors.StructureProcessor(format='csv')
valid, report, data = processor.run(source)
# Trim internal/verbose fields from the human-readable report.
exclude = [
    'result_context', 'processor', 'row_name', 'result_category',
    'column_index', 'column_name', 'result_level'
]
print(report.generate('txt', exclude=exclude))