def test_standalone_field_unique(self):
    """A schema with a unique-field constraint yields exactly one result."""
    schema_path = os.path.join(self.data_dir, 'unique_field.json')
    data_path = os.path.join(self.data_dir, 'unique_field.csv')
    processor = processors.SchemaProcessor(schema=schema_path)
    result, report, data = processor.run(data_path)
    generated = report.generate()
    self.assertEqual(len(generated['results']), 1)
def test_standalone_case_insensitive_headers(self):
    """Header-case differences pass when case_insensitive_headers is enabled."""
    schema_path = os.path.join(self.data_dir, 'test_schema.json')
    data_path = os.path.join(self.data_dir, 'case_insensitive_headers.csv')
    processor = processors.SchemaProcessor(schema=schema_path,
                                           case_insensitive_headers=True)
    result, report, data = processor.run(data_path)
    generated = report.generate()
    self.assertEqual(len(generated['results']), 0)
def schema(data, schema, format, fail_fast, row_limit, report_limit, output):
    """Run a Good Tables SchemaProcessor."""
    processor = processors.SchemaProcessor(schema=schema, format=format,
                                           fail_fast=fail_fast,
                                           row_limit=row_limit,
                                           report_limit=report_limit)
    valid, report, data = processor.run(data)
    # JSON output keeps every report field; text output drops the noisy ones.
    if output == 'json':
        exclude = None
    else:
        exclude = [
            'result_context', 'processor', 'row_name', 'result_category',
            'column_index', 'column_name', 'result_level'
        ]
    valid_msg = 'Well done! The data is valid :)\n'.upper()
    # Fix: original message read 'Oops.The ...' — missing space after the period.
    invalid_msg = 'Oops. The data is invalid :(\n'.upper()
    if valid:
        click.echo(click.style(valid_msg, fg='green'))
    else:
        click.echo(click.style(invalid_msg, fg='red'))
    click.echo(report.generate(output, exclude=exclude))
def test_standalone_info_result_for_required_false(self):
    """result_level='info' surfaces one report entry for a non-required field."""
    data_path = os.path.join(self.data_dir, 'required_false.csv')
    schema_path = os.path.join(self.data_dir, 'required_false_schema.json')
    processor = processors.SchemaProcessor(schema=schema_path,
                                           result_level='info')
    result, report, data = processor.run(data_path)
    results = report.generate()['results']
    self.assertEqual(len(results), 1)
def test_standalone_process_extra_fields(self):
    """process_extra_fields reports schema_008 for fields missing from the schema."""
    data_path = os.path.join(self.data_dir, 'contacts', 'people.csv')
    schema_path = os.path.join(self.data_dir, 'contacts',
                               'schema_incomplete_fields.json')
    processor = processors.SchemaProcessor(schema=schema_path,
                                           case_insensitive_headers=True,
                                           process_extra_fields=True,
                                           result_level='info')
    result, report, data = processor.run(data_path)
    generated = report.generate()
    found = any('schema_008' in entry.values()
                for entry in generated['results'])
    self.assertTrue(found)
def test_standalone_row_limit_in_range(self):
    """A row_limit within the allowed range validates cleanly."""
    data_path = os.path.join(self.data_dir, 'row_limit_schema.csv')
    schema_path = os.path.join(self.data_dir, 'test_schema.json')
    with io.open(data_path) as stream:
        processor = processors.SchemaProcessor(row_limit=2, schema=schema_path)
        result, report, data = processor.run(stream)
        generated = report.generate()
        self.assertEqual(len(generated['results']), 0)
def test_standalone_fail_fast_false(self):
    """Without fail_fast, every error in the data is collected (five here)."""
    data_path = os.path.join(self.data_dir, 'fail_fast_two_schema_errors.csv')
    schema_path = os.path.join(self.data_dir, 'test_schema.json')
    with io.open(data_path) as stream:
        processor = processors.SchemaProcessor(schema=schema_path)
        result, report, data = processor.run(stream)
        generated = report.generate()
        self.assertEqual(len(generated['results']), 5)
def test_standalone_report_stream_none(self):
    """Passing report_stream=None falls back to the default report target."""
    data_path = os.path.join(self.data_dir, 'valid.csv')
    with io.open(data_path) as stream:
        processor = processors.SchemaProcessor(report_stream=None)
        result, report, data = processor.run(stream)
        self.assertTrue(result)
def test_processor_run_error_when_data_html_error(self):
    """Running against an HTML page yields a single data_html_error result."""
    source = 'https://www.google.com/'
    processor = processors.SchemaProcessor()
    result, report, data = processor.run(data_source=source)
    results = report.generate()['results']
    self.assertFalse(result)
    self.assertEqual(len(results), 1)
    self.assertEqual(results[0]['result_id'], 'data_html_error')
def test_processor_run_error_when_data_http_error(self):
    """A URL that 404s yields a single http_404_error result."""
    source = 'https://github.com/frictionlessdata/goodtables/blob/master/.travis.yaml'
    processor = processors.SchemaProcessor()
    result, report, data = processor.run(source)
    results = report.generate()['results']
    self.assertFalse(result)
    self.assertEqual(len(results), 1)
    self.assertEqual(results[0]['result_id'], 'http_404_error')
def test_standalone_ignore_field_order_false(self):
    """Validation fails on reordered fields when ignore_field_order is off."""
    data_path = os.path.join(self.data_dir, 'contacts', 'people.csv')
    schema_path = os.path.join(self.data_dir, 'contacts',
                               'schema_valid_not_field_order.json')
    with io.open(data_path) as data_stream, \
            io.open(schema_path) as schema_stream:
        loaded_schema = json.load(schema_stream)
        processor = processors.SchemaProcessor(schema=loaded_schema,
                                               ignore_field_order=False)
        result, report, data = processor.run(data_stream)
        self.assertFalse(result)
def test_standalone_schema_valid_simple(self):
    """Valid data matching its schema passes validation."""
    data_path = os.path.join(self.data_dir, 'contacts', 'people.csv')
    schema_path = os.path.join(self.data_dir, 'contacts', 'schema_valid.json')
    with io.open(data_path) as data_stream, \
            io.open(schema_path) as schema_stream:
        loaded_schema = json.load(schema_stream)
        processor = processors.SchemaProcessor(schema=loaded_schema)
        result, report, data = processor.run(data_stream)
        self.assertTrue(result)
def test_processor_run_error_when_wrong_encoding(self):
    """A mismatched encoding produces a single data_decode_error result."""
    source = os.path.join(self.data_dir, 'hmt',
                          'BIS_spending_over__25_000_July_2014.csv')
    wrong_encoding = 'UTF-8'  # the file is actually 'ISO-8859-2'
    processor = processors.SchemaProcessor()
    result, report, data = processor.run(data_source=source,
                                         encoding=wrong_encoding,
                                         decode_strategy=None)
    results = report.generate()['results']
    self.assertFalse(result)
    self.assertEqual(len(results), 1)
    self.assertEqual(results[0]['result_id'], 'data_decode_error')
def test_standalone_report_stream_valid(self):
    """A custom text report stream receives one JSON document per line."""
    data_path = os.path.join(self.data_dir, 'valid.csv')
    report_stream = io.TextIOWrapper(io.BufferedRandom(io.BytesIO()))
    with io.open(data_path) as stream:
        processor = processors.SchemaProcessor(report_stream=report_stream)
        result, report, data = processor.run(stream)
        self.assertEqual(len(report.generate()['results']), 0)
        # Rewind and verify every line written to the stream parses as JSON.
        report_stream.seek(0)
        for line in report_stream:
            self.assertTrue(json.loads(line.rstrip('\n')))
def test_schema(self):
    """The first datapackage resource validates against its own schema."""
    resource = dp.metadata['resources'][0]
    processor = processors.SchemaProcessor(schema=resource['schema'],
                                           format='csv',
                                           row_limit=ROW_LIMIT,
                                           report_limit=REPORT_LIMIT)
    valid, report, data = processor.run(resource['path'])
    exclude = [
        'result_context', 'processor', 'row_name', 'result_category',
        'column_name', 'result_id', 'result_level'
    ]
    out = report.generate('txt', exclude=exclude)
    # Include the rendered report in the failure message for easier debugging.
    self.assertTrue(valid, out)
def test_standalone_row_limit_out_range(self):
    """A row_limit above ROW_LIMIT_MAX is clamped to the maximum."""
    maximum = processors.SchemaProcessor.ROW_LIMIT_MAX
    processor = processors.SchemaProcessor(row_limit=maximum + 1)
    self.assertEqual(processor.row_limit, maximum)
def test_standalone_infer_schema(self):
    """With infer_schema=True a schema is derived and valid data passes."""
    data_path = os.path.join(self.data_dir, 'valid.csv')
    processor = processors.SchemaProcessor(infer_schema=True)
    result, report, data = processor.run(data_path)
    generated = report.generate()
    self.assertEqual(len(generated['results']), 0)
from goodtables import processors

# Validate ./data.csv against ./schema.json and print a plain-text report.
processor = processors.SchemaProcessor(format='csv', schema='./schema.json')
valid, report, data = processor.run('./data.csv')

# Drop the noisier report fields from the text output.
exclude = [
    'result_context', 'processor', 'row_name', 'result_category',
    'column_index', 'column_name', 'result_level'
]
out = report.generate('txt', exclude=exclude)
print(out)