Esempio n. 1
0
def structure(data, format, fail_fast, row_limit, report_limit, output):
    """Run a Good Tables StructureProcessor."""
    # Report fields that are left out for every output format except JSON.
    hidden_fields = [
        'result_context', 'processor', 'row_name', 'result_category',
        'column_index', 'column_name', 'result_level'
    ]

    runner = processors.StructureProcessor(
        format=format,
        fail_fast=fail_fast,
        row_limit=row_limit,
        report_limit=report_limit,
    )
    # The third element of the run result is not used here.
    is_valid, report, _ = runner.run(data)

    # Pick the verdict banner and its color, then echo it once.
    if is_valid:
        banner = 'Well done! The data is valid :)\n'.upper()
        color = 'green'
    else:
        banner = 'Oops.The data is invalid :(\n'.upper()
        color = 'red'
    click.echo(click.style(banner, fg=color))

    # JSON output keeps every field; other formats drop the hidden ones.
    exclude = None if output == 'json' else hidden_fields
    click.echo(report.generate(output, exclude=exclude))
Esempio n. 2
0
    def test_standalone_empty_rows_are_not_duplicatable(self):
        # Running with fail_fast=False over the 'empty_rows_multiple.csv'
        # fixture must yield exactly 11 entries in the report results.
        csv_path = os.path.join(self.data_dir, 'empty_rows_multiple.csv')
        processor = processors.StructureProcessor(fail_fast=False)

        _, report, _ = processor.run(csv_path)
        results = report.generate()['results']

        self.assertEqual(len(results), 11)
Esempio n. 3
0
    def test_standalone_hmt_bbsrc(self):
        # Run a default processor on the hmt/1011-bbsrc fixture with
        # encoding=None and check that the returned data is truthy.
        csv_path = os.path.join(
            self.data_dir, 'hmt', '1011-bbsrc-25k-spend-return.csv')
        processor = processors.StructureProcessor()

        _, _, data = processor.run(csv_path, encoding=None)

        self.assertTrue(data)
Esempio n. 4
0
    def test_standalone_ignore_defective_rows_false(self):
        # A default processor fed the 'defective_rows.csv' fixture as an open
        # stream must report the data as not valid.
        csv_path = os.path.join(self.data_dir, 'defective_rows.csv')

        with io.open(csv_path) as source:
            processor = processors.StructureProcessor()
            is_valid, _, _ = processor.run(source)

            self.assertFalse(is_valid)
Esempio n. 5
0
    def test_standalone_row_limit_in_range(self):
        # With row_limit=2 the 'row_limit_structure.csv' fixture must produce
        # an empty results list in the report.
        csv_path = os.path.join(self.data_dir, 'row_limit_structure.csv')

        with io.open(csv_path) as source:
            processor = processors.StructureProcessor(row_limit=2)
            _, report, _ = processor.run(source)

            results = report.generate()['results']
            self.assertEqual(len(results), 0)
Esempio n. 6
0
    def test_standalone_ignore_empty_rows_true(self):
        # With ignore_empty_rows=True the 'empty_rows.csv' fixture must be
        # reported as valid.
        csv_path = os.path.join(self.data_dir, 'empty_rows.csv')

        with io.open(csv_path) as source:
            processor = processors.StructureProcessor(ignore_empty_rows=True)
            is_valid, _, _ = processor.run(source)

            self.assertTrue(is_valid)
Esempio n. 7
0
    def test_standalone_custom_empty_strings(self):
        # With '-' configured as the only empty string, the
        # 'empty_rows_custom.csv' fixture must be reported as not valid.
        csv_path = os.path.join(self.data_dir, 'empty_rows_custom.csv')
        blank_markers = ('-', )

        with io.open(csv_path) as source:
            processor = processors.StructureProcessor(
                empty_strings=blank_markers)
            is_valid, _, _ = processor.run(source)

            self.assertFalse(is_valid)
Esempio n. 8
0
    def test_standalone_fail_fast_false(self):
        # With fail_fast disabled, the 'fail_fast_two_structure_errors.csv'
        # fixture must yield two entries in the report results.
        filepath = os.path.join(self.data_dir,
                                'fail_fast_two_structure_errors.csv')
        with io.open(filepath) as stream:
            # Pass fail_fast=False explicitly: the test name promises this
            # setting, so it should not depend on the constructor's default.
            validator = processors.StructureProcessor(fail_fast=False)
            result, report, data = validator.run(stream)

            self.assertEqual(len(report.generate()['results']), 2)
Esempio n. 9
0
    def test_standalone_report_stream_none(self):
        # Passing report_stream=None must still let the 'valid.csv' fixture
        # be reported as valid.
        csv_path = os.path.join(self.data_dir, 'valid.csv')

        with io.open(csv_path) as source:
            processor = processors.StructureProcessor(report_stream=None)
            is_valid, _, _ = processor.run(source)

            self.assertTrue(is_valid)
Esempio n. 10
0
    def test_standalone_report_stream_valid(self):
        # With an in-memory text stream supplied as report_stream, the
        # 'valid.csv' fixture must produce no results, and every line found
        # in the stream afterwards must parse as truthy JSON.
        csv_path = os.path.join(self.data_dir, 'valid.csv')
        sink = io.TextIOWrapper(io.BufferedRandom(io.BytesIO()))

        with io.open(csv_path) as source:
            processor = processors.StructureProcessor(report_stream=sink)
            _, report, _ = processor.run(source)

            results = report.generate()['results']
            self.assertEqual(len(results), 0)

            # Rewind before reading back what was written to the stream.
            sink.seek(0)
            for raw_line in sink:
                entry = json.loads(raw_line.rstrip('\n'))
                self.assertTrue(entry)
Esempio n. 11
0
    def test_structure(self):
        # Validate the first resource of the data package and assert it is
        # structurally valid, using the text report as the failure message.
        # TODO: infer from data package format field (and default to csv)
        source_format = 'csv'
        checker = processors.StructureProcessor(
            format=source_format,
            fail_fast=False,
            row_limit=row_limit,
            report_limit=report_limit,
        )

        resource_path = dp.metadata['resources'][0]['path']
        is_valid, report, _ = checker.run(resource_path)

        # Report fields omitted from the rendered text output.
        hidden_fields = [
            'result_context', 'processor', 'row_name', 'result_category',
            'column_index', 'column_name', 'result_level',
        ]
        rendered = report.generate('txt', exclude=hidden_fields)

        self.assertTrue(is_valid, rendered)
    def test_structure(self):
        # Run a CSV StructureProcessor, bounded by ROW_LIMIT / REPORT_LIMIT,
        # over the first resource path in the data package metadata. The
        # rendered text report is passed as the assertion message.
        options = dict(
            format='csv',
            fail_fast=False,
            row_limit=ROW_LIMIT,
            report_limit=REPORT_LIMIT,
        )
        runner = processors.StructureProcessor(**options)

        first_resource = dp.metadata['resources'][0]
        outcome, report, _ = runner.run(first_resource['path'])

        # Fields dropped from the text rendering of the report.
        skipped = [
            'result_context', 'processor', 'row_name', 'result_category',
            'column_index', 'column_name', 'result_level'
        ]
        message = report.generate('txt', exclude=skipped)

        self.assertTrue(outcome, message)
Esempio n. 13
0
    def test_standalone_row_limit_out_range(self):
        # A processor built with row_limit one above ROW_LIMIT_MAX must
        # expose ROW_LIMIT_MAX as its row_limit.
        ceiling = processors.StructureProcessor.ROW_LIMIT_MAX
        processor = processors.StructureProcessor(row_limit=ceiling + 1)

        self.assertEqual(processor.row_limit, ceiling)
Esempio n. 14
0
from goodtables import processors

# Path of the file to validate.
datafile = './data.csv'

# Report rendering options: text output with these fields left out.
output_format = 'txt'
exclude = [
    'result_context',
    'processor',
    'row_name',
    'result_category',
    'column_index',
    'column_name',
    'result_level',
]

# Run the structure checks on the file.
processor = processors.StructureProcessor(format='csv')
valid, report, data = processor.run(datafile)

# Render the report and print it.
out = report.generate(output_format, exclude=exclude)
print(out)