def test_pipeline_ignore_headerless_columns_false(self):
    # Headerless columns should fail structure validation by default.
    # NOTE(review): this method is shadowed by a later definition with the
    # exact same name (and identical body) in this class, so this copy is
    # never collected by the test runner — consider deleting one of them.
    filepath = os.path.join(self.data_dir, 'headerless_columns.csv')
    validator = Pipeline(filepath, processors=('structure', ))
    result, report = validator.run()
    self.assertFalse(result)
def test_register_processor_append(self):
    """Registering a processor with no position appends it to the pipeline."""
    checker = Pipeline(self.data_string)
    initial_count = len(checker.pipeline)
    self.assertEqual(initial_count, 1)
    checker.register_processor('schema')
    self.assertEqual(len(checker.pipeline), 2)
def test_pipeline_infer_schema(self):
    """Schema inference on a valid file yields an empty results list."""
    source = os.path.join(self.data_dir, 'valid.csv')
    schema_options = {'infer_schema': True}
    checker = Pipeline(source, processors=('schema',),
                       options={'schema': schema_options})
    result, report = checker.run()
    self.assertEqual(len(report.generate()['results']), 0)
def test_multilingual_xlsx(self):
    """A multilingual Excel workbook is readable by the pipeline."""
    source = os.path.join(self.data_dir, 'jungle', 'multilingual.xlsx')
    checker = Pipeline(source, format='excel')
    result, report = checker.run()
    self.assertTrue(checker.data)
def test_messytables_source_six(self):
    """An Excel file with document properties (from messytables) is readable."""
    source = os.path.join(self.data_dir, 'jungle',
                          'messytables-excel_properties.xls')
    checker = Pipeline(source, format='excel')
    result, report = checker.run()
    self.assertTrue(checker.data)
def test_messytables_source_five(self):
    """A remote CSV from the messytables horror suite is readable."""
    url = 'https://raw.githubusercontent.com/okfn/messytables/master/horror/characters.csv'
    checker = Pipeline(url)
    result, report = checker.run()
    self.assertTrue(checker.data)
def test_pipeline_ignore_defective_rows_false(self):
    """Defective rows fail structure validation when not ignored (default)."""
    source = os.path.join(self.data_dir, 'defective_rows.csv')
    checker = Pipeline(source, processors=('structure',))
    result, report = checker.run()
    self.assertFalse(result)
def test_rm_workspace(self):
    """rm_workspace removes the on-disk workspace created by a non-dry run."""
    checker = Pipeline(self.data_string, dry_run=False)
    self.assertTrue(checker.workspace)
    checker.rm_workspace()
    # The workspace path must no longer exist after removal.
    self.assertFalse(os.path.exists(checker.workspace))
def test_register_processor_insert(self):
    """Registering a processor at position 0 inserts it into the pipeline."""
    checker = Pipeline(self.data_string)
    initial_count = len(checker.pipeline)
    self.assertEqual(initial_count, 1)
    checker.register_processor('schema', position=0)
    self.assertEqual(len(checker.pipeline), 2)
def test_pipeline_ignore_headerless_columns_false(self):
    """Headerless columns fail structure validation when not ignored (default)."""
    source = os.path.join(self.data_dir, 'headerless_columns.csv')
    checker = Pipeline(source, processors=('structure',))
    result, report = checker.run()
    self.assertFalse(result)
def test_pipeline_ignore_duplicate_rows_false(self):
    """Duplicate rows fail structure validation when not ignored (default)."""
    source = os.path.join(self.data_dir, 'duplicate_rows.csv')
    checker = Pipeline(source, processors=('structure',))
    result, report = checker.run()
    self.assertFalse(result)
def test_messytables_source_two(self):
    """A UTF-16LE encoded remote CSV (messytables horror suite) is readable."""
    url = 'https://raw.githubusercontent.com/okfn/messytables/master/horror/utf-16le_encoded.csv'
    checker = Pipeline(url)
    result, report = checker.run()
    self.assertTrue(checker.data)
def test_messytables_source_three(self):
    """A sparse remote CSV with column errors (messytables) is readable."""
    url = 'https://raw.githubusercontent.com/okfn/messytables/master/horror/sparse_with_column_errors.csv'
    checker = Pipeline(url)
    result, report = checker.run()
    self.assertTrue(checker.data)
def test_pipeline_empty_rows_are_not_duplicatable(self):
    # Empty rows must not be flagged as duplicates of each other; with
    # fail_fast disabled all 11 expected results are collected.
    # NOTE(review): this method is shadowed by a later definition with the
    # exact same name (and identical body) in this class, so this copy is
    # never collected by the test runner — consider deleting one of them.
    filepath = os.path.join(self.data_dir, 'empty_rows_multiple.csv')
    validator = Pipeline(filepath, processors=('structure',), fail_fast=False)
    result, report = validator.run()
    self.assertEqual(len(report.generate()['results']), 11)
def test_pipeline_case_insensitive_headers(self):
    """With case_insensitive_headers, differently-cased headers match the schema."""
    source = os.path.join(self.data_dir, 'case_insensitive_headers.csv')
    schema_path = os.path.join(self.data_dir, 'test_schema.json')
    schema_options = {'schema': schema_path, 'case_insensitive_headers': True}
    checker = Pipeline(source, processors=('schema',),
                       options={'schema': schema_options})
    result, report = checker.run()
    self.assertEqual(len(report.generate()['results']), 0)
def test_gla_source_clean(self):
    """A clean remote GLA dataset passes validation and yields data."""
    url = 'https://raw.githubusercontent.com/rgrp/dataset-gla/master/data/all.csv'
    checker = Pipeline(url)
    result, report = checker.run()
    self.assertTrue(result)
    self.assertTrue(checker.data)
def test_pipeline_hmt_bbsrc(self):
    """An ISO-8859-2 encoded HMT spend return is readable with explicit encoding."""
    source = os.path.join(self.data_dir, 'hmt',
                          '1011-bbsrc-25k-spend-return.csv')
    checker = Pipeline(source, encoding='ISO-8859-2')
    result, report = checker.run()
    self.assertTrue(checker.data)
def test_gla_source_three(self):
    """A known-bad GLA report fails validation but still produces data."""
    source = os.path.join(self.data_dir, 'jungle',
                          'gla-250-report-2014-15-P08.csv')
    checker = Pipeline(source)
    result, report = checker.run()
    self.assertFalse(result)
    self.assertTrue(checker.data)
def test_pipeline_field_unique(self):
    """A unique-constrained field with a repeated value yields one result."""
    source = os.path.join(self.data_dir, 'unique_field.csv')
    schema_path = os.path.join(self.data_dir, 'unique_field.json')
    checker = Pipeline(source, processors=('schema',),
                       options={'schema': {'schema': schema_path}})
    result, report = checker.run()
    self.assertEqual(len(report.generate()['results']), 1)
def test_header_index_invalid(self):
    """Pointing header_index at an invalid header row fails validation."""
    source = os.path.join(self.data_dir, 'invalid_header_index_1.csv')
    checker = Pipeline(source, options={}, header_index=1)
    result, report = checker.run()
    self.assertFalse(result)
def test_gla_source_five(self):
    """A known-bad GLA P10 file fails validation but still produces data."""
    source = os.path.join(self.data_dir, 'jungle', 'gla-2012-13-P10-250.csv')
    checker = Pipeline(source)
    result, report = checker.run()
    self.assertFalse(result)
    self.assertTrue(checker.data)
def test_gla_source_six(self):
    """A known-bad GLA December 2009 file fails validation but yields data."""
    source = os.path.join(self.data_dir, 'jungle', 'gla-december_2009.csv')
    checker = Pipeline(source)
    result, report = checker.run()
    self.assertFalse(result)
    self.assertTrue(checker.data)
def test_pipeline_info_result_for_required_false(self):
    """With result_level 'info', a required=false field still yields one result."""
    source = os.path.join(self.data_dir, 'required_false.csv')
    schema_path = os.path.join(self.data_dir, 'required_false_schema.json')
    schema_options = {'schema': schema_path, 'result_level': 'info'}
    checker = Pipeline(source, processors=('schema',),
                       options={'schema': schema_options})
    result, report = checker.run()
    self.assertEqual(len(report.generate()['results']), 1)
def test_pipeline_row_limit_in_range(self):
    # With row_limit=2 the structure errors beyond the limit are skipped,
    # so no results are reported.
    # NOTE(review): this method is shadowed by a later definition with the
    # exact same name (and identical body) in this class, so this copy is
    # never collected by the test runner — consider deleting one of them.
    filepath = os.path.join(self.data_dir, 'row_limit_structure.csv')
    options = {}
    validator = Pipeline(filepath, processors=('structure',), row_limit=2, options=options)
    result, report = validator.run()
    self.assertEqual(len(report.generate()['results']), 0)
def test__report_limit_in_range(self):
    """With report_limit=1, only one structure result is reported."""
    source = os.path.join(self.data_dir, 'report_limit_structure.csv')
    checker = Pipeline(source, processors=('structure',), report_limit=1,
                       options={})
    result, report = checker.run()
    structure_results = [entry for entry in report.generate()['results']
                         if entry['processor'] == 'structure']
    self.assertEqual(len(structure_results), 1)
def test_pipeline_report_stream_none(self):
    """A None report_stream is accepted and a valid file still passes."""
    source = os.path.join(self.data_dir, 'valid.csv')
    checker = Pipeline(source, processors=('schema',), report_stream=None,
                       options={})
    result, report = checker.run()
    self.assertTrue(result)
def test_hmt_three(self):
    """A remote HMT spend publication runs through structure+schema and yields data."""
    url = 'https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/407609/Publishable_December_2014_Spend.csv'
    schema_path = os.path.join(self.data_dir, 'hmt',
                               'spend-publishing-schema.json')
    checker = Pipeline(url, processors=('structure', 'schema'),
                       options={'schema': {'schema': schema_path}})
    result, report = checker.run()
    self.assertTrue(checker.data)
def test_pipeline_custom_empty_strings(self):
    # Treating '-' as an empty string should make the custom-empty-rows
    # file fail structure validation.
    # NOTE(review): this method is shadowed by a later definition with the
    # exact same name (and identical body) in this class, so this copy is
    # never collected by the test runner — consider deleting one of them.
    filepath = os.path.join(self.data_dir, 'empty_rows_custom.csv')
    options = {'structure': {'empty_strings': ('-',)}}
    validator = Pipeline(filepath, processors=('structure',), options=options)
    result, report = validator.run()
    self.assertFalse(result)
def test_pipeline_empty_rows_are_not_duplicatable(self):
    """Empty rows are not flagged as duplicates; all 11 results are collected."""
    source = os.path.join(self.data_dir, 'empty_rows_multiple.csv')
    checker = Pipeline(source, processors=('structure',), fail_fast=False)
    result, report = checker.run()
    self.assertEqual(len(report.generate()['results']), 11)
def test_pipeline_fail_fast_false(self):
    # Without fail_fast, both structure errors in the file are reported.
    # NOTE(review): `test_pipeline_fail_fast_false` is defined three times in
    # this class with different fixtures/expectations (2, 7, and 5 results);
    # only the last definition runs. This copy (structure, 2 results) is dead
    # code — it should be renamed (e.g. ..._structure) or removed, after
    # confirming which expectation is current.
    filepath = os.path.join(self.data_dir, 'fail_fast_two_structure_errors.csv')
    options = {}
    validator = Pipeline(filepath, processors=('structure',), options=options)
    result, report = validator.run()
    self.assertEqual(len(report.generate()['results']), 2)
def test_pipeline_ignore_duplicate_rows_true(self):
    # With ignore_duplicate_rows enabled, the duplicate-rows file passes.
    # NOTE(review): this method is shadowed by a later definition with the
    # exact same name (and identical body) in this class, so this copy is
    # never collected by the test runner — consider deleting one of them.
    filepath = os.path.join(self.data_dir, 'duplicate_rows.csv')
    options = {'structure': {'ignore_duplicate_rows': True}}
    validator = Pipeline(filepath, processors=('structure',), options=options)
    result, report = validator.run()
    self.assertTrue(result)
def test_create_file(self):
    """create_file writes a file with headers into the pipeline workspace."""
    target_name = 'example.file'
    header_row = ['first', 'second', 'three']
    data_row = '1,2,3\n'
    checker = Pipeline(self.data_string, dry_run=False)
    checker.create_file(data_row, target_name, headers=header_row)
    created_path = os.path.join(checker.workspace, target_name)
    self.assertTrue(os.path.exists(created_path))
def test_report_summary(self):
    """The generated report meta carries bad-row and total-row counts."""
    source = os.path.join(self.data_dir, 'invalid_header_index_1.csv')
    checker = Pipeline(source, options={}, header_index=1)
    result, report = checker.run()
    meta = report.generate()['meta']
    self.assertEqual(meta['bad_row_count'], 1)
    self.assertEqual(meta['row_count'], 9)
def test_report_results_grouped_by_rows(self):
    """With report_type='grouped' and fail_fast, results collapse to one group."""
    source = os.path.join(self.data_dir, 'fail_fast_two_schema_errors.csv')
    schema_path = os.path.join(self.data_dir, 'test_schema.json')
    checker = Pipeline(source, processors=('schema',),
                       options={'schema': {'schema': schema_path}},
                       fail_fast=True, report_type='grouped')
    result, report = checker.run()
    grouped = report.generate()
    self.assertEqual(1, len(grouped['results']))
def test_hmt_bis_two(self):
    """A BIS monthly-spend Excel file runs through structure+schema and yields data."""
    source = os.path.join(self.data_dir, 'hmt',
                          'BIS_monthly_spend_December_2012.xls')
    schema_path = os.path.join(self.data_dir, 'hmt', 'bis-modified.json')
    checker = Pipeline(source, processors=('structure', 'schema'),
                       options={'schema': {'schema': schema_path}},
                       format='excel')
    result, report = checker.run()
    self.assertTrue(checker.data)
def test_pipeline_custom_empty_strings(self):
    """Treating '-' as empty makes the custom-empty-rows file fail validation."""
    source = os.path.join(self.data_dir, 'empty_rows_custom.csv')
    structure_options = {'empty_strings': ('-',)}
    checker = Pipeline(source, processors=('structure',),
                       options={'structure': structure_options})
    result, report = checker.run()
    self.assertFalse(result)
def test_pipeline_fail_fast_false(self):
    # Without fail_fast, all schema errors in the file are reported.
    # NOTE(review): `test_pipeline_fail_fast_false` is defined three times in
    # this class; only the last definition runs. This copy expects 7 results
    # while the surviving copy (identical fixture) expects 5 — the two
    # expectations contradict each other. Confirm which count is current and
    # remove the stale definition.
    filepath = os.path.join(self.data_dir, 'fail_fast_two_schema_errors.csv')
    schema = os.path.join(self.data_dir, 'test_schema.json')
    options = {'schema': {'schema': schema}}
    validator = Pipeline(filepath, processors=('schema',), options=options)
    result, report = validator.run()
    self.assertEqual(len(report.generate()['results']), 7)
def test_pipeline_ignore_duplicate_rows_true(self):
    """With ignore_duplicate_rows enabled, the duplicate-rows file passes."""
    source = os.path.join(self.data_dir, 'duplicate_rows.csv')
    structure_options = {'ignore_duplicate_rows': True}
    checker = Pipeline(source, processors=('structure',),
                       options={'structure': structure_options})
    result, report = checker.run()
    self.assertTrue(result)
def test_pipeline_fail_fast_false(self):
    """Without fail_fast, all five schema results for the file are reported.

    NOTE(review): this name is defined multiple times in the class; being the
    last definition, this is the copy that actually runs.
    """
    source = os.path.join(self.data_dir, 'fail_fast_two_schema_errors.csv')
    schema_path = os.path.join(self.data_dir, 'test_schema.json')
    checker = Pipeline(source, processors=('schema',),
                       options={'schema': {'schema': schema_path}})
    result, report = checker.run()
    self.assertEqual(len(report.generate()['results']), 5)
def test_pipeline_row_limit_in_range(self):
    """With row_limit=2, errors beyond the limit are skipped; no results."""
    source = os.path.join(self.data_dir, 'row_limit_structure.csv')
    checker = Pipeline(source, processors=('structure',), row_limit=2,
                       options={})
    result, report = checker.run()
    self.assertEqual(len(report.generate()['results']), 0)