Exemple #1
0
    def test_register_processor_insert(self):
        """Registering 'schema' at position 0 grows `pipeline.pipeline` from 1 to 2."""
        validator = Pipeline(self.data_string)
        size_before = len(validator.pipeline)
        self.assertEqual(size_before, 1)

        validator.register_processor('schema', position=0)
        size_after = len(validator.pipeline)
        self.assertEqual(size_after, 2)
Exemple #2
0
    def test_register_processor_append(self):
        """Registering 'schema' without a position grows `pipeline.pipeline` from 1 to 2."""
        validator = Pipeline(self.data_string)
        size_before = len(validator.pipeline)
        self.assertEqual(size_before, 1)

        validator.register_processor('schema')
        size_after = len(validator.pipeline)
        self.assertEqual(size_after, 2)
    def test_register_processor_insert(self):
        """Check that inserting a 'schema' processor at position 0 adds one entry."""
        checker = Pipeline(self.data_string)
        initial_count = len(checker.pipeline)
        self.assertEqual(initial_count, 1)

        checker.register_processor('schema', position=0)
        final_count = len(checker.pipeline)
        self.assertEqual(final_count, 2)
    def test_register_processor_append(self):
        """Check that registering a 'schema' processor with no position adds one entry."""
        checker = Pipeline(self.data_string)
        initial_count = len(checker.pipeline)
        self.assertEqual(initial_count, 1)

        checker.register_processor('schema')
        final_count = len(checker.pipeline)
        self.assertEqual(final_count, 2)
Exemple #5
0
    def _validate_data(self, raise_on_error):
        """Validate the package resources with GoodTables.

        Each resource is run through a GoodTables pipeline with a 'schema'
        processor configured from the resource's own schema. Validation
        stops at the first invalid resource.

        :param raise_on_error: when truthy, raise instead of returning
            feedback for an invalid resource.
        :return: an empty list when no resource is invalid; otherwise the
            ``[intro, summary, hint]`` messages for the first invalid
            resource, whose full report is written to ``REPORT_FILENAME``.
        :raises ValidationError: if a resource is invalid and
            `raise_on_error` is truthy.
        """

        def summarize(feedback_, path_):
            """Build the short messages describing a failed report."""
            intro = 'GoodTables has detected some errors in %s.' % path_
            hint = 'Please check out the full report: %s.' % REPORT_FILENAME

            info = feedback_['meta']
            summary = (
                'There are {bad_rows} (out of {total_rows}) bad rows '
                'and {bad_cols} (out of {total_cols}) bad columns. '
            ).format(
                bad_rows=info['bad_row_count'],
                total_rows=info['row_count'],
                bad_cols=info['bad_column_count'],
                total_cols=len(info['columns'])
            )

            # Separate the two sentences so the log line stays readable.
            log.debug(intro + ' ' + summary)
            return [intro, summary, hint]

        for resource in self:
            schema = resource.descriptor['schema']
            path = resource.descriptor['path']
            filepath = join(self._base_path, path)

            pipeline = Pipeline(filepath, report_stream=StringIO())
            pipeline.register_processor('schema', options={'schema': schema})
            is_valid, report = pipeline.run()

            if is_valid:
                # Move on to the next resource; returning here would leave
                # every resource after the first one unchecked.
                continue

            if raise_on_error:
                raise ValidationError('%s is invalid' % filepath)

            feedback = report.generate()
            with open(REPORT_FILENAME, 'w+') as report_file:
                report_file.write(
                    dumps(feedback, indent=4, ensure_ascii=False)
                )
            return summarize(feedback, path)

        # Every resource passed (or there were none to check).
        return []
def validate_data(data, schema):
    """Validate `data` against `schema` with a GoodTables pipeline.

    The pipeline is given a report stream opened on ``SOURCE_REPORT``.

    :param data: data source handed to ``Pipeline``.
    :param schema: schema passed in the 'schema' processor options.
    :return: the ``(valid, report)`` pair returned by ``Pipeline.run()``.
    """
    # The context manager closes (and flushes) the report file even when
    # building or running the pipeline raises.
    with open(SOURCE_REPORT, 'w+') as report_stream:
        pipeline = Pipeline(data, report_stream=report_stream)
        pipeline.register_processor('schema', options={'schema': schema})
        valid, report = pipeline.run()
    # NOTE(review): assumes `report` no longer needs the stream once run()
    # has returned - confirm against the Pipeline implementation.
    return valid, report