def test_register_processor_insert(self):
    """Registering 'schema' with ``position=0`` grows the pipeline to two entries."""
    subject = Pipeline(self.data_string)
    size_before = len(subject.pipeline)
    self.assertEqual(size_before, 1)

    subject.register_processor('schema', position=0)

    size_after = len(subject.pipeline)
    self.assertEqual(size_after, 2)
def test_register_processor_append(self):
    """Registering 'schema' without a position grows the pipeline to two entries."""
    subject = Pipeline(self.data_string)
    size_before = len(subject.pipeline)
    self.assertEqual(size_before, 1)

    subject.register_processor('schema')

    size_after = len(subject.pipeline)
    self.assertEqual(size_after, 2)
def test_register_processor_insert(self):
    """Check the pipeline length goes from 1 to 2 after a ``position=0`` registration."""
    pipe = Pipeline(self.data_string)
    # A fresh pipeline is expected to start with exactly one entry.
    self.assertEqual(len(pipe.pipeline), 1)

    pipe.register_processor('schema', position=0)

    # The explicitly positioned processor must have been added.
    self.assertEqual(len(pipe.pipeline), 2)
def test_register_processor_append(self):
    """Check the pipeline length goes from 1 to 2 after a default registration."""
    pipe = Pipeline(self.data_string)
    # A fresh pipeline is expected to start with exactly one entry.
    self.assertEqual(len(pipe.pipeline), 1)

    pipe.register_processor('schema')

    # The processor registered without a position must have been added.
    self.assertEqual(len(pipe.pipeline), 2)
def _validate_data(self, raise_on_error): """Validate the package resources with GoodTables.""" def summarize(feedback_, path_): intro = 'GoodTables has detected some errors in %s.' % path_ hint = 'Please check out the full report: %s.' % REPORT_FILENAME info = feedback_['meta'] summary = ( 'There are {bad_rows} (out of {total_rows}) bad rows ' 'and {bad_cols} (out of {total_cols}) bad columns. ' ).format( bad_rows=info['bad_row_count'], total_rows=info['row_count'], bad_cols=info['bad_column_count'], total_cols=len(info['columns']) ) log.debug(intro + summary) return [intro, summary, hint] for resource in self: schema = resource.descriptor['schema'] path = resource.descriptor['path'] filepath = join(self._base_path, path) pipeline = Pipeline(filepath, report_stream=StringIO()) pipeline.register_processor('schema', options={'schema': schema}) is_valid, report = pipeline.run() if is_valid: return [] if raise_on_error: raise ValidationError('%s is invalid' % filepath) else: feedback = report.generate() with open(REPORT_FILENAME, 'w+') as json: json.write(dumps(feedback, indent=4, ensure_ascii=False)) return summarize(feedback, path)
def validate_data(data, schema):
    """Run ``data`` through a Pipeline with the 'schema' processor.

    The pipeline's report stream is the file ``SOURCE_REPORT``, opened
    in ``'w+'`` mode.

    :param data: the data source handed to ``Pipeline``.
    :param schema: the schema passed to the 'schema' processor options.
    :return: the ``(valid, report)`` pair produced by ``Pipeline.run()``.
    """
    # NOTE(review): this file handle is never closed in this function;
    # confirm whether the returned report still needs the stream before
    # adding cleanup here.
    stream = open(SOURCE_REPORT, 'w+')
    schema_options = {'schema': schema}

    runner = Pipeline(data, report_stream=stream)
    runner.register_processor('schema', options=schema_options)

    outcome = runner.run()
    valid, report = outcome
    return valid, report