def test_normalize_action_dataset(self):
    """normalize_action_dataset flattens extras, groups and tags in place."""
    # prepare
    fixture = {
        'id': 1,
        'name': 'example',
        'groups': [{'name': 'group1'}, {'name': 'group2'}],
        'tags': [{'name': 'tag1'}, {'name': 'tag2'}],
        'extras': [
            {'key': 'temporal_granularity_factor', 'value': '1'},
            {'key': 'anotherKey', 'value': 'anotherValue'}
        ]
    }

    # execute
    normalize_action_dataset(fixture)

    # verify: extras become a plain key/value dict (values JSON-decoded),
    # groups and tags become lists of their names.
    self.assertDictEqual(
        fixture['extras'],
        {'temporal_granularity_factor': 1, 'anotherKey': 'anotherValue'})
    self.assertListEqual(fixture['groups'], ['group1', 'group2'])
    self.assertListEqual(fixture['tags'], ['tag1', 'tag2'])
def validate_datasets(self, dataset, data):
    """Validate a single dataset against ``self.schema`` and record results.

    Normalizes *dataset* in place, then updates the shared *data*
    accumulators: valid/invalid counters, per-portal dataset ids,
    per-field-path violation counts, and a ``[path, message]`` entry in
    ``data['broken_rules'][portal][id]`` for every schema violation.
    """
    normalize_action_dataset(dataset)
    identifier = dataset['id']
    portal = dataset['extras'].get('metadata_original_portal', 'null')

    data['broken_rules'][portal][identifier] = []
    broken_rules = data['broken_rules'][portal][identifier]
    data['datasets_per_portal'][portal].add(identifier)

    # Build the validator once; the original constructed it three times and
    # discarded the result of the first iter_errors() call entirely.
    validator = Draft3Validator(self.schema)
    if validator.is_valid(dataset):
        data['valid_datasets'] += 1
    else:
        data['invalid_datasets'] += 1
        for error in validator.iter_errors(dataset):
            # Keep only the string components of the error path (skip
            # list indices) and render it as "a -> b -> c".
            path = [e for e in error.path if isinstance(e, basestring)]
            path = str(' -> '.join(map((lambda e: str(e)), path)))
            data['field_paths'][path] += 1
            broken_rules.append([path, error.message])
def test_groups_field():
    """normalize_action_dataset reduces groups to their plain name strings."""
    first_group = {
        u'approval_status': u'approved',
        u'capacity': u'public',
        u'created': u'2012-11-01T07:04:45.306038',
        u'description': u'',
        u'id': u'9dafbe15-8fe8-4fae-be78-353aa28391ca',
        u'image_url': u'',
        u'name': u'bildung_wissenschaft',
        u'revision_id': u'73355b6b-20a3-42c9-820f-e399eb65a749',
        u'state': u'active',
        u'title': u'Bildung und Wissenschaft',
        u'type': u'group'
    }
    second_group = {
        u'approval_status': u'approved',
        u'capacity': u'public',
        u'created': u'2012-11-01T07:04:45.729819',
        u'description': u'',
        u'id': u'0ee8f2f5-bb43-4744-a5f3-3b285cd1fa21',
        u'image_url': u'',
        u'name': u'geo',
        u'revision_id': u'd37ebf3d-86bb-4022-b762-7bcc2ebc8302',
        u'state': u'active',
        u'title': u'Geographie, Geologie und Geobasisdaten',
        u'type': u'group'
    }
    dataset = {'groups': [first_group, second_group], 'tags': [], 'extras': []}
    normalize_action_dataset(dataset)
    assert dataset['groups'] == [u'bildung_wissenschaft', u'geo']
def validate_datasets(self, datasets, data): print 'Validate datasets' for i, dataset in enumerate(datasets): normalize_action_dataset(dataset) identifier = dataset['id'] portal = dataset['extras'].get('metadata_original_portal', 'null') portal = portal.replace('http://', '') portal = portal.replace('/', '') data['broken_rules'][portal][identifier] = [] broken_rules = data['broken_rules'][portal][identifier] data['datasets_per_portal'][portal].add(identifier) errors = Draft3Validator(self.schema).iter_errors(dataset) if Draft3Validator(self.schema).is_valid(dataset): data['valid_datasets'] += 1 else: data['invalid_datasets'] += 1 errors = Draft3Validator(self.schema).iter_errors(dataset) for error in errors: path = [e for e in error.path if isinstance(e, basestring)] path = str(' -> '.join(map((lambda e: str(e)), path))) data['field_paths'][path] += 1 field_path_message = [path, error.message] broken_rules.append(field_path_message)
def command(self):
    """Entry point for the schema-checker paster command.

    Modes selected by CLI args:
      * no args: validate every local dataset, store per-dataset results
        via the validator and a summary in Redis, and purge violations of
        datasets that no longer exist.
      * ``specific <name>``: validate a single dataset by name.
      * ``remote <endpoint>``: page through a remote CKAN instance,
        validate its datasets, and write an HTML report.
    """
    super(SchemaChecker, self)._load_config()
    context = self.create_context()
    # Accumulators shared by all validation paths.
    data = {'field_paths': defaultdict(int),
            'broken_rules': defaultdict(dict),
            'datasets_per_portal': defaultdict(set),
            'invalid_datasets': 0,
            'valid_datasets': 0}

    if len(self.args) == 0:
        active_datasets = []
        # NOTE(review): rebinding ``context`` discards create_context()'s
        # result — presumably intentional, but worth confirming.
        context = {'model': model,
                   'session': model.Session,
                   'ignore_auth': True}
        validator = schema_checker.SchemaChecker()
        num_datasets = 0
        for i, dataset in enumerate(iterate_local_datasets(context)):
            print 'Processing dataset %s' % i
            normalize_action_dataset(dataset)
            validator.process_record(dataset)
            num_datasets += 1
            active_datasets.append(dataset['id'])
        # Drop stored violations for datasets that no longer exist locally.
        delete_deprecated_violations(active_datasets)
        general = {'num_datasets': num_datasets}
        validator.redis_client.set('general', general)
    elif len(self.args) == 2 and self.args[0] == 'specific':
        context = {'model': model,
                   'session': model.Session,
                   'ignore_auth': True}
        package_show = get_action('package_show')
        dataset_name = self.args[1]
        dataset = package_show(context, {'id': dataset_name})
        print 'Processing dataset %s' % dataset
        normalize_action_dataset(dataset)
        validator = schema_checker.SchemaChecker()
        validator.process_record(dataset)
    elif len(self.args) == 2 and self.args[0] == 'remote':
        endpoint = self.args[1]
        ckan = ckanclient.CkanClient(base_location=endpoint)
        rows = 1000
        total = self.get_dataset_count(ckan)
        # Number of pages needed to cover all datasets at ``rows`` per page.
        steps = int(ceil(total / float(rows)))
        for i in range(0, steps):
            if i == steps - 1:
                # The final page may hold fewer than ``rows`` datasets.
                rows = total - (i * rows)
            datasets = self.get_datasets(ckan, rows, i)
            self.validate_datasets(datasets, data)
        self.write_validation_result(self.render_template(data))
def test_extras_field(): extras = [{ u'id': u'd87c7de7-efeb-4736-be75-bc1be7c616c6', u'key': u'sector', u'package_id': u'1e7454dc-8ca0-444b-be6e-db8c3a41ff7f', u'revision_id': u'8e402ed7-89f9-4d50-8f4f-ee5f9a9ec02f', u'revision_timestamp': u'2013-05-10T14:56:33.088324', u'state': u'active', u'value': u'"oeffentlich"' }, { u'id': u'763d81b0-fdef-4498-bce7-73d69f619734', u'key': u'tag_sources', u'package_id': u'1e7454dc-8ca0-444b-be6e-db8c3a41ff7f', u'revision_id': u'8e402ed7-89f9-4d50-8f4f-ee5f9a9ec02f', u'revision_timestamp': u'2013-05-10T14:56:33.088324', u'state': u'active', u'value': u'[]' }] dataset = {'groups': [], 'tags': [], 'extras': extras} normalize_action_dataset(dataset) expectation = {u'sector': u'oeffentlich', u'tag_sources': []} print dataset['extras'] assert dataset['extras'] == expectation
def test_tags_field():
    """normalize_action_dataset reduces tags to their plain name strings."""
    # Only name/display_name and id vary between the fixture tags.
    tag_data = [
        (u'bauleitplan', u'8dfff9e2-ab24-4b98-9ef8-988dac9bf52a'),
        (u'bebauungsplan', u'c3f452ab-d396-40b5-b19d-e4df069f82be'),
        (u'bplan', u'b20f44e0-d704-42eb-aa9a-e30fd5a11b37'),
    ]
    tags = [{u'display_name': name,
             u'id': identifier,
             u'name': name,
             u'revision_timestamp': u'2013-05-10T14:56:33.088324',
             u'state': u'active',
             u'vocabulary_id': None}
            for name, identifier in tag_data]
    dataset = {'groups': [], 'tags': tags, 'extras': []}
    normalize_action_dataset(dataset)
    assert dataset['tags'] == [u'bauleitplan', u'bebauungsplan', u'bplan']
def test_groups_field():
    """normalize_action_dataset reduces groups to their name strings."""
    # Fields common to both fixture groups.
    shared = {u'approval_status': u'approved',
              u'capacity': u'public',
              u'description': u'',
              u'image_url': u'',
              u'state': u'active',
              u'type': u'group'}
    bildung = dict(shared)
    bildung.update({u'created': u'2012-11-01T07:04:45.306038',
                    u'id': u'9dafbe15-8fe8-4fae-be78-353aa28391ca',
                    u'name': u'bildung_wissenschaft',
                    u'revision_id': u'73355b6b-20a3-42c9-820f-e399eb65a749',
                    u'title': u'Bildung und Wissenschaft'})
    geo = dict(shared)
    geo.update({u'created': u'2012-11-01T07:04:45.729819',
                u'id': u'0ee8f2f5-bb43-4744-a5f3-3b285cd1fa21',
                u'name': u'geo',
                u'revision_id': u'd37ebf3d-86bb-4022-b762-7bcc2ebc8302',
                u'title': u'Geographie, Geologie und Geobasisdaten'})
    dataset = {'groups': [bildung, geo], 'tags': [], 'extras': []}
    normalize_action_dataset(dataset)
    assert dataset['groups'] == [u'bildung_wissenschaft', u'geo']
def command(self):
    """Entry point for the schema-checker command.

    With no args, validates every local dataset and stores a summary in
    Redis; with ``remote <endpoint>``, pages through a remote CKAN
    instance, validates its datasets, and writes an HTML report.
    """
    super(SchemaChecker, self)._load_config()
    context = self.create_context()
    # Accumulators shared by the validation paths.
    data = {
        'field_paths': defaultdict(int),
        'broken_rules': defaultdict(dict),
        'datasets_per_portal': defaultdict(set),
        'invalid_datasets': 0,
        'valid_datasets': 0
    }

    if len(self.args) == 0:
        # NOTE(review): rebinding ``context`` discards create_context()'s
        # result — presumably intentional, but worth confirming.
        context = {
            'model': model,
            'session': model.Session,
            'ignore_auth': True
        }
        validator = schema_checker.SchemaChecker()
        num_datasets = 0
        for i, dataset in enumerate(iterate_local_datasets(context)):
            print 'Processing dataset %s' % i
            normalize_action_dataset(dataset)
            validator.process_record(dataset)
            num_datasets += 1
        general = {'num_datasets': num_datasets}
        validator.redis_client.set('general', general)
    elif len(self.args) == 2 and self.args[0] == 'remote':
        endpoint = self.args[1]
        ckan = ckanclient.CkanClient(base_location=endpoint)
        rows = 1000
        total = self.get_dataset_count(ckan)
        # Number of pages needed to cover all datasets at ``rows`` per page.
        steps = int(ceil(total / float(rows)))
        for i in range(0, steps):
            if i == steps - 1:
                # The final page may hold fewer than ``rows`` datasets.
                rows = total - (i * rows)
            datasets = self.get_datasets(ckan, rows, i)
            self.validate_datasets(datasets, data)
        self.write_validation_result(self.render_template(data))
def command(self): super(LinkChecker,self)._load_config() active_datasets = set() if len(self.args) == 0: context = {'model': model, 'session': model.Session, 'ignore_auth': True} validator = link_checker.LinkChecker() num_datasets = 0 for i, dataset in enumerate(iterate_local_datasets(context)): print 'Processing dataset %s with name: %s' % (i,dataset['name']) normalize_action_dataset(dataset) validator.process_record(dataset) num_datasets += 1 active_datasets.add(dataset['id']) self.delete_deprecated_datasets(active_dataset_ids) general = {'num_datasets': num_datasets} validator.redis_client.set('general', general) if len(self.args) > 0: subcommand = self.args[0] if subcommand == 'remote': self.check_remote_host(self.args[1]) elif subcommand == 'report': self.generate_report() elif len(self.args) == 2 and self.args[0] == 'specific': dataset_name = self.args[1] context = {'model': model, 'session': model.Session, 'ignore_auth': True} package_show = get_action('package_show') validator = link_checker.LinkChecker() dataset = package_show(context, {'id': dataset_name}) print 'Processing dataset %s' % dataset normalize_action_dataset(dataset) validator.process_record(dataset)
def test_extras_field():
    """Extras collapse to a dict of key -> JSON-decoded value."""
    # Fields shared by both fixture extras.
    common = {u'package_id': u'1e7454dc-8ca0-444b-be6e-db8c3a41ff7f',
              u'revision_id': u'8e402ed7-89f9-4d50-8f4f-ee5f9a9ec02f',
              u'revision_timestamp': u'2013-05-10T14:56:33.088324',
              u'state': u'active'}
    sector = dict(common)
    sector.update({u'id': u'd87c7de7-efeb-4736-be75-bc1be7c616c6',
                   u'key': u'sector',
                   u'value': u'"oeffentlich"'})
    tag_sources = dict(common)
    tag_sources.update({u'id': u'763d81b0-fdef-4498-bce7-73d69f619734',
                        u'key': u'tag_sources',
                        u'value': u'[]'})
    dataset = {'groups': [], 'tags': [], 'extras': [sector, tag_sources]}
    normalize_action_dataset(dataset)
    assert dataset['extras'] == {u'sector': u'oeffentlich',
                                 u'tag_sources': []}
def test_tags_field():
    """Tags collapse to a list of their plain name strings."""
    def make_tag(name, identifier):
        # Build a full CKAN tag dict around the two varying fields.
        return {u'display_name': name,
                u'id': identifier,
                u'name': name,
                u'revision_timestamp': u'2013-05-10T14:56:33.088324',
                u'state': u'active',
                u'vocabulary_id': None}

    tags = [make_tag(u'bauleitplan', u'8dfff9e2-ab24-4b98-9ef8-988dac9bf52a'),
            make_tag(u'bebauungsplan', u'c3f452ab-d396-40b5-b19d-e4df069f82be'),
            make_tag(u'bplan', u'b20f44e0-d704-42eb-aa9a-e30fd5a11b37')]
    dataset = {'groups': [], 'tags': tags, 'extras': []}
    normalize_action_dataset(dataset)
    assert dataset['tags'] == [u'bauleitplan', u'bebauungsplan', u'bplan']