    def test_normalize_action_dataset(self):
        # prepare
        dataset_id = 1
        dataset_name = 'example'
        dataset = {
            'id': dataset_id,
            'name': dataset_name,
            'groups': [{'name': 'group1'}, {'name': 'group2'}],
            'tags': [{'name': 'tag1'}, {'name': 'tag2'}],
            'extras': [
                {'key': 'temporal_granularity_factor', 'value': '1'},
                {'key': 'anotherKey', 'value': 'anotherValue'}
            ]
        }

        # execute
        normalize_action_dataset(dataset)

        # verify
        extras_expected = {
            'temporal_granularity_factor': 1,
            'anotherKey': 'anotherValue'
        }
        self.assertDictEqual(dataset['extras'], extras_expected)
        self.assertListEqual(dataset['groups'], ['group1', 'group2'])
        self.assertListEqual(dataset['tags'], ['tag1', 'tag2'])
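
# normalize_action_dataset itself is not shown on this page. A minimal sketch
# consistent with the assertions above (an assumption, not the project's
# actual implementation): extras collapse into a dict with JSON-decoded
# values where possible; groups and tags collapse into lists of names.
import json

def normalize_action_dataset(dataset):
    extras = {}
    for extra in dataset['extras']:
        value = extra['value']
        try:
            # '1' -> 1, '"oeffentlich"' -> u'oeffentlich', '[]' -> []
            value = json.loads(value)
        except ValueError:
            pass  # plain strings such as 'anotherValue' stay as-is
        extras[extra['key']] = value
    dataset['extras'] = extras
    dataset['groups'] = [group['name'] for group in dataset['groups']]
    dataset['tags'] = [tag['name'] for tag in dataset['tags']]
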
    def validate_datasets(self, dataset, data):
        normalize_action_dataset(dataset)

        identifier = dataset['id']
        portal = dataset['extras'].get('metadata_original_portal', 'null')

        data['broken_rules'][portal][identifier] = []
        broken_rules = data['broken_rules'][portal][identifier]

        data['datasets_per_portal'][portal].add(identifier)
        validator = Draft3Validator(self.schema)

        if validator.is_valid(dataset):
            data['valid_datasets'] += 1
        else:
            data['invalid_datasets'] += 1
            errors = validator.iter_errors(dataset)

            for error in errors:
                path = [e for e in error.path if isinstance(e, basestring)]
                path = ' -> '.join(str(e) for e in path)

                data['field_paths'][path] += 1
                field_path_message = [path, error.message]
                broken_rules.append(field_path_message)
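
# Draft3Validator above comes from the jsonschema package. A tiny standalone
# illustration of the iter_errors()/error.path pattern used in the loop (the
# schema here is made up for the example):
from jsonschema import Draft3Validator

example_schema = {'type': 'object',
                  'properties': {'name': {'type': 'string'}}}

for error in Draft3Validator(example_schema).iter_errors({'name': 1}):
    print ' -> '.join(str(e) for e in error.path), ':', error.message
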
def test_groups_field():
    groups = [{u'approval_status': u'approved',
               u'capacity': u'public',
               u'created': u'2012-11-01T07:04:45.306038',
               u'description': u'',
               u'id': u'9dafbe15-8fe8-4fae-be78-353aa28391ca',
               u'image_url': u'',
               u'name': u'bildung_wissenschaft',
               u'revision_id': u'73355b6b-20a3-42c9-820f-e399eb65a749',
               u'state': u'active',
               u'title': u'Bildung und Wissenschaft',
               u'type': u'group'},
              {u'approval_status': u'approved',
               u'capacity': u'public',
               u'created': u'2012-11-01T07:04:45.729819',
               u'description': u'',
               u'id': u'0ee8f2f5-bb43-4744-a5f3-3b285cd1fa21',
               u'image_url': u'',
               u'name': u'geo',
               u'revision_id': u'd37ebf3d-86bb-4022-b762-7bcc2ebc8302',
               u'state': u'active',
               u'title': u'Geographie, Geologie und Geobasisdaten',
               u'type': u'group'}]

    dataset = {'groups': groups, 'tags': [], 'extras': []}

    normalize_action_dataset(dataset)
    assert dataset['groups'] == [u'bildung_wissenschaft', u'geo']
    def validate_datasets(self, datasets, data):

        print 'Validate datasets'
        for i, dataset in enumerate(datasets):
            normalize_action_dataset(dataset)

            identifier = dataset['id']
            portal = dataset['extras'].get('metadata_original_portal', 'null')
            portal = portal.replace('http://', '')
            portal = portal.replace('/', '')

            data['broken_rules'][portal][identifier] = []
            broken_rules = data['broken_rules'][portal][identifier]

            data['datasets_per_portal'][portal].add(identifier)
            validator = Draft3Validator(self.schema)

            if validator.is_valid(dataset):
                data['valid_datasets'] += 1
            else:
                data['invalid_datasets'] += 1
                errors = validator.iter_errors(dataset)

                for error in errors:
                    path = [e for e in error.path if isinstance(e, basestring)]
                    path = ' -> '.join(str(e) for e in path)

                    data['field_paths'][path] += 1
                    field_path_message = [path, error.message]
                    broken_rules.append(field_path_message)
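
# The two replace() calls above flatten a portal URL into a plain dictionary
# key; a quick check with a hypothetical portal value:
portal = 'http://daten.berlin.de/'.replace('http://', '').replace('/', '')
assert portal == 'daten.berlin.de'
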
    def command(self):
        super(SchemaChecker, self)._load_config()
        context = self.create_context()

        data = {'field_paths':         defaultdict(int),
                'broken_rules':        defaultdict(dict),
                'datasets_per_portal': defaultdict(set),
                'invalid_datasets':    0,
                'valid_datasets':      0}

        if len(self.args) == 0:
            active_datasets = []
            context = {'model':       model,
                       'session':     model.Session,
                       'ignore_auth': True}

            validator = schema_checker.SchemaChecker()

            num_datasets = 0
            for i, dataset in enumerate(iterate_local_datasets(context)):
                print 'Processing dataset %s' % i
                normalize_action_dataset(dataset)
                validator.process_record(dataset)
                num_datasets += 1
                active_datasets.append(dataset['id'])

            delete_deprecated_violations(active_datasets)
            general = {'num_datasets': num_datasets}
            validator.redis_client.set('general', general)

        elif len(self.args) == 2 and self.args[0] == 'specific':
            context = {'model':       model,
                       'session':     model.Session,
                       'ignore_auth': True}
            
            package_show = get_action('package_show')
            dataset_name = self.args[1]
            dataset = package_show(context, {'id': dataset_name})
                   
            print 'Processing dataset %s' % dataset
            normalize_action_dataset(dataset)
            validator = schema_checker.SchemaChecker()
            validator.process_record(dataset)

        elif len(self.args) == 2 and self.args[0] == 'remote':
            endpoint = self.args[1]
            ckan = ckanclient.CkanClient(base_location=endpoint)

            rows = 1000
            total = self.get_dataset_count(ckan)
            steps = int(ceil(total / float(rows)))

            for i in range(0, steps):
                if i == steps - 1:
                    rows = total - (i * rows)

                datasets = self.get_datasets(ckan, rows, i)
                self.validate_datasets(datasets, data)

            self.write_validation_result(self.render_template(data))
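
# The 'remote' branch above pages through the catalogue in ceil(total / rows)
# requests and shrinks the last page to the remainder. A standalone check of
# that arithmetic (the counts are hypothetical):
from math import ceil

total, rows = 2300, 1000
steps = int(ceil(total / float(rows)))
sizes = [total - i * rows if i == steps - 1 else rows for i in range(steps)]
assert steps == 3 and sizes == [1000, 1000, 300]
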
def test_extras_field():
    extras = [{
        u'id': u'd87c7de7-efeb-4736-be75-bc1be7c616c6',
        u'key': u'sector',
        u'package_id': u'1e7454dc-8ca0-444b-be6e-db8c3a41ff7f',
        u'revision_id': u'8e402ed7-89f9-4d50-8f4f-ee5f9a9ec02f',
        u'revision_timestamp': u'2013-05-10T14:56:33.088324',
        u'state': u'active',
        u'value': u'"oeffentlich"'
    }, {
        u'id': u'763d81b0-fdef-4498-bce7-73d69f619734',
        u'key': u'tag_sources',
        u'package_id': u'1e7454dc-8ca0-444b-be6e-db8c3a41ff7f',
        u'revision_id': u'8e402ed7-89f9-4d50-8f4f-ee5f9a9ec02f',
        u'revision_timestamp': u'2013-05-10T14:56:33.088324',
        u'state': u'active',
        u'value': u'[]'
    }]

    dataset = {'groups': [], 'tags': [], 'extras': extras}

    normalize_action_dataset(dataset)

    expectation = {u'sector': u'oeffentlich', u'tag_sources': []}
    assert dataset['extras'] == expectation
def test_tags_field():
    tags = [{
        u'display_name': u'bauleitplan',
        u'id': u'8dfff9e2-ab24-4b98-9ef8-988dac9bf52a',
        u'name': u'bauleitplan',
        u'revision_timestamp': u'2013-05-10T14:56:33.088324',
        u'state': u'active',
        u'vocabulary_id': None
    }, {
        u'display_name': u'bebauungsplan',
        u'id': u'c3f452ab-d396-40b5-b19d-e4df069f82be',
        u'name': u'bebauungsplan',
        u'revision_timestamp': u'2013-05-10T14:56:33.088324',
        u'state': u'active',
        u'vocabulary_id': None
    }, {
        u'display_name': u'bplan',
        u'id': u'b20f44e0-d704-42eb-aa9a-e30fd5a11b37',
        u'name': u'bplan',
        u'revision_timestamp': u'2013-05-10T14:56:33.088324',
        u'state': u'active',
        u'vocabulary_id': None
    }]

    dataset = {'groups': [], 'tags': tags, 'extras': []}

    normalize_action_dataset(dataset)
    assert dataset['tags'] == [u'bauleitplan', u'bebauungsplan', u'bplan']
def test_groups_field():
    groups = [{
        u'approval_status': u'approved',
        u'capacity': u'public',
        u'created': u'2012-11-01T07:04:45.306038',
        u'description': u'',
        u'id': u'9dafbe15-8fe8-4fae-be78-353aa28391ca',
        u'image_url': u'',
        u'name': u'bildung_wissenschaft',
        u'revision_id': u'73355b6b-20a3-42c9-820f-e399eb65a749',
        u'state': u'active',
        u'title': u'Bildung und Wissenschaft',
        u'type': u'group'
    }, {
        u'approval_status': u'approved',
        u'capacity': u'public',
        u'created': u'2012-11-01T07:04:45.729819',
        u'description': u'',
        u'id': u'0ee8f2f5-bb43-4744-a5f3-3b285cd1fa21',
        u'image_url': u'',
        u'name': u'geo',
        u'revision_id': u'd37ebf3d-86bb-4022-b762-7bcc2ebc8302',
        u'state': u'active',
        u'title': u'Geographie, Geologie und Geobasisdaten',
        u'type': u'group'
    }]

    dataset = {'groups': groups, 'tags': [], 'extras': []}

    normalize_action_dataset(dataset)
    assert dataset['groups'] == [u'bildung_wissenschaft', u'geo']
    def command(self):
        super(SchemaChecker, self)._load_config()
        context = self.create_context()

        data = {
            'field_paths': defaultdict(int),
            'broken_rules': defaultdict(dict),
            'datasets_per_portal': defaultdict(set),
            'invalid_datasets': 0,
            'valid_datasets': 0
        }

        if len(self.args) == 0:

            context = {
                'model': model,
                'session': model.Session,
                'ignore_auth': True
            }

            validator = schema_checker.SchemaChecker()

            num_datasets = 0
            for i, dataset in enumerate(iterate_local_datasets(context)):
                print 'Processing dataset %s' % i
                normalize_action_dataset(dataset)
                validator.process_record(dataset)
                num_datasets += 1

            general = {'num_datasets': num_datasets}
            validator.redis_client.set('general', general)

        elif len(self.args) == 2 and self.args[0] == 'remote':
            endpoint = self.args[1]
            ckan = ckanclient.CkanClient(base_location=endpoint)

            rows = 1000
            total = self.get_dataset_count(ckan)
            steps = int(ceil(total / float(rows)))

            for i in range(0, steps):
                if i == steps - 1:
                    rows = total - (i * rows)

                datasets = self.get_datasets(ckan, rows, i)
                self.validate_datasets(datasets, data)

            self.write_validation_result(self.render_template(data))
    def command(self):
        super(LinkChecker, self)._load_config()
        active_datasets = set()

        if len(self.args) == 0:

            context = {'model': model,
                       'session': model.Session,
                       'ignore_auth': True}

            validator = link_checker.LinkChecker()

            num_datasets = 0
            for i, dataset in enumerate(iterate_local_datasets(context)):
                print 'Processing dataset %s with name: %s' % (i, dataset['name'])
                normalize_action_dataset(dataset)
                validator.process_record(dataset)
                num_datasets += 1
                active_datasets.add(dataset['id'])

            self.delete_deprecated_datasets(active_datasets)
            general = {'num_datasets': num_datasets}
            validator.redis_client.set('general', general)
        elif len(self.args) > 0:
            subcommand = self.args[0]
            if subcommand == 'remote':
                self.check_remote_host(self.args[1])
            elif subcommand == 'report':
                self.generate_report()
            elif subcommand == 'specific' and len(self.args) == 2:
                dataset_name = self.args[1]

                context = {'model':       model,
                           'session':     model.Session,
                           'ignore_auth': True}

                package_show = get_action('package_show')
                validator = link_checker.LinkChecker()

                dataset = package_show(context, {'id': dataset_name})

                print 'Processing dataset %s' % dataset
                normalize_action_dataset(dataset)
                validator.process_record(dataset)
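
# SchemaChecker and LinkChecker are CKAN paster commands; invocation looks
# roughly like this (the exact command names depend on how the plugin
# registers them, so treat these as assumptions):
#
#   paster schemachecker -c production.ini
#   paster linkchecker specific <dataset-name> -c production.ini
#   paster linkchecker remote <endpoint-url> -c production.ini
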
def test_extras_field():
    extras = [{u'id': u'd87c7de7-efeb-4736-be75-bc1be7c616c6',
               u'key': u'sector',
               u'package_id': u'1e7454dc-8ca0-444b-be6e-db8c3a41ff7f',
               u'revision_id': u'8e402ed7-89f9-4d50-8f4f-ee5f9a9ec02f',
               u'revision_timestamp': u'2013-05-10T14:56:33.088324',
               u'state': u'active',
               u'value': u'"oeffentlich"'},
              {u'id': u'763d81b0-fdef-4498-bce7-73d69f619734',
               u'key': u'tag_sources',
               u'package_id': u'1e7454dc-8ca0-444b-be6e-db8c3a41ff7f',
               u'revision_id': u'8e402ed7-89f9-4d50-8f4f-ee5f9a9ec02f',
               u'revision_timestamp': u'2013-05-10T14:56:33.088324',
               u'state': u'active',
               u'value': u'[]'}]

    dataset = {'groups': [], 'tags': [], 'extras': extras}

    normalize_action_dataset(dataset)

    expectation = {u'sector': u'oeffentlich', u'tag_sources': []}
    assert dataset['extras'] == expectation
def test_tags_field():
    tags = [{u'display_name': u'bauleitplan',
             u'id': u'8dfff9e2-ab24-4b98-9ef8-988dac9bf52a',
             u'name': u'bauleitplan',
             u'revision_timestamp': u'2013-05-10T14:56:33.088324',
             u'state': u'active',
             u'vocabulary_id': None},
            {u'display_name': u'bebauungsplan',
             u'id': u'c3f452ab-d396-40b5-b19d-e4df069f82be',
             u'name': u'bebauungsplan',
             u'revision_timestamp': u'2013-05-10T14:56:33.088324',
             u'state': u'active',
             u'vocabulary_id': None},
            {u'display_name': u'bplan',
             u'id': u'b20f44e0-d704-42eb-aa9a-e30fd5a11b37',
             u'name': u'bplan',
             u'revision_timestamp': u'2013-05-10T14:56:33.088324',
             u'state': u'active',
             u'vocabulary_id': None}]

    dataset = {'groups': [], 'tags': tags, 'extras': []}

    normalize_action_dataset(dataset)
    assert dataset['tags'] == [u'bauleitplan', u'bebauungsplan', u'bplan']