Beispiel #1
0
    def import_schemas(self, schema_filepath):
        """Create or update Schema rows from a JSON Lines file.

        Every non-blank line of the file is parsed as one JSON object. An
        existing row is looked up by ``id`` when the object has that key,
        otherwise by ``title``. If a row is found, each key of the object is
        set on it as an attribute; if not, a new ``Schema`` is built from
        the object's keys and added to the session. Each row is committed
        on its own and then printed as one JSON line.

        :param schema_filepath: path of the JSON Lines file to import
        :raises ValueError: if a non-blank line is not valid JSON
        :raises KeyError: if an object has neither ``id`` nor ``title``
        """
        from ckan import model
        from ckanext.dgu.model.schema_codelist import Schema

        # Load file with schemas. The whole file is parsed before the db is
        # touched, so a malformed line aborts the import before any write.
        # Iterating the file object reads it line by line, without the
        # intermediate list that readlines() would build.
        with open(schema_filepath) as f:
            schema_dicts = [json.loads(line) for line in f if line.strip()]

        # Create/update in the db
        try:
            for schema in schema_dicts:
                if 'id' in schema:
                    schema_obj = Schema.get(schema['id'])
                else:
                    schema_obj = Schema.by_title(schema['title'])

                if schema_obj:
                    # Update in place; pin the id to that of the row found
                    # so that a title match cannot change it.
                    schema['id'] = schema_obj.id
                    for key, value in schema.items():
                        setattr(schema_obj, key, value)
                else:
                    schema_obj = Schema(**schema)
                    model.Session.add(schema_obj)
                model.Session.commit()

                # Print JSONL with ids, in case you want to save with IDs.
                # A single parenthesised argument prints the same text under
                # the Python 2 print statement and the Python 3 function.
                print(json.dumps(schema_obj.as_dict()))
        finally:
            # Runs on failure too, so the session is not left behind when a
            # lookup or commit above raises.
            model.Session.remove()
Beispiel #2
0
import codecs
import os
from collections import namedtuple
import json

from running_stats import Stats


# Pairs of dataset names, keyed by the name that is to be corrected.
# NOTE(review): the code that reads this mapping is not visible here -
# confirm the direction (name as found -> name to use) against its caller.
dataset_name_corrections = {
    'waverley-public-conveniences':
        'waverley-borough-council-public-conveniences',
    'public-conveniences':
        'public-conveniences2',
}

# Record with three fields: lga_name, dgu_schema_name and search_for.
# In all_schemas below, search_for is filled with a list of phrases.
Schema = namedtuple('Schema', 'lga_name dgu_schema_name search_for')
all_schemas = [
    Schema(lga_name='Toilets', dgu_schema_name='Public Toilets (for LGTC by LGA)',
           search_for=['toilet', 'public_toilets', 'public conveniences']),
    Schema(lga_name='Premises', dgu_schema_name='Premises Licences (for LGTC by LGA)',
           search_for=['premises licence', 'premises license', 'premises licensing', 'licensed premises', 'premiseslicences']),
    Schema(lga_name='Planning', dgu_schema_name='Planning Applications (for LGTC by LGA)',
           search_for=['planning applications']),
    Schema(lga_name='', dgu_schema_name=u'Spend over \xa3500 by local authority (Expenditure transactions exceeding \xa3500) (for LGTC by LGA)',
           search_for=['spend over', 'spending over', 'expenditure over', u'\xa3500']),
    Schema(lga_name='', dgu_schema_name='Procurement Information (Local authority contracts) (for LGTC by LGA)',
           search_for=['procurement', 'contracts']),
    Schema(lga_name='', dgu_schema_name='Land and building assets (for LGTC by LGA)',
           search_for=['land assets', 'building assets', 'land ownership', 'land assets', 'land and property assets', 'local authority land', 'Public Property and Land', 'land terrier', 'Council Registered Land', 'owned land', 'council land', 'land owned']),
    Schema(lga_name='', dgu_schema_name='Organisation structure (org chart / organogram for local authority) (for LGTC by LGA)',
           search_for=['org chart', 'organisation chart', 'organisation structure']),
    Schema(lga_name='', dgu_schema_name='Senior employees of a local authority (for LGTC by LGA)',
           search_for=['senior staff', 'senior employees', 'senior roles']),
    Schema(lga_name='', dgu_schema_name='Salary counts of senior employees of a local authority (for LGTC by LGA)',
Beispiel #3
0
    def create_test_data(self):
        """Create (or update) a fixed set of test records.

        In order: schemas, codelists, the 'oxford' organization, and two
        datasets that reference the schemas and codelists by id. A record
        that already exists is updated instead of being added again:
        schemas and codelists are matched by title, the organization and
        the datasets by name. The dataset names are printed at the end.
        """
        from ckan import plugins
        from ckan import model
        from ckanext.dgu.model.schema_codelist import Schema, Codelist
        pt = plugins.toolkit
        context = {'model': model, 'user': '******'}

        def upsert_by_title(domain_class, records):
            # Shared by schemas and codelists: update the row that has the
            # same title if there is one, otherwise add a new row. Commits
            # after every record.
            for record in records:
                existing = domain_class.by_title(record['title'])
                if existing:
                    record['id'] = existing.id
                    for key, value in record.items():
                        setattr(existing, key, value)
                else:
                    model.Session.add(domain_class(**record))
                model.repo.commit_and_remove()

        def save_via_action(object_type, existing, data_dict):
            # Calls '<object_type>_update' (with the existing id filled in)
            # when the object exists, '<object_type>_create' otherwise, and
            # returns whatever the action returns.
            if existing:
                data_dict['id'] = existing.id
                verb = 'update'
            else:
                verb = 'create'
            # Each call gets its own copy of the context, so nothing an
            # action writes into the dict it receives carries over to the
            # next call.
            # NOTE(review): CKAN actions are understood to store objects in
            # their context - confirm for the CKAN version in use.
            return pt.get_action('%s_%s' % (object_type, verb))(
                dict(context), data_dict)

        # Create schemas
        schemas = [
            dict(url='http://lga.org/toilet?v0.3', title='Toilet locations'),
            dict(url='http://spend.com/25', title='25k Spend'),
            dict(
                url='http://environment.data.gov.uk/def/bathing-water-quality/',
                title='Bathing water quality (ontology)'),
            dict(url='http://environment.data.gov.uk/def/bathing-water/',
                 title='Bathing water (ontology)'),
            dict(url='http://environment.data.gov.uk/def/bwq-cc-2012/',
                 title='Bathing water classifications'),
            dict(url='http://location.data.gov.uk/def/ef/SamplingPoint/',
                 title='Sampling point (environmental monitoring) ontology'),
            dict(url='http://www.w3.org/2006/time',
                 title='Time (OWL ontology)'),
            dict(url='http://purl.org/linked-data/cube',
                 title='Data cube (vocabulary)'),
            dict(url='http://www.w3.org/2004/02/skos/core',
                 title='Simple Knowledge Organization System (SKOS vocabulary)'
                 ),
            dict(url='http://purl.org/dc/terms/',
                 title='DCMI Metadata Terms (vocabulary)'),
            dict(url='http://xmlns.com/foaf/0.1/', title='FOAF Vocabulary'),
            dict(url='http://purl.org/linked-data/sdmx/2009/sdmx-attribute',
                 title='Statistical Data and Metadata Exchange (SDMX)'),
            dict(url='http://www.w3.org/2003/01/geo/wgs84_pos',
                 title='WGS84 Geo Positioning vocabulary'),
            dict(url='http://data.ordnancesurvey.co.uk/ontology/geometry/',
                 title='Ordnance Survey Geometry (ontology)'),
        ]
        upsert_by_title(Schema, schemas)

        # Create codelists
        codelists = [
            dict(
                url=
                'http://environment.data.gov.uk/registry/def/water-quality/_sampling_point_types',
                title='Water sampling point types'),
            dict(
                url=
                'http://environment.data.gov.uk/registry/def/water-quality/sampling_mechanisms',
                title='Water quality sampling mechanisms'),
        ]
        upsert_by_title(Codelist, codelists)

        # Create org
        org = dict(name='oxford',
                   title='Oxford',
                   type='organization',
                   is_organization=True,
                   category='local-council')
        org = save_via_action('organization', model.Group.get(org['name']),
                              org)

        # Create datasets. The id lookups below rely on the schemas and
        # codelists having been committed above.
        defaults = dict(license_id='uk-ogl',
                        owner_org=org['id'],
                        notes='This is a test')
        datasets = [
            dict(name='oxford-toilets',
                 title='Oxford toilets',
                 codelist=[],
                 schema=[Schema.by_title('Toilet locations').id]),
            dict(
                name='bathing-waters',
                title='Bathing waters',
                codelist=[
                    Codelist.by_title(title).id
                    for title in ('Water sampling point types',
                                  'Water quality sampling mechanisms')
                ],
                schema=[
                    Schema.by_url(url).id for url in (
                        'http://environment.data.gov.uk/def/bathing-water-quality/',
                        'http://environment.data.gov.uk/def/bathing-water/',
                        'http://environment.data.gov.uk/def/bwq-cc-2012/',
                        'http://location.data.gov.uk/def/ef/SamplingPoint/',
                        'http://www.w3.org/2006/time',
                        'http://purl.org/linked-data/cube',
                        'http://www.w3.org/2004/02/skos/core',
                        'http://purl.org/dc/terms/',
                        'http://xmlns.com/foaf/0.1/',
                        'http://purl.org/linked-data/sdmx/2009/sdmx-attribute',
                        'http://www.w3.org/2003/01/geo/wgs84_pos',
                        'http://data.ordnancesurvey.co.uk/ontology/geometry/',
                    )
                ])
        ]
        for dataset in datasets:
            dataset.update(defaults)
            save_via_action('dataset', model.Package.get(dataset['name']),
                            dataset)

        # A single string argument prints the same text under the Python 2
        # print statement and the Python 3 function. The lone ' ' is the
        # separator a two-item Python 2 print statement puts between items.
        print('Datasets: ' + ' ' +
              ' '.join(dataset['name'] for dataset in datasets))