Example #1
0
    def import_schemas(self, schema_filepath):
        """Create or update Schema rows from a JSON-lines file.

        Every non-blank line of ``schema_filepath`` is parsed as one JSON
        object. A record that has an ``id`` key is matched to an existing
        Schema by that id, any other record by its ``title``. A matched
        Schema gets every key of the record set on it as an attribute; an
        unmatched record becomes a new Schema added to the session. Each
        record is committed on its own and the saved object is printed as a
        line of JSON.
        """
        from ckan import model
        from ckanext.dgu.model.schema_codelist import Schema

        # Parse the whole file before touching the database
        with open(schema_filepath) as f:
            records = [json.loads(line) for line in f.readlines()
                       if line.strip()]

        for record in records:
            if 'id' in record:
                found = Schema.get(record['id'])
            else:
                found = Schema.by_title(record['title'])

            if not found:
                schema_obj = Schema(**record)
                model.Session.add(schema_obj)
            else:
                record['id'] = found.id
                for attr, value in record.items():
                    setattr(found, attr, value)
                schema_obj = found
            model.Session.commit()
            # Print JSONL with ids, in case you want to save with IDs
            print(json.dumps(schema_obj.as_dict()))
        model.Session.remove()
Example #2
0
    def import_schemas(self, schema_filepath):
        """Load schemas from a JSON-lines file into the database.

        Blank lines in ``schema_filepath`` are skipped; every other line
        must be a JSON object. An existing Schema is looked up by the
        record's ``id`` when it has one, otherwise by its ``title``, and is
        updated attribute by attribute. When nothing is found a new Schema
        is created from the record. The session is committed after each
        record and the stored object is printed as JSON, one per line.
        """
        from ckan import model
        from ckanext.dgu.model.schema_codelist import Schema

        # Read every record up front
        parsed = []
        with open(schema_filepath) as f:
            for raw_line in f.readlines():
                if raw_line.strip():
                    parsed.append(json.loads(raw_line))

        # Create/update in the db, one commit per record
        for entry in parsed:
            match = (Schema.get(entry['id']) if 'id' in entry
                     else Schema.by_title(entry['title']))
            if match:
                entry['id'] = match.id
                for field, value in entry.items():
                    setattr(match, field, value)
            else:
                match = Schema(**entry)
                model.Session.add(match)
            model.Session.commit()
            # The printed JSONL carries the ids, so it can be saved as-is
            print(json.dumps(match.as_dict()))
        model.Session.remove()
Example #3
0
    def add_schema(cls, pkg_dict):
        """Add schema and codelist titles to ``pkg_dict``.

        The ``schema`` and ``codelist`` values of ``pkg_dict`` are each read
        as a JSON list of ids (a missing or empty value counts as ``[]``).
        The titles of the objects those ids refer to are stored, in order,
        under ``schema_multi`` and ``codelist_multi``.

        A value that is not valid JSON is logged and treated as an empty
        list. An id that matches no object is logged and left out.
        """
        from ckanext.dgu.model.schema_codelist import Schema, Codelist
        try:
            schema_ids = json.loads(pkg_dict.get('schema') or '[]')
        except ValueError:
            log.error('Not valid JSON in schema field: %s %r',
                      pkg_dict['name'], pkg_dict.get('schema'))
            # Carry on with no schemas; the loop below needs a list
            schema_ids = []
        schemas = []
        for schema_id in schema_ids:
            schema = Schema.get(schema_id)
            if not schema:
                log.error('Invalid schema_id: %r', schema_id)
                continue
            schemas.append(schema.title)
        pkg_dict['schema_multi'] = schemas
        log.debug('Schema: %s', ' '.join(schemas))

        try:
            codelist_ids = json.loads(pkg_dict.get('codelist') or '[]')
        except ValueError:
            log.error('Not valid JSON in codelists field: %s %r',
                      pkg_dict['name'], pkg_dict.get('codelist'))
            # Carry on with no codelists; the loop below needs a list
            codelist_ids = []
        codelists = []
        for codelist_id in codelist_ids:
            codelist = Codelist.get(codelist_id)
            if not codelist:
                log.error('Invalid codelist_id: %r', codelist_id)
                continue
            codelists.append(codelist.title)
        pkg_dict['codelist_multi'] = codelists
        log.debug('Code lists: %s', ' '.join(codelists))
Example #4
0
    def add_schema(cls, pkg_dict):
        """Store the titles of a package's schemas and codelists on it.

        ``pkg_dict['schema']`` and ``pkg_dict['codelist']`` each hold a JSON
        list of ids; a missing or empty value is read as ``[]``. The
        matching titles are written to ``pkg_dict['schema_multi']`` and
        ``pkg_dict['codelist_multi']``.

        Invalid JSON is logged and handled as an empty list. Ids that match
        no object are logged and skipped.
        """
        from ckanext.dgu.model.schema_codelist import Schema, Codelist
        try:
            schema_ids = json.loads(pkg_dict.get('schema') or '[]')
        except ValueError:
            log.error('Not valid JSON in schema field: %s %r',
                      pkg_dict['name'], pkg_dict.get('schema'))
            # Fall back to an empty list so the loop below still runs
            schema_ids = []
        schemas = []
        for schema_id in schema_ids:
            schema_obj = Schema.get(schema_id)
            if schema_obj:
                schemas.append(schema_obj.title)
            else:
                log.error('Invalid schema_id: %r', schema_id)
        pkg_dict['schema_multi'] = schemas
        log.debug('Schema: %s', ' '.join(schemas))

        try:
            codelist_ids = json.loads(pkg_dict.get('codelist') or '[]')
        except ValueError:
            log.error('Not valid JSON in codelists field: %s %r',
                      pkg_dict['name'], pkg_dict.get('codelist'))
            # Fall back to an empty list so the loop below still runs
            codelist_ids = []
        codelists = []
        for codelist_id in codelist_ids:
            codelist_obj = Codelist.get(codelist_id)
            if codelist_obj:
                codelists.append(codelist_obj.title)
            else:
                log.error('Invalid codelist_id: %r', codelist_id)
        pkg_dict['codelist_multi'] = codelists
        log.debug('Code lists: %s', ' '.join(codelists))
Example #5
0
def schema_codelist_validator(key, data, errors, context):
    """Check schema/codelist references and replace them with ids.

    ``data[key]`` is a list of references. Each non-blank entry is looked
    up as an id, then as a title, then as a URL, and is overwritten in
    place with the id of the object found. Blank entries are left alone.

    The key is only checked when a non-blank entry is reached.

    Raises:
        NotImplementedError: ``key`` is neither ``('schema',)`` nor
            ``('codelist',)``.
        Invalid: a reference matches no object.
    """
    from ckanext.dgu.model.schema_codelist import Schema, Codelist
    for i, schema_ref in enumerate(data[key]):
        if not schema_ref:
            # drop-down has no selection - ignore
            continue
        if key == ('schema',):
            model_cls = Schema
        elif key == ('codelist',):
            model_cls = Codelist
        else:
            # key is itself a tuple, so it is wrapped here; a bare
            # "% key" would spread its items over the format arguments
            raise NotImplementedError('Bad key: %s' % (key,))
        # form gives an ID. API might give a title.
        obj = model_cls.get(schema_ref) or \
            model_cls.by_title(schema_ref) or \
            model_cls.by_url(schema_ref)
        if not obj:
            raise Invalid('%s id does not exist: %r' % (key[0], schema_ref))
        # write the ID in case it came in via the API and was a URL or title
        data[key][i] = obj.id
Example #6
0
def id_to_dict(key, data, errors, context):
    """Replace each id in ``data[key]`` with the dict form of its object.

    ``key`` selects the model: ``('schema',)`` or ``('codelist',)``. Each
    entry is looked up by id and overwritten in place with the object's
    ``as_dict()`` result.

    The key is only checked while iterating, so an empty list never raises.

    Raises:
        NotImplementedError: ``key`` is neither ``('schema',)`` nor
            ``('codelist',)``.
        Invalid: an id matches no object.
    """
    from ckanext.dgu.model.schema_codelist import Schema, Codelist
    for i, id_ in enumerate(data[key]):
        if key == ('schema',):
            obj = Schema.get(id_)
        elif key == ('codelist',):
            obj = Codelist.get(id_)
        else:
            # key is itself a tuple, so it is wrapped here; a bare
            # "% key" would spread its items over the format arguments
            raise NotImplementedError('Bad key: %s' % (key,))
        if not obj:
            # key is a 1-tuple at this point; report the field name alone
            raise Invalid('%s id does not exist: %s' % (key[0], id_))
        data[key][i] = obj.as_dict()
Example #7
0
 def add_schema(cls, pkg_dict):
     """Collect the titles of the schemas listed in ``pkg_dict['schema']``.

     The value is read as a JSON list of schema ids; a missing or empty
     value counts as ``[]``. Invalid JSON is logged and treated as an empty
     list. An id that matches no schema is logged and skipped.

     NOTE(review): in the code visible here the collected titles are not
     stored or returned, and Codelist is imported but unused - the block
     may be cut short; confirm against the full file.
     """
     from ckanext.dgu.model.schema_codelist import Schema, Codelist
     try:
         schema_ids = json.loads(pkg_dict.get('schema') or '[]')
     except ValueError:
         log.error('Not valid JSON in schema field: %s %r',
                   pkg_dict['name'], pkg_dict.get('schema'))
         # An empty list keeps the loop below from iterating over None
         schema_ids = []
     schemas = []
     for schema_id in schema_ids:
         try:
             schemas.append(Schema.get(schema_id).title)
         except AttributeError as e:
             log.error('Invalid schema_id: %r %s', schema_id, e)
 def add_schema(cls, pkg_dict):
     """Gather schema titles for the ids held in ``pkg_dict['schema']``.

     ``pkg_dict['schema']`` is parsed as a JSON list of ids, with a missing
     or empty value read as ``[]``. If it is not valid JSON the problem is
     logged and no ids are processed. Ids with no matching schema are
     logged and left out.

     NOTE(review): nothing visible here stores or returns the titles, and
     Codelist is imported without being used - this looks like a truncated
     copy; confirm against the full file.
     """
     from ckanext.dgu.model.schema_codelist import Schema, Codelist
     try:
         schema_ids = json.loads(pkg_dict.get('schema') or '[]')
     except ValueError:
         log.error('Not valid JSON in schema field: %s %r',
                   pkg_dict['name'], pkg_dict.get('schema'))
         # None is not iterable, so fall back to an empty list
         schema_ids = []
     schemas = []
     for schema_id in schema_ids:
         try:
             schemas.append(Schema.get(schema_id).title)
         except AttributeError as e:
             log.error('Invalid schema_id: %r %s', schema_id, e)
Example #9
0
 def add_datasets_to_results(datasets, result):
     """Record every dataset not already listed in ``result``.

     For each new dataset its name, its title and whether the current
     schema's title is among the dataset's schemas are appended to the
     ``dataset_names``, ``dataset_titles`` and ``dataset_schema_applied``
     lists of ``result``. When the schema is not applied and
     ``options.write`` is set, the schema's id is appended to the JSON list
     kept in the package's ``schema`` extra.
     """
     for dataset in datasets:
         name = dataset['name']
         if name in result['dataset_names']:
             # Already recorded
             continue
         result['dataset_names'].append(name)
         result['dataset_titles'].append(dataset['title'])
         schema_applied = schema.dgu_schema_name in \
             [s['title'] for s in dataset.get('schema', [])]
         result['dataset_schema_applied'].append(schema_applied)
         if schema_applied or not options.write:
             continue

         # Apply the schema to the package
         pkg = model.Package.get(name)
         schema_obj = Schema.by_title(schema.dgu_schema_name)
         assert schema_obj, schema.dgu_schema_name
         current_value = pkg.extras.get('schema')
         try:
             schema_ids = json.loads(current_value or '[]')
         except ValueError:
             log.error('Not valid JSON in schema field: %s %r',
                       name, pkg.extras.get('schema'))
             schema_ids = []
         schema_ids.append(schema_obj.id)
         pkg.extras['schema'] = json.dumps(schema_ids)
Example #10
0
import codecs
import os
from collections import namedtuple
import json

from running_stats import Stats


# Dataset names paired with the name to use in their place.
# NOTE(review): how this mapping is applied is not visible here - confirm.
dataset_name_corrections = {
    'waverley-public-conveniences':
        'waverley-borough-council-public-conveniences',
    'public-conveniences': 'public-conveniences2',
}

# Record type with three fields: lga_name, dgu_schema_name and search_for.
Schema = namedtuple('Schema', 'lga_name dgu_schema_name search_for')
all_schemas = [
    Schema(lga_name='Toilets', dgu_schema_name='Public Toilets (for LGTC by LGA)',
           search_for=['toilet', 'public_toilets', 'public conveniences']),
    Schema(lga_name='Premises', dgu_schema_name='Premises Licences (for LGTC by LGA)',
           search_for=['premises licence', 'premises license', 'premises licensing', 'licensed premises', 'premiseslicences']),
    Schema(lga_name='Planning', dgu_schema_name='Planning Applications (for LGTC by LGA)',
           search_for=['planning applications']),
    Schema(lga_name='', dgu_schema_name=u'Spend over \xa3500 by local authority (Expenditure transactions exceeding \xa3500) (for LGTC by LGA)',
           search_for=['spend over', 'spending over', 'expenditure over', u'\xa3500']),
    Schema(lga_name='', dgu_schema_name='Procurement Information (Local authority contracts) (for LGTC by LGA)',
           search_for=['procurement', 'contracts']),
    Schema(lga_name='', dgu_schema_name='Land and building assets (for LGTC by LGA)',
           search_for=['land assets', 'building assets', 'land ownership', 'land assets', 'land and property assets', 'local authority land', 'Public Property and Land', 'land terrier', 'Council Registered Land', 'owned land', 'council land', 'land owned']),
    Schema(lga_name='', dgu_schema_name='Organisation structure (org chart / organogram for local authority) (for LGTC by LGA)',
           search_for=['org chart', 'organisation chart', 'organisation structure']),
    Schema(lga_name='', dgu_schema_name='Senior employees of a local authority (for LGTC by LGA)',
           search_for=['senior staff', 'senior employees', 'senior roles']),
    Schema(lga_name='', dgu_schema_name='Salary counts of senior employees of a local authority (for LGTC by LGA)',
Example #11
0
    def create_test_data(self):
        """Create or update a fixed set of test records.

        In order: schemas, codelists, the 'oxford' organization and two
        datasets that refer to the schemas and codelists by id. Schemas and
        codelists are matched to existing rows by title, the organization
        and the datasets by name; anything found is updated instead of
        being created again. The dataset names are printed at the end.
        """
        from ckan import plugins
        from ckan import model
        from ckanext.dgu.model.schema_codelist import Schema, Codelist
        pt = plugins.toolkit
        context = {'model': model, 'user': '******'}

        def save_by_title(model_cls, records):
            # Update the row with the same title or add a new one,
            # committing after every record
            for record in records:
                current = model_cls.by_title(record['title'])
                if current:
                    record['id'] = current.id
                    for attr, value in record.items():
                        setattr(current, attr, value)
                else:
                    model.Session.add(model_cls(**record))
                model.repo.commit_and_remove()

        # Create schemas
        save_by_title(Schema, [
            {'url': 'http://lga.org/toilet?v0.3',
             'title': 'Toilet locations'},
            {'url': 'http://spend.com/25',
             'title': '25k Spend'},
            {'url': 'http://environment.data.gov.uk/def/bathing-water-quality/',
             'title': 'Bathing water quality (ontology)'},
            {'url': 'http://environment.data.gov.uk/def/bathing-water/',
             'title': 'Bathing water (ontology)'},
            {'url': 'http://environment.data.gov.uk/def/bwq-cc-2012/',
             'title': 'Bathing water classifications'},
            {'url': 'http://location.data.gov.uk/def/ef/SamplingPoint/',
             'title': 'Sampling point (environmental monitoring) ontology'},
            {'url': 'http://www.w3.org/2006/time',
             'title': 'Time (OWL ontology)'},
            {'url': 'http://purl.org/linked-data/cube',
             'title': 'Data cube (vocabulary)'},
            {'url': 'http://www.w3.org/2004/02/skos/core',
             'title': 'Simple Knowledge Organization System (SKOS vocabulary)'},
            {'url': 'http://purl.org/dc/terms/',
             'title': 'DCMI Metadata Terms (vocabulary)'},
            {'url': 'http://xmlns.com/foaf/0.1/',
             'title': 'FOAF Vocabulary'},
            {'url': 'http://purl.org/linked-data/sdmx/2009/sdmx-attribute',
             'title': 'Statistical Data and Metadata Exchange (SDMX)'},
            {'url': 'http://www.w3.org/2003/01/geo/wgs84_pos',
             'title': 'WGS84 Geo Positioning vocabulary'},
            {'url': 'http://data.ordnancesurvey.co.uk/ontology/geometry/',
             'title': 'Ordnance Survey Geometry (ontology)'},
        ])

        # Create codelists
        save_by_title(Codelist, [
            {'url': 'http://environment.data.gov.uk/registry/def/water-quality/_sampling_point_types',
             'title': 'Water sampling point types'},
            {'url': 'http://environment.data.gov.uk/registry/def/water-quality/sampling_mechanisms',
             'title': 'Water quality sampling mechanisms'},
        ])

        # Create org
        org = {
            'name': 'oxford',
            'title': 'Oxford',
            'type': 'organization',
            'is_organization': True,
            'category': 'local-council',
        }
        existing_org = model.Group.get(org['name'])
        if existing_org:
            org['id'] = existing_org.id
            action = 'update'
        else:
            action = 'create'
        org = pt.get_action('organization_%s' % action)(context, org)

        # Create datasets
        defaults = {
            'license_id': 'uk-ogl',
            'owner_org': org['id'],
            'notes': 'This is a test',
        }
        # Ids are looked up in the order the datasets list them
        toilet_schema_ids = [Schema.by_title('Toilet locations').id]
        bathing_codelist_ids = []
        for title in ('Water sampling point types',
                      'Water quality sampling mechanisms'):
            bathing_codelist_ids.append(Codelist.by_title(title).id)
        bathing_schema_ids = []
        for url in (
                'http://environment.data.gov.uk/def/bathing-water-quality/',
                'http://environment.data.gov.uk/def/bathing-water/',
                'http://environment.data.gov.uk/def/bwq-cc-2012/',
                'http://location.data.gov.uk/def/ef/SamplingPoint/',
                'http://www.w3.org/2006/time',
                'http://purl.org/linked-data/cube',
                'http://www.w3.org/2004/02/skos/core',
                'http://purl.org/dc/terms/',
                'http://xmlns.com/foaf/0.1/',
                'http://purl.org/linked-data/sdmx/2009/sdmx-attribute',
                'http://www.w3.org/2003/01/geo/wgs84_pos',
                'http://data.ordnancesurvey.co.uk/ontology/geometry/'):
            bathing_schema_ids.append(Schema.by_url(url).id)
        datasets = [
            {'name': 'oxford-toilets',
             'title': 'Oxford toilets',
             'codelist': [],
             'schema': toilet_schema_ids},
            {'name': 'bathing-waters',
             'title': 'Bathing waters',
             'codelist': bathing_codelist_ids,
             'schema': bathing_schema_ids},
        ]
        for dataset in datasets:
            dataset.update(defaults)
            existing_dataset = model.Package.get(dataset['name'])
            if existing_dataset:
                dataset['id'] = existing_dataset.id
                action = 'update'
            else:
                action = 'create'
            pt.get_action('dataset_%s' % action)(context, dataset)

        # Joining with ' ' leaves two spaces between the colon and the names
        names = ' '.join([d['name'] for d in datasets])
        print(' '.join(['Datasets: ', names]))
Example #12
0
    def create_test_data(self):
        """Populate the database with a small, fixed set of test data.

        Schemas and codelists come first, each matched to an existing row
        by title and updated if found, otherwise added. Then the 'oxford'
        organization and two datasets are created, or updated when one with
        the same name exists. The datasets refer to the schemas and
        codelists by id. Finally the dataset names are printed.
        """
        from ckan import plugins
        from ckan import model
        from ckanext.dgu.model.schema_codelist import Schema, Codelist
        pt = plugins.toolkit
        context = {'model': model, 'user': '******'}

        # Create schemas - each row is (url, title)
        schema_rows = (
            ('http://lga.org/toilet?v0.3', 'Toilet locations'),
            ('http://spend.com/25', '25k Spend'),
            ('http://environment.data.gov.uk/def/bathing-water-quality/',
             'Bathing water quality (ontology)'),
            ('http://environment.data.gov.uk/def/bathing-water/',
             'Bathing water (ontology)'),
            ('http://environment.data.gov.uk/def/bwq-cc-2012/',
             'Bathing water classifications'),
            ('http://location.data.gov.uk/def/ef/SamplingPoint/',
             'Sampling point (environmental monitoring) ontology'),
            ('http://www.w3.org/2006/time', 'Time (OWL ontology)'),
            ('http://purl.org/linked-data/cube', 'Data cube (vocabulary)'),
            ('http://www.w3.org/2004/02/skos/core',
             'Simple Knowledge Organization System (SKOS vocabulary)'),
            ('http://purl.org/dc/terms/', 'DCMI Metadata Terms (vocabulary)'),
            ('http://xmlns.com/foaf/0.1/', 'FOAF Vocabulary'),
            ('http://purl.org/linked-data/sdmx/2009/sdmx-attribute',
             'Statistical Data and Metadata Exchange (SDMX)'),
            ('http://www.w3.org/2003/01/geo/wgs84_pos',
             'WGS84 Geo Positioning vocabulary'),
            ('http://data.ordnancesurvey.co.uk/ontology/geometry/',
             'Ordnance Survey Geometry (ontology)'),
        )
        for url, title in schema_rows:
            fields = dict(url=url, title=title)
            stored = Schema.by_title(fields['title'])
            if not stored:
                model.Session.add(Schema(**fields))
            else:
                fields['id'] = stored.id
                for name, value in fields.items():
                    setattr(stored, name, value)
            model.repo.commit_and_remove()

        # Create codelists - each row is (url, title)
        codelist_rows = (
            ('http://environment.data.gov.uk/registry/def/water-quality/_sampling_point_types',
             'Water sampling point types'),
            ('http://environment.data.gov.uk/registry/def/water-quality/sampling_mechanisms',
             'Water quality sampling mechanisms'),
        )
        for url, title in codelist_rows:
            fields = dict(url=url, title=title)
            stored = Codelist.by_title(fields['title'])
            if not stored:
                model.Session.add(Codelist(**fields))
            else:
                fields['id'] = stored.id
                for name, value in fields.items():
                    setattr(stored, name, value)
            model.repo.commit_and_remove()

        # Create org
        org = dict(name='oxford',
                   title='Oxford',
                   type='organization',
                   is_organization=True,
                   category='local-council')
        existing_org = model.Group.get(org['name'])
        if not existing_org:
            org_action = 'organization_%s' % 'create'
        else:
            org_action = 'organization_%s' % 'update'
            org['id'] = existing_org.id
        org = pt.get_action(org_action)(context, org)

        # Create datasets
        defaults = dict(license_id='uk-ogl',
                        owner_org=org['id'],
                        notes='This is a test')
        codelist_titles = ('Water sampling point types',
                           'Water quality sampling mechanisms')
        schema_urls = (
            'http://environment.data.gov.uk/def/bathing-water-quality/',
            'http://environment.data.gov.uk/def/bathing-water/',
            'http://environment.data.gov.uk/def/bwq-cc-2012/',
            'http://location.data.gov.uk/def/ef/SamplingPoint/',
            'http://www.w3.org/2006/time',
            'http://purl.org/linked-data/cube',
            'http://www.w3.org/2004/02/skos/core',
            'http://purl.org/dc/terms/',
            'http://xmlns.com/foaf/0.1/',
            'http://purl.org/linked-data/sdmx/2009/sdmx-attribute',
            'http://www.w3.org/2003/01/geo/wgs84_pos',
            'http://data.ordnancesurvey.co.uk/ontology/geometry/',
        )
        datasets = [
            dict(name='oxford-toilets',
                 title='Oxford toilets',
                 codelist=[],
                 schema=[Schema.by_title('Toilet locations').id]),
            dict(name='bathing-waters',
                 title='Bathing waters',
                 codelist=[Codelist.by_title(t).id for t in codelist_titles],
                 schema=[Schema.by_url(u).id for u in schema_urls]),
        ]
        for dataset in datasets:
            dataset.update(defaults)
            existing_dataset = model.Package.get(dataset['name'])
            if not existing_dataset:
                dataset_action = 'dataset_%s' % 'create'
            else:
                dataset_action = 'dataset_%s' % 'update'
                dataset['id'] = existing_dataset.id
            pt.get_action(dataset_action)(context, dataset)

        # Label and names are joined with ' ', giving two spaces after the
        # colon in the output
        dataset_names = ' '.join([d['name'] for d in datasets])
        print(' '.join(['Datasets: ', dataset_names]))