Exemple #1
0
def dcat_to_ckan(dcat_dict):
    """Convert a DCAT dataset dictionary into a CKAN package dictionary.

    Mapping performed:

    * ``title``, ``description`` and ``landingPage`` become ``title``,
      ``notes`` and ``url``.
    * every entry of ``keyword`` becomes a ``{'name': ...}`` tag.
    * ``issued``, ``modified``, ``identifier`` and ``language`` are stored
      as extras (``dcat_issued``, ``dcat_modified``, ``guid``, ``language``).
    * ``publisher`` may be a plain string or a dict with a ``name`` key; in
      both cases ``owner_org`` is set to ``munge.munge_name(name)`` and the
      name (plus ``mbox`` for the dict form) is recorded in the extras.
    * every entry of ``distribution`` becomes a resource.

    :param dcat_dict: dict describing one DCAT dataset.
    :returns: dict with the keys ``title``, ``notes``, ``url``, ``tags``,
        ``extras``, ``resources`` and, when a publisher name is available,
        ``owner_org``.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # publisher type check does not raise NameError on Python 3.
    try:
        text_types = basestring  # noqa: F821
    except NameError:
        text_types = str

    package_dict = {
        'title': dcat_dict.get('title'),
        'notes': dcat_dict.get('description'),
        'url': dcat_dict.get('landingPage'),
    }

    # ``or []`` also covers a key that is present but explicitly null.
    package_dict['tags'] = [
        {'name': keyword} for keyword in dcat_dict.get('keyword') or []
    ]

    extras = [
        {'key': 'dcat_{0}'.format(key), 'value': dcat_dict.get(key)}
        for key in ('issued', 'modified')
    ]
    extras.append({'key': 'guid', 'value': dcat_dict.get('identifier')})
    package_dict['extras'] = extras

    dcat_publisher = dcat_dict.get('publisher')
    if isinstance(dcat_publisher, text_types):
        package_dict['owner_org'] = munge.munge_name(dcat_publisher)
        extras.append({'key': 'dcat_publisher_name', 'value': dcat_publisher})
    elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'):
        publisher_name = dcat_publisher.get('name')
        package_dict['owner_org'] = munge.munge_name(publisher_name)
        extras.append({'key': 'dcat_publisher_name', 'value': publisher_name})
        extras.append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')})

    extras.append({
        'key': 'language',
        'value': dcat_dict.get('language', [])
    })

    package_dict['resources'] = []
    for distribution in dcat_dict.get('distribution') or []:
        media_type = distribution.get('mediaType')
        # The resource format is the last segment of the media type
        # ("text/csv" -> "csv"); anything without ``split`` yields ''.
        resource_format = media_type.split('/')[-1] if hasattr(media_type, 'split') else ''
        resource = {
            'name': distribution.get('title'),
            'description': distribution.get('description'),
            'url': distribution.get('downloadURL') or distribution.get('accessURL'),
            'format': resource_format
        }

        byte_size = distribution.get('byteSize')
        if byte_size:
            try:
                resource['size'] = int(byte_size)
            except (TypeError, ValueError):
                # Best effort: a size that cannot be read as an integer
                # (bad string, list, dict, ...) is simply left out.
                pass
        package_dict['resources'].append(resource)

    return package_dict
Exemple #2
0
def dcat_to_ckan(dcat_dict):
    """Build a CKAN package dict from a single DCAT dataset dict.

    The result always contains ``title``, ``notes``, ``url``, ``tags``,
    ``extras`` and ``resources``. ``owner_org`` is added only when the
    DCAT ``publisher`` is a string, or a dict carrying a ``name``; its
    value is ``munge.munge_name`` applied to that name.

    Extras are emitted in this order: ``dcat_issued``, ``dcat_modified``,
    ``guid``, the publisher entries (if any) and ``language``.

    :param dcat_dict: dict describing one DCAT dataset.
    :returns: the CKAN package dict.
    """
    # Python 3 has no ``basestring``; resolve the string type once so the
    # publisher check below works on both major versions.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str

    package_dict = {}

    package_dict['title'] = dcat_dict.get('title')
    package_dict['notes'] = dcat_dict.get('description')
    package_dict['url'] = dcat_dict.get('landingPage')

    # A key that is present but null must behave like a missing key.
    keywords = dcat_dict.get('keyword') or []
    package_dict['tags'] = [{'name': keyword} for keyword in keywords]

    package_dict['extras'] = []
    for key in ('issued', 'modified'):
        package_dict['extras'].append({
            'key': 'dcat_{0}'.format(key),
            'value': dcat_dict.get(key)
        })

    package_dict['extras'].append({
        'key': 'guid',
        'value': dcat_dict.get('identifier')
    })

    dcat_publisher = dcat_dict.get('publisher')
    if isinstance(dcat_publisher, string_types):
        package_dict['owner_org'] = munge.munge_name(dcat_publisher)
        package_dict['extras'].append({
            'key': 'dcat_publisher_name',
            'value': dcat_publisher
        })
    elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'):
        package_dict['owner_org'] = munge.munge_name(
            dcat_publisher.get('name'))
        package_dict['extras'].append({
            'key': 'dcat_publisher_name',
            'value': dcat_publisher.get('name')
        })
        package_dict['extras'].append({
            'key': 'dcat_publisher_email',
            'value': dcat_publisher.get('mbox')
        })

    package_dict['extras'].append({
        'key': 'language',
        'value': dcat_dict.get('language', [])
    })

    package_dict['resources'] = []
    distributions = dcat_dict.get('distribution') or []
    for distribution in distributions:
        mt = distribution.get('mediaType')
        # Format is the trailing segment of the media type, '' otherwise.
        fr = mt.split('/')[-1] if hasattr(mt, 'split') else ''
        resource = {
            'name': distribution.get('title'),
            'description': distribution.get('description'),
            'url': distribution.get('downloadURL')
            or distribution.get('accessURL'),
            'format': fr
        }

        if distribution.get('byteSize'):
            try:
                resource['size'] = int(distribution.get('byteSize'))
            except (TypeError, ValueError):
                # Deliberate best effort: skip sizes that are not
                # convertible to int, whatever their type.
                pass
        package_dict['resources'].append(resource)

    return package_dict
Exemple #3
0
def dcat_to_ckan(dcat_dict):
    """Convert an extended DCAT dataset dictionary into a CKAN package dict.

    On top of the basic title/notes/url/tags/extras/resources mapping this
    variant handles:

    * ``govType``: added as a tag bound to the vocabulary whose id is read
      from the ``VOCABULARY_GOV_TYPE_ID`` environment variable (with a
      hard-coded fallback id), and stored capitalized as ``gov_type``.
      Both are skipped when ``govType`` is missing or empty.
    * ``publisher``: a string, or a dict with a ``name``; sets ``owner_org``
      via ``munge.munge_name`` and records name / ``mbox`` / ``position``
      in the extras.
    * ``theme`` (title-cased), ``accrualPeriodicity`` (as ``frequency``),
      ``temporal`` (split on ``/`` into ``temporal_start`` and
      ``temporal_end``), ``spatial``, ``comments``, ``dataDictionary``,
      ``quality`` and ``language`` as extras.

    :param dcat_dict: dict describing one DCAT dataset.
    :returns: the CKAN package dict. ``gov_type`` and ``owner_org`` are
        only present when the corresponding source data is available.
    """
    # ``basestring`` is Python 2 only; fall back to ``str`` on Python 3.
    try:
        text_types = basestring  # noqa: F821
    except NameError:
        text_types = str

    package_dict = {
        'title': dcat_dict.get('title'),
        'notes': dcat_dict.get('description'),
        'url': dcat_dict.get('landingPage'),
    }

    # ``or []`` also covers a key that is present but explicitly null.
    package_dict['tags'] = [
        {'name': keyword} for keyword in dcat_dict.get('keyword') or []
    ]

    # Government level, expressed through the tag vocabulary.
    gov_type = dcat_dict.get('govType')
    if gov_type:
        gov_type = gov_type.capitalize()
        package_dict['tags'].append({
            'name': gov_type,
            'vocabulary_id': os.environ.get('VOCABULARY_GOV_TYPE_ID', '910b5e72-2723-466d-a892-4be1e4129120')
        })
        # Previously assigned unconditionally, which raised AttributeError
        # for datasets without a govType.
        package_dict['gov_type'] = gov_type

    extras = [
        {'key': 'dcat_{0}'.format(key), 'value': dcat_dict.get(key)}
        for key in ('issued', 'modified')
    ]
    extras.append({'key': 'guid', 'value': dcat_dict.get('identifier')})
    package_dict['extras'] = extras

    dcat_publisher = dcat_dict.get('publisher')
    if isinstance(dcat_publisher, text_types):
        package_dict['owner_org'] = munge.munge_name(dcat_publisher)
        extras.append({'key': 'dcat_publisher_name', 'value': dcat_publisher})
    elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'):
        publisher_name = dcat_publisher.get('name')
        package_dict['owner_org'] = munge.munge_name(publisher_name)
        extras.append({'key': 'dcat_publisher_name', 'value': publisher_name})
        extras.append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')})
        extras.append({'key': 'publisher_type', 'value': dcat_publisher.get('position')})

    theme = dcat_dict.get('theme')
    if theme:
        extras.append({'key': 'theme', 'value': theme.title()})

    extras.append({
        'key': 'frequency', 'value': dcat_dict.get('accrualPeriodicity', '')
    })

    temporal = dcat_dict.get('temporal')
    if temporal:
        # ``partition`` never raises: a value without '/' yields only a
        # start, and any extra '/' stays inside the end part.
        start, separator, end = temporal.partition('/')
        extras.append({'key': 'temporal_start', 'value': start})
        if separator:
            extras.append({'key': 'temporal_end', 'value': end})

    # Optional fields copied verbatim when they carry a truthy value:
    # (DCAT key, extras key).
    optional_extras = (
        ('spatial', 'spatial_text'),
        ('comments', 'version_notes'),
        ('dataDictionary', 'dataDictionary'),
        ('quality', 'quality'),
    )
    for dcat_key, extra_key in optional_extras:
        value = dcat_dict.get(dcat_key)
        if value:
            extras.append({'key': extra_key, 'value': value})

    extras.append({
        'key': 'language',
        'value': dcat_dict.get('language', [])
    })

    package_dict['resources'] = []
    for distribution in dcat_dict.get('distribution') or []:
        media_type = distribution.get('mediaType')
        # The resource format is the last segment of the media type
        # ("text/csv" -> "csv"); anything without ``split`` yields ''.
        resource_format = media_type.split('/')[-1] if hasattr(media_type, 'split') else ''
        resource = {
            'name': distribution.get('title'),
            'description': distribution.get('description'),
            'url': distribution.get('downloadURL') or distribution.get('accessURL'),
            'format': resource_format
        }

        byte_size = distribution.get('byteSize')
        if byte_size:
            try:
                resource['size'] = int(byte_size)
            except (TypeError, ValueError):
                # Best effort: a size that cannot be read as an integer
                # is simply left out.
                pass
        package_dict['resources'].append(resource)

    return package_dict
Exemple #4
0
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-d", "--datasets"):
            datasets = remote.action.package_list()
            for d in datasets:
                print d
        elif opt in ("-s", "--harvest", "-p", "--purge-harvest"):
            catalog = dcat_to_utf8_dict(arg)

            # If purge mode is activated, then delete all org's datasets
            if opt in ("-p", "--purge-harvest"):
                org_name = catalog['dataset'][0]['publisher']['name']
                org_id = munge.munge_name(org_name)
                delete_organization_datasets(remote, org_id)

            for dcat_dataset in catalog.get('dataset', []):
                ckan_dataset = converters.dcat_to_ckan(dcat_dataset)
                ckan_dataset['name'] = munge.munge_title_to_name(ckan_dataset['title'])
                ckan_dataset['state'] = 'active'
                print 'Dataset "%s"' % ckan_dataset['title'], 'with %d resources' % len(ckan_dataset['resources'])
                print datetime.datetime.utcnow()
                new_dataset = upsert_dataset(remote, ckan_dataset)
                if new_dataset:
                    print 'Dataset upserted'
                else:
                    print 'Something went wrong'
        elif opt in ("-q", "--find-datasets"):
            datasets = find_datasets_with_query(remote, arg)
Exemple #5
0
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-d", "--datasets"):
            datasets = remote.action.package_list()
            for d in datasets:
                print d
        elif opt in ("-s", "--harvest", "-p", "--purge-harvest"):
            catalog = dcat_to_utf8_dict(arg)

            # If purge mode is activated, then delete all org's datasets
            if opt in ("-p", "--purge-harvest"):
                org_name = catalog['dataset'][0]['publisher']['name']
                org_id = munge.munge_name(org_name)
                delete_organization_datasets(remote, org_id)

            for dcat_dataset in catalog.get('dataset', []):
                ckan_dataset = converters.dcat_to_ckan(dcat_dataset)
                ckan_dataset['name'] = munge.munge_title_to_name(ckan_dataset['title'])
                ckan_dataset['state'] = 'active'
                print 'Dataset "%s"' % ckan_dataset['title'], 'with %d resources' % len(ckan_dataset['resources'])
                print datetime.datetime.utcnow()
                new_dataset = upsert_dataset(remote, ckan_dataset)
                if new_dataset:
                    print 'Dataset upserted'
                else:
                    print 'Something went wrong'
        elif opt in ("-q", "--find-datasets"):
            datasets = find_datasets_with_query(remote, arg)