Beispiel #1
0
def create(org_id, **kwargs):
    """Creates a package (aka dataset)

    Validates the requested group, organization and license against the
    values reported by CKAN, assembles the package fields from `kwargs`,
    submits the package, optionally privatizes it, and prints the id of the
    new package.

    Args:
        org_id (str): Id or name of the organization that will own the
            package.
        **kwargs: Options. Keys found in `api.CKAN_KEYS` are passed to the
            `CKAN` constructor. The keys read here are: quiet, name, title,
            tags, start, end, location, license_id, files, names, source,
            description, type, caveats, methodology and private. `tags`,
            `files` and `names` are comma separated strings.

    Raises:
        SystemExit: If the group, organization or license is not found, or
            if CKAN rejects the package with an `api.ValidationError`.
    """
    kw = ft.Objectify(kwargs, type='dataset')
    verbose = not kw.quiet
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    ckan = CKAN(**ckan_kwargs)

    licenses = {lic['id'] for lic in ckan.license_list()}

    # Gather ids and names in one pass so the lookup also works if the
    # organization list is a one-shot iterator.
    org_keys = set()
    for org in ckan.organization_list():
        org_keys.update((org['id'], org['name']))

    groups = ckan.group_list()
    name = kw.name or slugify(kw.title)

    # `or ''` guards against an option that was left unset.
    raw_tags = [t for t in (kw.tags or '').split(',') if t]
    tags = [{'state': 'active', 'name': t} for t in raw_tags]

    date_format = '%m/%d/%Y'

    # An end date is only meaningful together with a start date.
    if kw.start:
        start = parse(str(kw.start)).strftime(date_format)

        if kw.end:
            end = parse(str(kw.end)).strftime(date_format)
            date = '%s-%s' % (start, end)
        else:
            date = start
    else:
        date = None

    if kw.location in groups:
        group_list = [{'name': kw.location}]
    elif kw.location:
        sys.exit('group name: %s not found!' % kw.location)
    else:
        group_list = []

    if org_id not in org_keys:
        sys.exit('organization id: %s not found!' % org_id)

    if kw.license_id not in licenses:
        sys.exit('license id: %s not found!' % kw.license_id)

    files = [f for f in (kw.files or '').split(',') if f]
    names = [n for n in (kw.names or '').split(',') if n]

    # Files and names are paired positionally; extras on either side are
    # dropped by zip.
    resource_list = [make_rkwargs(f, n) for f, n in zip(files, names)]

    package_kwargs = {
        'title': kw.title,
        'name': name,
        'license_id': kw.license_id,
        'owner_org': org_id,
        'dataset_source': kw.source,
        'notes': kw.description or kw.title,
        'type': kw.type,
        'tags': tags,
        'resources': resource_list,
        'package_creator': ckan.user['name'],
        'groups': group_list,
        'dataset_date': date,
        'caveats': kw.caveats,
        # A methodology missing from `methods` is reported as 'Other' and
        # its raw value is carried in 'methodology_other'.
        'methodology': methods.get(kw.methodology, 'Other'),
        'methodology_other': methods.get(kw.methodology) or kw.methodology,
    }

    if verbose:
        print('Submitting your package request.')
        pprint(package_kwargs)
        print('\n')

    try:
        package = ckan.package_create(**package_kwargs)
    except api.ValidationError as e:
        # Use sys.exit: the bare `exit` helper is injected by the `site`
        # module and is not guaranteed to exist.
        sys.exit(e)

    if kw.private:
        org = package['organization']
        ckan.package_privatize(org_id=org['id'], datasets=[package['id']])

    if verbose:
        print('Your package response.')
        pprint(package)
        print('\n')

    print(package['id'])
    print('\n')
Beispiel #2
0
def update(resource_id, force=None, **kwargs):
    """Updates a datastore table based on the current filestore resource

    Downloads the resource into a temporary file, hashes it, and compares
    the result with the hash stored for the resource. The datastore is
    updated when the hashes differ, when no hash is stored, or when `force`
    is truthy. If looking up the stored hash raises `api.NotFound`, the
    missing hash table package and/or resource are created first.

    Args:
        resource_id (str): Id of the resource to update.
        force: If truthy, update the datastore even if the file is
            unchanged.
        **kwargs: Options. Keys found in `api.CKAN_KEYS` are passed to the
            `CKAN` constructor. The keys read here are: quiet, chunk_bytes,
            hash_group and hash_table. All of `kwargs`, plus `encoding` and
            `content_type` taken from the response, are forwarded to
            `ckan.update_datastore`.

    Raises:
        SystemExit: If the resource can't be fetched, the hash group
            organization can't be found, the file is unchanged and `force`
            is falsy (exit code 0), or the datastore isn't updated.
    """
    verbose = not kwargs.get('quiet')
    chunk_bytes = kwargs.get('chunk_bytes', api.CHUNKSIZE_BYTES)
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    hash_kwargs = {'chunksize': chunk_bytes, 'verbose': verbose}
    ckan = CKAN(**ckan_kwargs)

    try:
        r = ckan.fetch_resource(resource_id)
    except (api.NotFound, api.NotAuthorized) as err:
        sys.exit('ERROR: %s\n' % str(err))

    # The context manager closes the temporary file on every exit path,
    # including the sys.exit calls below.
    with SpooledTemporaryFile(suffix='.xlsx', mode='r+b') as f:
        write_kwargs = {
            'length': r.headers.get('content-length'),
            'chunksize': chunk_bytes
        }

        tio.write(f, r.iter_content, **write_kwargs)

        try:
            old_hash = ckan.get_hash(resource_id)
        except api.NotFound as err:
            # The first exception argument names what is missing.
            item = err.args[0]['item']

            if item == 'package':
                hash_group = kwargs['hash_group']
                orgs = ckan.organization_list(permission='admin_group')
                matches = (
                    o['id'] for o in orgs if o['display_name'] == hash_group)

                # The builtin next() works on Python 2.6+ and 3, and the
                # default avoids an unhandled StopIteration.
                owner_org = next(matches, None)

                if owner_org is None:
                    sys.exit(
                        'ERROR: organization %s not found!' % hash_group)

                package_kwargs = {
                    'name': kwargs['hash_table'],
                    'owner_org': owner_org,
                    'package_creator': 'Hash Table',
                    'dataset_source': 'Multiple sources',
                    'notes': 'Datastore resource hash table'
                }

                ckan.hash_table_pack = ckan.package_create(**package_kwargs)

            # A newly created package also needs its hash resource.
            if item in {'package', 'resource'}:
                fileobj = StringIO('datastore_id,hash\n')
                create_kwargs = {'fileobj': fileobj, 'name': api.DEF_HASH_RES}
                table = kwargs['hash_table']
                resource = ckan.create_resource(table, **create_kwargs)
                ckan.hash_table_id = resource['id']

            ckan.create_hash_table(verbose)
            old_hash = ckan.get_hash(resource_id)

        new_hash = tio.hash_file(f, **hash_kwargs)

        # A missing stored hash always counts as a change.
        changed = new_hash != old_hash if old_hash else True

        if verbose:
            print(get_message(changed, force))

        if not (changed or force):
            sys.exit(0)

        kwargs['encoding'] = r.encoding
        kwargs['content_type'] = r.headers['content-type']
        updated = ckan.update_datastore(resource_id, f, **kwargs)

        if updated and verbose:
            print('Success! Resource %s updated.' % resource_id)

        if updated and changed:
            ckan.update_hash_table(resource_id, new_hash, verbose)
        elif not updated:
            sys.exit('ERROR: resource %s not updated.' % resource_id)
Beispiel #3
0
def create(org_id, **kwargs):
    """Creates a package (aka dataset)

    Validates the requested group, organization and license against the
    values reported by CKAN, assembles the package fields from `kwargs`,
    submits the package, optionally privatizes it, and prints the id of the
    new package.

    Args:
        org_id (str): Id or name of the organization that will own the
            package.
        **kwargs: Options. Keys found in `api.CKAN_KEYS` are passed to the
            `CKAN` constructor. The keys read here are: quiet, name, title,
            tags, start, end, location, license_id, files, names, source,
            description, type, caveats, methodology and private. `tags`,
            `files` and `names` are comma separated strings.

    Raises:
        SystemExit: If the group, organization or license is not found, or
            if CKAN rejects the package with an `api.ValidationError`.
    """
    kw = ft.Objectify(kwargs, type='dataset')
    verbose = not kw.quiet
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    ckan = CKAN(**ckan_kwargs)

    licenses = {lic['id'] for lic in ckan.license_list()}

    # Gather ids and names in one pass so the lookup also works if the
    # organization list is a one-shot iterator.
    org_keys = set()
    for org in ckan.organization_list():
        org_keys.update((org['id'], org['name']))

    groups = ckan.group_list()
    name = kw.name or slugify(kw.title)

    # `or ''` guards against an option that was left unset.
    raw_tags = [t for t in (kw.tags or '').split(',') if t]
    tags = [{'state': 'active', 'name': t} for t in raw_tags]

    date_format = '%m/%d/%Y'

    # An end date is only meaningful together with a start date.
    if kw.start:
        start = parse(str(kw.start)).strftime(date_format)

        if kw.end:
            end = parse(str(kw.end)).strftime(date_format)
            date = '%s-%s' % (start, end)
        else:
            date = start
    else:
        date = None

    if kw.location in groups:
        group_list = [{'name': kw.location}]
    elif kw.location:
        sys.exit('group name: %s not found!' % kw.location)
    else:
        group_list = []

    if org_id not in org_keys:
        sys.exit('organization id: %s not found!' % org_id)

    if kw.license_id not in licenses:
        sys.exit('license id: %s not found!' % kw.license_id)

    files = [f for f in (kw.files or '').split(',') if f]
    names = [n for n in (kw.names or '').split(',') if n]

    # Files and names are paired positionally; extras on either side are
    # dropped by zip.
    resource_list = [make_rkwargs(f, n) for f, n in zip(files, names)]

    package_kwargs = {
        'title': kw.title,
        'name': name,
        'license_id': kw.license_id,
        'owner_org': org_id,
        'dataset_source': kw.source,
        'notes': kw.description or kw.title,
        'type': kw.type,
        'tags': tags,
        'resources': resource_list,
        'package_creator': ckan.user['name'],
        'groups': group_list,
        'dataset_date': date,
        'caveats': kw.caveats,
        # A methodology missing from `methods` is reported as 'Other' and
        # its raw value is carried in 'methodology_other'.
        'methodology': methods.get(kw.methodology, 'Other'),
        'methodology_other': methods.get(kw.methodology) or kw.methodology,
    }

    if verbose:
        print('Submitting your package request.')
        pprint(package_kwargs)
        print('\n')

    try:
        package = ckan.package_create(**package_kwargs)
    except api.ValidationError as e:
        # Use sys.exit: the bare `exit` helper is injected by the `site`
        # module and is not guaranteed to exist.
        sys.exit(e)

    if kw.private:
        org = package['organization']
        ckan.package_privatize(org_id=org['id'], datasets=[package['id']])

    if verbose:
        print('Your package response.')
        pprint(package)
        print('\n')

    print(package['id'])
    print('\n')
Beispiel #4
0
def update(resource_id, force=None, **kwargs):
    """Updates a datastore table based on the current filestore resource

    Downloads the resource into a temporary file, hashes it, and compares
    the result with the hash stored for the resource. The datastore is
    updated when the hashes differ, when no hash is stored, or when `force`
    is truthy. If looking up the stored hash raises `api.NotFound`, the
    missing hash table package and/or resource are created first.

    Args:
        resource_id (str): Id of the resource to update.
        force: If truthy, update the datastore even if the file is
            unchanged.
        **kwargs: Options. Keys found in `api.CKAN_KEYS` are passed to the
            `CKAN` constructor. The keys read here are: quiet, chunk_bytes,
            hash_group and hash_table. All of `kwargs`, plus `encoding` and
            `content_type` taken from the response, are forwarded to
            `ckan.update_datastore`.

    Raises:
        SystemExit: If the resource can't be fetched, the hash group
            organization can't be found, the file is unchanged and `force`
            is falsy (exit code 0), or the datastore isn't updated.
    """
    verbose = not kwargs.get('quiet')
    chunk_bytes = kwargs.get('chunk_bytes', api.CHUNKSIZE_BYTES)
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    hash_kwargs = {'chunksize': chunk_bytes, 'verbose': verbose}
    ckan = CKAN(**ckan_kwargs)

    try:
        r = ckan.fetch_resource(resource_id)
    except (api.NotFound, api.NotAuthorized) as err:
        sys.exit('ERROR: %s\n' % str(err))

    # The context manager closes the temporary file on every exit path,
    # including the sys.exit calls below.
    with SpooledTemporaryFile(suffix='.xlsx', mode='r+b') as f:
        write_kwargs = {
            'length': r.headers.get('content-length'),
            'chunksize': chunk_bytes
        }

        tio.write(f, r.iter_content, **write_kwargs)

        try:
            old_hash = ckan.get_hash(resource_id)
        except api.NotFound as err:
            # The first exception argument names what is missing.
            item = err.args[0]['item']

            if item == 'package':
                hash_group = kwargs['hash_group']
                orgs = ckan.organization_list(permission='admin_group')
                matches = (
                    o['id'] for o in orgs if o['display_name'] == hash_group)

                # The builtin next() works on Python 2.6+ and 3, and the
                # default avoids an unhandled StopIteration.
                owner_org = next(matches, None)

                if owner_org is None:
                    sys.exit(
                        'ERROR: organization %s not found!' % hash_group)

                package_kwargs = {
                    'name': kwargs['hash_table'],
                    'owner_org': owner_org,
                    'package_creator': 'Hash Table',
                    'dataset_source': 'Multiple sources',
                    'notes': 'Datastore resource hash table'
                }

                ckan.hash_table_pack = ckan.package_create(**package_kwargs)

            # A newly created package also needs its hash resource.
            if item in {'package', 'resource'}:
                fileobj = StringIO('datastore_id,hash\n')
                create_kwargs = {'fileobj': fileobj, 'name': api.DEF_HASH_RES}
                table = kwargs['hash_table']
                resource = ckan.create_resource(table, **create_kwargs)
                ckan.hash_table_id = resource['id']

            ckan.create_hash_table(verbose)
            old_hash = ckan.get_hash(resource_id)

        new_hash = tio.hash_file(f, **hash_kwargs)

        # A missing stored hash always counts as a change.
        changed = new_hash != old_hash if old_hash else True

        if verbose:
            print(get_message(changed, force))

        if not (changed or force):
            sys.exit(0)

        kwargs['encoding'] = r.encoding
        kwargs['content_type'] = r.headers['content-type']
        updated = ckan.update_datastore(resource_id, f, **kwargs)

        if updated and verbose:
            print('Success! Resource %s updated.' % resource_id)

        if updated and changed:
            ckan.update_hash_table(resource_id, new_hash, verbose)
        elif not updated:
            sys.exit('ERROR: resource %s not updated.' % resource_id)