Esempio n. 1
0
def fetch(resource_id, **kwargs):
    """Download a filestore resource to disk and print the saved path.

    Args:
        resource_id (str): Id of the resource to fetch.
        kwargs (dict): Command options. Entries whose key is listed in
            `api.CKAN_KEYS` are forwarded to the CKAN constructor.

    Kwargs:
        quiet: Required. A truthy value suppresses the encoding message.
        destination: Required. Handed to `tup.make_filepath` together with
            the response headers to obtain the output path.
        name_from_id: Forwarded to `tup.make_filepath`.
        chunksize_bytes: Forwarded to `tio.write` as `chunksize`.

    Exits the process with an error message when fetching the resource
    raises `api.NotAuthorized`.
    """
    quiet = kwargs['quiet']
    destination = kwargs['destination']
    use_id_for_name = kwargs.get('name_from_id')
    chunk_bytes = kwargs.get('chunksize_bytes')
    ckan = CKAN(**{k: v for k, v in kwargs.items() if k in api.CKAN_KEYS})

    try:
        response = ckan.fetch_resource(resource_id)
    except api.NotAuthorized as err:
        sys.exit('ERROR: %s\n' % str(err))

    saved_path = tup.make_filepath(
        destination,
        headers=response.headers,
        name_from_id=use_id_for_name,
        resource_id=resource_id)

    tio.write(saved_path, response.iter_content, chunksize=chunk_bytes)

    # remember the response encoding in the file's extended attributes
    attrs = xattr(saved_path)
    encoding = response.encoding

    if encoding:
        if not quiet:
            print('saving encoding %s to extended attributes' % encoding)

        attrs['com.ckanny.encoding'] = encoding

    print(saved_path)
Esempio n. 2
0
def upload(source, resource_id=None, **kwargs):
    """Uploads a file to a datastore table

    Args:
        source (str): Path of the file to upload.
        resource_id (str): Target resource id. Defaults to the base name of
            `source` without its extension. Only the part before the first
            '.' is used.
        kwargs (dict): Command options. Entries whose key is listed in
            `api.CKAN_KEYS` are forwarded to the CKAN constructor; the whole
            dict (plus `encoding`, see below) is forwarded to
            `update_datastore`.

    Kwargs:
        quiet: Required. A truthy value suppresses progress messages.
        encoding: Overwritten by the `com.ckanny.encoding` extended
            attribute of `source` whenever that attribute can be read.

    Exits the process with an error message if `update_datastore` returns a
    falsy value.
    """
    verbose = not kwargs['quiet']
    resource_id = resource_id or p.splitext(p.basename(source))[0]

    # keep only the part before the first dot (no-op when there is no dot)
    resource_id = resource_id.split('.')[0]

    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}

    if verbose:
        print(
            'Uploading %s to datastore resource %s...' % (source, resource_id))

    # read encoding from extended attributes
    x = xattr(source)

    try:
        kwargs['encoding'] = x.get('com.ckanny.encoding')
    except IOError:
        # best effort: keep whatever encoding (if any) the caller supplied
        pass

    # `.get` because the key is absent when the attribute read failed and the
    # caller passed no encoding; indexing would raise KeyError here
    encoding = kwargs.get('encoding')

    if verbose and encoding:
        print('Using encoding %s' % encoding)

    ckan = CKAN(**ckan_kwargs)

    if ckan.update_datastore(resource_id, source, **kwargs):
        print('Success! Resource %s uploaded.' % resource_id)
    else:
        sys.exit('ERROR: resource %s not uploaded.' % resource_id)
Esempio n. 3
0
def upload(source, resource_id=None, package_id=None, **kwargs):
    """Update the filestore of an existing resource, or create a new one.

    Args:
        source (str): What to upload. Sent as `url` when it contains the
            text 'http', otherwise as `filepath`.
        resource_id (str): Resource to update. Defaults to the base name of
            `source` without its extension.
        package_id (str): When given, a new resource is created in this
            dataset instead of updating `resource_id`.
        kwargs (dict): Command options. Entries whose key is listed in
            `api.CKAN_KEYS` are forwarded to the CKAN constructor.

    Kwargs:
        quiet: Required. A truthy value suppresses progress messages.
        name: Forwarded as the resource `name`.

    Exits the process with an error message if the CKAN call returns a falsy
    value.
    """
    quiet = kwargs['quiet']
    resource_id = resource_id or p.splitext(p.basename(source))[0]
    connection_opts = {
        k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}

    if not quiet:
        if package_id:
            print(
                'Creating filestore resource %s in dataset %s...' %
                (source, package_id))
        else:
            print(
                'Uploading %s to filestore resource %s...' %
                (source, resource_id))

    ckan = CKAN(**connection_opts)
    source_key = 'url' if 'http' in source else 'filepath'
    resource_kwargs = {source_key: source, 'name': kwargs.get('name')}

    if package_id:
        resource = ckan.create_resource(package_id, **resource_kwargs)
    else:
        resource = ckan.update_filestore(resource_id, **resource_kwargs)

    if not resource:
        sys.exit('Error uploading file!')

    if quiet:
        return

    if package_id:
        # mention the new id only when the response carries one
        infix = '%s ' % resource['id'] if resource.get('id') else ''
        print('Success! Resource %screated.' % infix)
    else:
        print('Success! Resource %s updated.' % resource_id)
Esempio n. 4
0
def upload(source, resource_id=None, **kwargs):
    """Uploads a file to a datastore table

    Args:
        source (str): Path of the file to upload.
        resource_id (str): Target resource id. Defaults to the base name of
            `source` without its extension. Only the part before the first
            '.' is used.
        kwargs (dict): Command options. Entries whose key is listed in
            `api.CKAN_KEYS` are forwarded to the CKAN constructor; the whole
            dict (plus `encoding`, see below) is forwarded to
            `update_datastore`.

    Kwargs:
        quiet: Required. A truthy value suppresses progress messages.
        encoding: Overwritten by the `com.ckanny.encoding` extended
            attribute of `source` whenever that attribute can be read.

    Exits the process with an error message if `update_datastore` returns a
    falsy value.
    """
    verbose = not kwargs['quiet']
    resource_id = resource_id or p.splitext(p.basename(source))[0]

    # keep only the part before the first dot (no-op when there is no dot)
    resource_id = resource_id.split('.')[0]

    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}

    if verbose:
        print('Uploading %s to datastore resource %s...' %
              (source, resource_id))

    # read encoding from extended attributes
    x = xattr(source)

    try:
        kwargs['encoding'] = x.get('com.ckanny.encoding')
    except IOError:
        # best effort: keep whatever encoding (if any) the caller supplied
        pass

    # `.get` because the key is absent when the attribute read failed and the
    # caller passed no encoding; indexing would raise KeyError here
    encoding = kwargs.get('encoding')

    if verbose and encoding:
        print('Using encoding %s' % encoding)

    ckan = CKAN(**ckan_kwargs)

    if ckan.update_datastore(resource_id, source, **kwargs):
        print('Success! Resource %s uploaded.' % resource_id)
    else:
        sys.exit('ERROR: resource %s not uploaded.' % resource_id)
Esempio n. 5
0
def update(endpoint, **kwargs):
    """ Updates the database

    Args:
        endpoint (str): The api resource url.
        kwargs (dict): passed to CKAN constructor (after the options listed
            below, except `mock_freq`, have been removed).

    Kwargs:
        pid (str): Process only this package id. When omitted, the ids of
            all packages of every organization returned by
            `organization_list(permission='read')` are used.
        chunk_size (int): Number of rows to process at a time (default: All).
        row_limit (int): Total number of rows to process (default: All).
            Only successfully sent rows count towards the limit, and it is
            checked after each chunk.
        err_limit (int): Number of errors to encounter before failing
            (default: Inf).
        mock_freq: Forwarded to `gen_data`.

    Returns:
        (dict): Update details: `rows_added`, `errors` (failed response
            bodies keyed by dataset id) and `elapsed_time`.

    Raises:
        Exception: if `err_limit` is set and that many errors accumulated.
    """
    start = timer()
    pid = kwargs.pop('pid', None)
    chunk_size = kwargs.pop('chunk_size', 0)
    row_limit = kwargs.pop('row_limit', None)
    err_limit = kwargs.pop('err_limit', None)

    rows = 0
    ckan = CKAN(**kwargs)

    if pid:
        pids = [pid]
    else:
        orgs_basic = ckan.organization_list(permission='read')

        # lazily fetch each organization and walk its packages
        orgs = (
            ckan.organization_show(id=org['id'], include_datasets=True)
            for org in orgs_basic)

        pids = (package['id'] for org in orgs for package in org['packages'])

    data = gen_data(ckan, pids, kwargs.get('mock_freq'))
    errors = {}

    # Spelled out instead of `min(row_limit or 'inf', chunk_size)`: that form
    # depended on Python 2 ordering ints before strings and raises TypeError
    # on Python 3. The resulting value is the same.
    size = min(row_limit, chunk_size) if row_limit else chunk_size

    for records in tup.chunk(data, size):
        # the chunk is walked twice below, so make sure it can be re-read
        records = list(records)
        responses = [patch_or_post(endpoint, record) for record in records]
        rows += sum(1 for resp in responses if resp.ok)
        ids = [record['dataset_id'] for record in records]

        errors.update({
            dataset_id: resp.json()
            for dataset_id, resp in zip(ids, responses) if not resp.ok})

        if row_limit and rows >= row_limit:
            break

        if err_limit and len(errors) >= err_limit:
            raise Exception(errors)

    # separator was ' ,' (space before the comma), which reads as a typo
    elapsed_time = ', '.join(fmt_elapsed(timer() - start))
    return {'rows_added': rows, 'errors': errors, 'elapsed_time': elapsed_time}
Esempio n. 6
0
def status():
    """Report the current service status as JSON.

    A CKAN client is built from the request's query-string arguments (each
    value passed through `parse`); its `address` is included in the
    response alongside the package version and repository.
    """
    query = request.args.to_dict()
    ckan = CKAN(**{key: parse(value) for key, value in query.items()})

    return jsonify(
        online=True,
        message='Service for checking and updating HDX dataset ages.',
        CKAN_instance=ckan.address,
        version=__version__,
        repository=c.REPO)
Esempio n. 7
0
def migrate(resource_id, **kwargs):
    """Copies a filestore resource from one ckan instance to another

    The resource is fetched from the source instance, written to a temporary
    file, uploaded to the destination instance under the same resource id,
    and the temporary file is removed afterwards.

    Args:
        resource_id (str): Id of the resource to copy.
        kwargs (dict): Command options. Entries whose key is listed in
            `api.CKAN_KEYS` are forwarded to both CKAN constructors.

    Kwargs:
        src_remote: Required. `remote` of the instance to read from.
        dest_remote: Required. `remote` of the instance to write to; must
            differ from `src_remote`.
        quiet: Required. A truthy value suppresses progress messages.
        chunksize_bytes: Required. Forwarded to `tio.write` as `chunksize`.

    Exits the process with an error message if both remotes are equal, if
    fetching the resource or creating the temporary file fails, or if the
    upload returns a falsy value.
    """
    src_remote, dest_remote = kwargs['src_remote'], kwargs['dest_remote']

    if src_remote == dest_remote:
        msg = (
            'ERROR: `dest-remote` of %s is the same as `src-remote` of %s.\n'
            'The dest and src remotes must be different.\n' % (
                dest_remote, src_remote))

        sys.exit(msg)

    verbose = not kwargs['quiet']
    chunksize = kwargs['chunksize_bytes']
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    src_ckan = CKAN(remote=src_remote, **ckan_kwargs)
    dest_ckan = CKAN(remote=dest_remote, **ckan_kwargs)

    # Stays None until a temp file really exists, so the cleanup below cannot
    # fail with a NameError (and hide the real error) when the fetch fails.
    filepath = None

    try:
        r = src_ckan.fetch_resource(resource_id)

        # only the name is needed; close the handle instead of leaking it
        with NamedTemporaryFile(delete=False) as tmp:
            filepath = tmp.name
    except Exception as err:
        sys.exit('ERROR: %s\n' % str(err))
    else:
        tio.write(filepath, r.raw.read(), chunksize=chunksize)
        resource = dest_ckan.update_filestore(resource_id, filepath=filepath)

        if resource and verbose:
            print('Success! Resource %s updated.' % resource_id)
        elif not resource:
            sys.exit('Error uploading file!')
    finally:
        if filepath is not None:
            if verbose:
                print('Removing tempfile...')

            unlink(filepath)
Esempio n. 8
0
def update(pid, **kwargs):
    """Updates a package (aka dataset)

    The current package is fetched, merged with the supplied options (for
    each key the last non-empty value wins, see `last` below) and sent back
    with `package_update`. The package id is printed at the end.

    Args:
        pid (str): Id of the package to update.
        kwargs (dict): Command options, read through `ft.Objectify`. Entries
            whose key is listed in `api.CKAN_KEYS` are forwarded to the CKAN
            constructor.

    Kwargs:
        quiet: A truthy value suppresses the request dump.
        title, name, source, description, caveats, methodology, type:
            Package field values.
        tags (str): Comma separated tag names.
        start, end: Dates, parsed with `parse` and formatted as %m/%d/%Y;
            `end` is only used together with `start`.
        location: Group name; must be one of `ckan.group_list()`.
        license_id: Must be the id of one of `ckan.license_list()`.
        private: When truthy, the package is privatized after the update.

    Exits the process with an error message for an unknown group or license,
    or when `package_show` raises `api.ValidationError`.
    """
    kw = ft.Objectify(kwargs, type='dataset')
    verbose = not kw.quiet
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    ckan = CKAN(**ckan_kwargs)

    license_list = ckan.license_list()
    groups = ckan.group_list()

    raw_tags = [t for t in kw.tags.split(',') if t] if kw.tags else []
    tags = [{'state': 'active', 'name': t} for t in raw_tags]

    if kw.start:
        start = parse(str(kw.start)).strftime('%m/%d/%Y')

        if kw.end:
            date = '%s-%s' % (start, parse(str(kw.end)).strftime('%m/%d/%Y'))
        else:
            date = start
    else:
        date = None

    if kw.location and kw.location in set(groups):
        group_list = [{'name': kw.location}]
    elif kw.location:
        sys.exit('group name: %s not found!' % kw.location)
    else:
        group_list = []

    if kw.license_id:
        license_ids = {entry['id'] for entry in license_list}

        if kw.license_id not in license_ids:
            sys.exit('license id: %s not found!' % kw.license_id)

    # NOTE(review): `methodology` falls back to 'Other' whenever
    # kw.methodology is not a key of `methods`, including when it was not
    # supplied at all; combined with the "last non-empty value wins" merge
    # below this looks like it replaces an existing methodology - confirm.
    package_kwargs = {
        'title': kw.title,
        'name': kw.name,
        'license_id': kw.license_id,
        'dataset_source': kw.source,
        'notes': kw.description or kw.title,
        'type': kw.type,
        'tags': tags,
        'groups': group_list,
        'dataset_date': date,
        'caveats': kw.caveats,
        'methodology': methods.get(kw.methodology, 'Other'),
        'methodology_other': methods.get(kw.methodology) or kw.methodology,
    }

    try:
        old_package = ckan.package_show(id=pid)
    except api.ValidationError as e:
        # sys.exit rather than the site-provided `exit`, which is meant for
        # the interactive shell and is missing when `site` is not loaded
        sys.exit(e)

    if any(package_kwargs.values()):
        def pred(key):
            """Merge every key."""
            return True

        def last(pair):
            """Return the last non-empty value of `pair`, else None."""
            non_empty = [value for value in pair if value]
            return non_empty[-1] if non_empty else None

        records = [old_package, package_kwargs]
        new_kwargs = pr.merge(records, pred=pred, op=last)

        if verbose:
            print('Submitting your package request.')
            pprint(new_kwargs)
            print('\n')

        package = ckan.package_update(**new_kwargs)
    else:
        package = old_package

    if kw.private:
        org = package['organization']
        ckan.package_privatize(org_id=org['id'], datasets=[package['id']])

    print(package['id'])
    print('\n')
    print('\n')
Esempio n. 9
0
def create(org_id, **kwargs):
    """Creates a package (aka dataset)

    Validates the group, organization and license against what the CKAN
    instance reports, builds the package (including one resource per
    file/name pair) and submits it with `package_create`. The new package id
    is printed at the end.

    Args:
        org_id (str): Id or name of the owning organization; must match one
            of `ckan.organization_list()`.
        kwargs (dict): Command options, read through `ft.Objectify`. Entries
            whose key is listed in `api.CKAN_KEYS` are forwarded to the CKAN
            constructor.

    Kwargs:
        quiet: A truthy value suppresses the request/response dumps.
        title, source, description, caveats, methodology, type: Package
            field values.
        name: Package name; defaults to `slugify(title)`.
        tags (str): Comma separated tag names.
        files, names (str): Comma separated lists, paired up positionally
            and passed to `make_rkwargs`; extra entries of the longer list
            are ignored.
        start, end: Dates, parsed with `parse` and formatted as %m/%d/%Y;
            `end` is only used together with `start`.
        location: Group name; must be one of `ckan.group_list()`.
        license_id: Must be the id of one of `ckan.license_list()`.
        private: When truthy, the package is privatized after creation.

    Exits the process with an error message for an unknown group,
    organization or license, or when `package_create` raises
    `api.ValidationError`.
    """
    def split_csv(value):
        """Split a comma separated option, dropping empty entries.

        A missing (None) option yields an empty list instead of raising
        AttributeError.
        """
        return [entry for entry in (value or '').split(',') if entry]

    kw = ft.Objectify(kwargs, type='dataset')
    verbose = not kw.quiet
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    ckan = CKAN(**ckan_kwargs)

    license_list = ckan.license_list()
    orgs = ckan.organization_list()
    groups = ckan.group_list()
    name = kw.name or slugify(kw.title)

    tags = [{'state': 'active', 'name': t} for t in split_csv(kw.tags)]

    if kw.start:
        start = parse(str(kw.start)).strftime('%m/%d/%Y')

        if kw.end:
            date = '%s-%s' % (start, parse(str(kw.end)).strftime('%m/%d/%Y'))
        else:
            date = start
    else:
        date = None

    if kw.location in set(groups):
        group_list = [{'name': kw.location}]
    elif kw.location:
        sys.exit('group name: %s not found!' % kw.location)
    else:
        group_list = []

    # collect ids and names in a single pass over the organizations
    known_orgs = set()

    for org in orgs:
        known_orgs.update((org['id'], org['name']))

    if org_id not in known_orgs:
        sys.exit('organization id: %s not found!' % org_id)

    if kw.license_id not in {entry['id'] for entry in license_list}:
        sys.exit('license id: %s not found!' % kw.license_id)

    pairs = zip(split_csv(kw.files), split_csv(kw.names))
    resource_list = [make_rkwargs(path, rname) for path, rname in pairs]

    package_kwargs = {
        'title': kw.title,
        'name': name,
        'license_id': kw.license_id,
        'owner_org': org_id,
        'dataset_source': kw.source,
        'notes': kw.description or kw.title,
        'type': kw.type,
        'tags': tags,
        'resources': resource_list,
        'package_creator': ckan.user['name'],
        'groups': group_list,
        'dataset_date': date,
        'caveats': kw.caveats,
        'methodology': methods.get(kw.methodology, 'Other'),
        'methodology_other': methods.get(kw.methodology) or kw.methodology,
    }

    if verbose:
        print('Submitting your package request.')
        pprint(package_kwargs)
        print('\n')

    try:
        package = ckan.package_create(**package_kwargs)
    except api.ValidationError as e:
        # sys.exit rather than the site-provided `exit`, which is meant for
        # the interactive shell and is missing when `site` is not loaded
        sys.exit(e)

    if kw.private:
        org = package['organization']
        ckan.package_privatize(org_id=org['id'], datasets=[package['id']])

    if verbose:
        print('Your package response.')
        pprint(package)
        print('\n')

    print(package['id'])
    print('\n')
Esempio n. 10
0
def update(three_dub_id, topline_id=None, **kwargs):
    """Run `ds.update` for the topline resource (if given), then the 3w one.

    Args:
        three_dub_id (str): Resource id passed to the second `ds.update`.
        topline_id (str): Optional resource id; when truthy it is updated
            first.
        kwargs (dict): Command options. Entries whose key is listed in
            `api.CKAN_KEYS` are forwarded to the CKAN constructor; both
            updates share the resulting client.
    """
    ckan = CKAN(**{k: v for k, v in kwargs.items() if k in api.CKAN_KEYS})

    if topline_id:
        ds.update(topline_id, ckan=ckan)

    ds.update(three_dub_id, ckan=ckan)
Esempio n. 11
0
def delete(resource_id, **kwargs):
    """Delete a datastore table.

    Args:
        resource_id (str): Id of the resource whose table is deleted.
        kwargs (dict): Command options. Entries whose key is listed in
            `api.CKAN_KEYS` are forwarded to the CKAN constructor.

    Kwargs:
        filters: Forwarded to `delete_table` (None when omitted).
    """
    connection_opts = dict(
        (key, value) for key, value in kwargs.items()
        if key in api.CKAN_KEYS)

    client = CKAN(**connection_opts)
    client.delete_table(resource_id, filters=kwargs.get('filters'))
Esempio n. 12
0
def update(resource_id, force=None, **kwargs):
    """Updates a datastore table based on the current filestore resource

    The resource is downloaded into a spooled temporary file and hashed. The
    datastore is updated when the hash differs from the stored one (or none
    is stored), or when `force` is truthy; a changed hash is then written
    back to the hash table. If looking up the stored hash raises
    `api.NotFound`, the missing hash package and/or resource is created
    first.

    Args:
        resource_id (str): Id of the resource to update.
        force: When truthy, update even if the hash is unchanged.
        kwargs (dict): Command options. Entries whose key is listed in
            `api.CKAN_KEYS` are forwarded to the CKAN constructor; the whole
            dict (plus `encoding` and `content_type` taken from the
            response) is forwarded to `update_datastore`.

    Kwargs:
        quiet: A truthy value suppresses progress messages.
        chunk_bytes: Chunk size used for downloading and hashing (default:
            `api.CHUNKSIZE_BYTES`).
        hash_table: Name of the hash table package; required only when the
            package or resource has to be created.
        hash_group: Display name of the organization that will own a newly
            created hash table package; required only in that case.

    Raises:
        StopIteration: if the hash package must be created and no
            organization has the display name `hash_group`.

    Exits the process with status 0 when nothing changed and `force` is not
    set, and with an error message when the resource cannot be fetched or
    the datastore update returns a falsy value.
    """
    verbose = not kwargs.get('quiet')
    chunk_bytes = kwargs.get('chunk_bytes', api.CHUNKSIZE_BYTES)
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    hash_kwargs = {'chunksize': chunk_bytes, 'verbose': verbose}
    ckan = CKAN(**ckan_kwargs)

    try:
        r = ckan.fetch_resource(resource_id)
    except (api.NotFound, api.NotAuthorized) as err:
        sys.exit('ERROR: %s\n' % str(err))

    write_kwargs = {
        'length': r.headers.get('content-length'),
        'chunksize': chunk_bytes
    }

    # `with` closes the spooled file on every way out of the block, including
    # the sys.exit calls below; it used to be left open.
    with SpooledTemporaryFile(suffix='.xlsx', mode='r+b') as f:
        tio.write(f, r.iter_content, **write_kwargs)

        try:
            old_hash = ckan.get_hash(resource_id)
        except api.NotFound as err:
            # the error payload names which piece is missing
            item = err.args[0]['item']

            if item == 'package':
                orgs = ckan.organization_list(permission='admin_group')

                # builtin next() instead of the Python 2-only .next() method
                owner_org = next(
                    o['id'] for o in orgs
                    if o['display_name'] == kwargs['hash_group'])

                package_kwargs = {
                    'name': kwargs['hash_table'],
                    'owner_org': owner_org,
                    'package_creator': 'Hash Table',
                    'dataset_source': 'Multiple sources',
                    'notes': 'Datastore resource hash table'
                }

                ckan.hash_table_pack = ckan.package_create(**package_kwargs)

            if item in {'package', 'resource'}:
                # seed the new resource with just a header row
                fileobj = StringIO('datastore_id,hash\n')
                create_kwargs = {'fileobj': fileobj, 'name': api.DEF_HASH_RES}
                table = kwargs['hash_table']
                resource = ckan.create_resource(table, **create_kwargs)
                ckan.hash_table_id = resource['id']

            ckan.create_hash_table(verbose)
            old_hash = ckan.get_hash(resource_id)

        new_hash = tio.hash_file(f, **hash_kwargs)

        # no stored hash counts as changed
        changed = new_hash != old_hash if old_hash else True

        if verbose:
            print(get_message(changed, force))

        if not (changed or force):
            sys.exit(0)

        kwargs['encoding'] = r.encoding
        kwargs['content_type'] = r.headers['content-type']
        updated = ckan.update_datastore(resource_id, f, **kwargs)

        if updated and verbose:
            print('Success! Resource %s updated.' % resource_id)

        if updated and changed:
            ckan.update_hash_table(resource_id, new_hash, verbose)
        elif not updated:
            sys.exit('ERROR: resource %s not updated.' % resource_id)
Esempio n. 13
0
def update(resource_id, force=None, **kwargs):
    """Updates a datastore table based on the current filestore resource

    The resource is downloaded into a spooled temporary file and hashed. The
    datastore is updated when the hash differs from the stored one (or none
    is stored), or when `force` is truthy; a changed hash is then written
    back to the hash table. If looking up the stored hash raises
    `api.NotFound`, the missing hash package and/or resource is created
    first.

    Args:
        resource_id (str): Id of the resource to update.
        force: When truthy, update even if the hash is unchanged.
        kwargs (dict): Command options. Entries whose key is listed in
            `api.CKAN_KEYS` are forwarded to the CKAN constructor; the whole
            dict (plus `encoding` and `content_type` taken from the
            response) is forwarded to `update_datastore`.

    Kwargs:
        quiet: A truthy value suppresses progress messages.
        chunk_bytes: Chunk size used for downloading and hashing (default:
            `api.CHUNKSIZE_BYTES`).
        hash_table: Name of the hash table package; required only when the
            package or resource has to be created.
        hash_group: Display name of the organization that will own a newly
            created hash table package; required only in that case.

    Raises:
        StopIteration: if the hash package must be created and no
            organization has the display name `hash_group`.

    Exits the process with status 0 when nothing changed and `force` is not
    set, and with an error message when the resource cannot be fetched or
    the datastore update returns a falsy value.
    """
    verbose = not kwargs.get('quiet')
    chunk_bytes = kwargs.get('chunk_bytes', api.CHUNKSIZE_BYTES)
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    hash_kwargs = {'chunksize': chunk_bytes, 'verbose': verbose}
    ckan = CKAN(**ckan_kwargs)

    try:
        r = ckan.fetch_resource(resource_id)
    except (api.NotFound, api.NotAuthorized) as err:
        sys.exit('ERROR: %s\n' % str(err))

    write_kwargs = {
        'length': r.headers.get('content-length'),
        'chunksize': chunk_bytes
    }

    # `with` closes the spooled file on every way out of the block, including
    # the sys.exit calls below; it used to be left open.
    with SpooledTemporaryFile(suffix='.xlsx', mode='r+b') as f:
        tio.write(f, r.iter_content, **write_kwargs)

        try:
            old_hash = ckan.get_hash(resource_id)
        except api.NotFound as err:
            # the error payload names which piece is missing
            item = err.args[0]['item']

            if item == 'package':
                orgs = ckan.organization_list(permission='admin_group')

                # builtin next() instead of the Python 2-only .next() method
                owner_org = next(
                    o['id'] for o in orgs
                    if o['display_name'] == kwargs['hash_group'])

                package_kwargs = {
                    'name': kwargs['hash_table'],
                    'owner_org': owner_org,
                    'package_creator': 'Hash Table',
                    'dataset_source': 'Multiple sources',
                    'notes': 'Datastore resource hash table'
                }

                ckan.hash_table_pack = ckan.package_create(**package_kwargs)

            if item in {'package', 'resource'}:
                # seed the new resource with just a header row
                fileobj = StringIO('datastore_id,hash\n')
                create_kwargs = {'fileobj': fileobj, 'name': api.DEF_HASH_RES}
                table = kwargs['hash_table']
                resource = ckan.create_resource(table, **create_kwargs)
                ckan.hash_table_id = resource['id']

            ckan.create_hash_table(verbose)
            old_hash = ckan.get_hash(resource_id)

        new_hash = tio.hash_file(f, **hash_kwargs)

        # no stored hash counts as changed
        changed = new_hash != old_hash if old_hash else True

        if verbose:
            print(get_message(changed, force))

        if not (changed or force):
            sys.exit(0)

        kwargs['encoding'] = r.encoding
        kwargs['content_type'] = r.headers['content-type']
        updated = ckan.update_datastore(resource_id, f, **kwargs)

        if updated and verbose:
            print('Success! Resource %s updated.' % resource_id)

        if updated and changed:
            ckan.update_hash_table(resource_id, new_hash, verbose)
        elif not updated:
            sys.exit('ERROR: resource %s not updated.' % resource_id)
Esempio n. 14
0
def delete(resource_id, **kwargs):
    """Remove the datastore table of a resource.

    Args:
        resource_id (str): Id of the resource whose table is deleted.
        kwargs (dict): Command options. Only entries whose key is listed in
            `api.CKAN_KEYS` reach the CKAN constructor.

    Kwargs:
        filters: Handed to `delete_table` unchanged (None when omitted).
    """
    client = CKAN(**{
        option: kwargs[option] for option in kwargs
        if option in api.CKAN_KEYS})

    client.delete_table(resource_id, filters=kwargs.get('filters'))
Esempio n. 15
0
def create(org_id, **kwargs):
    """Creates a package (aka dataset)

    Validates the group, organization and license against what the CKAN
    instance reports, builds the package (including one resource per
    file/name pair) and submits it with `package_create`. The new package id
    is printed at the end.

    Args:
        org_id (str): Id or name of the owning organization; must match one
            of `ckan.organization_list()`.
        kwargs (dict): Command options, read through `ft.Objectify`. Entries
            whose key is listed in `api.CKAN_KEYS` are forwarded to the CKAN
            constructor.

    Kwargs:
        quiet: A truthy value suppresses the request/response dumps.
        title, source, description, caveats, methodology, type: Package
            field values.
        name: Package name; defaults to `slugify(title)`.
        tags (str): Comma separated tag names.
        files, names (str): Comma separated lists, paired up positionally
            and passed to `make_rkwargs`; extra entries of the longer list
            are ignored.
        start, end: Dates, parsed with `parse` and formatted as %m/%d/%Y;
            `end` is only used together with `start`.
        location: Group name; must be one of `ckan.group_list()`.
        license_id: Must be the id of one of `ckan.license_list()`.
        private: When truthy, the package is privatized after creation.

    Exits the process with an error message for an unknown group,
    organization or license, or when `package_create` raises
    `api.ValidationError`.
    """
    def split_csv(value):
        """Split a comma separated option, dropping empty entries.

        A missing (None) option yields an empty list instead of raising
        AttributeError.
        """
        return [entry for entry in (value or '').split(',') if entry]

    kw = ft.Objectify(kwargs, type='dataset')
    verbose = not kw.quiet
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    ckan = CKAN(**ckan_kwargs)

    license_list = ckan.license_list()
    orgs = ckan.organization_list()
    groups = ckan.group_list()
    name = kw.name or slugify(kw.title)

    tags = [{'state': 'active', 'name': t} for t in split_csv(kw.tags)]

    if kw.start:
        start = parse(str(kw.start)).strftime('%m/%d/%Y')

        if kw.end:
            date = '%s-%s' % (start, parse(str(kw.end)).strftime('%m/%d/%Y'))
        else:
            date = start
    else:
        date = None

    if kw.location in set(groups):
        group_list = [{'name': kw.location}]
    elif kw.location:
        sys.exit('group name: %s not found!' % kw.location)
    else:
        group_list = []

    # collect ids and names in a single pass over the organizations
    known_orgs = set()

    for org in orgs:
        known_orgs.update((org['id'], org['name']))

    if org_id not in known_orgs:
        sys.exit('organization id: %s not found!' % org_id)

    if kw.license_id not in {entry['id'] for entry in license_list}:
        sys.exit('license id: %s not found!' % kw.license_id)

    pairs = zip(split_csv(kw.files), split_csv(kw.names))
    resource_list = [make_rkwargs(path, rname) for path, rname in pairs]

    package_kwargs = {
        'title': kw.title,
        'name': name,
        'license_id': kw.license_id,
        'owner_org': org_id,
        'dataset_source': kw.source,
        'notes': kw.description or kw.title,
        'type': kw.type,
        'tags': tags,
        'resources': resource_list,
        'package_creator': ckan.user['name'],
        'groups': group_list,
        'dataset_date': date,
        'caveats': kw.caveats,
        'methodology': methods.get(kw.methodology, 'Other'),
        'methodology_other': methods.get(kw.methodology) or kw.methodology,
    }

    if verbose:
        print('Submitting your package request.')
        pprint(package_kwargs)
        print('\n')

    try:
        package = ckan.package_create(**package_kwargs)
    except api.ValidationError as e:
        # sys.exit rather than the site-provided `exit`, which is meant for
        # the interactive shell and is missing when `site` is not loaded
        sys.exit(e)

    if kw.private:
        org = package['organization']
        ckan.package_privatize(org_id=org['id'], datasets=[package['id']])

    if verbose:
        print('Your package response.')
        pprint(package)
        print('\n')

    print(package['id'])
    print('\n')
Esempio n. 16
0
def customize(org_id, **kwargs):
    """Introspects custom organization values

    Looks up the organization's 3w, topline and geojson resources (unless
    their ids are supplied), reads the field names of the 3w and geojson
    resources, resolves the who/what/where columns and returns the values
    for the custom page control sheet.

    Args:
        org_id (str): Organization id; also used as the `name` entry. When
            no `geojson` option is given, its second '-' separated segment
            is used as the resource name to look for in the 'hdx'
            organization's packages.
        kwargs (dict): Command options. Entries whose key is listed in
            `api.CKAN_KEYS` are forwarded to the CKAN constructor; the whole
            dict is also forwarded to `find_field`.

    Kwargs:
        quiet: Required. A truthy value suppresses the field and result
            dumps.
        remote: Required. Base url used to build the visualization link.
        image_sq, image_rect: Either a url (any value containing 'http') or
            an id that is wrapped in a docs.google.com download url. A
            missing value is left as None.
        sanitize: When truthy, the 3w field names are passed through
            `tup.underscorify`.
        3w, geojson, topline: Resource ids; looked up with `find_ids` when
            omitted.
        where: Overrides the detected default for the district name column.
        datatype_1, datatype_2, color, viz_type, viz_title: Copied into the
            result, with defaults where the code provides them.

    Returns:
        list: The collected values, ordered by `control_sheet_keys`.

    Exits the process with status 1 if no 3w resource id can be determined.
    """
    def as_image_url(image):
        """Wrap a bare id in a Google Docs download url.

        Values that already contain 'http' are returned unchanged, and so
        is None (an option that was not supplied), which previously raised
        TypeError on the substring test.
        """
        if image is None or 'http' in image:
            return image

        return '%s/uc?id=%s&export=download' % (
            'https://docs.google.com', image)

    verbose = not kwargs['quiet']
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    image_sq = kwargs.get('image_sq')
    image_rect = kwargs.get('image_rect')
    sanitize = kwargs.get('sanitize')
    three_dub_id = kwargs.get('3w')
    geojson_id = kwargs.get('geojson')
    topline_id = kwargs.get('topline')

    ckan = CKAN(**ckan_kwargs)
    organization = ckan.organization_show(id=org_id, include_datasets=True)
    org_packages = organization['packages']
    hdx = ckan.organization_show(id='hdx', include_datasets=True)
    extras = {e['key']: e['value'] for e in organization['extras']}

    if three_dub_id:
        three_dub_set_id = ckan.get_package_id(three_dub_id)
    else:
        ids = ckan.find_ids(org_packages, pnamed='3w', ptagged='3w')
        three_dub_set_id = ids['pname']
        three_dub_id = ids['rid']

    if not three_dub_id:
        sys.exit(1)

    if not topline_id:
        topline_id = ckan.find_ids(org_packages, pnamed='topline')['rid']

    if geojson_id:
        geojson_set_id = ckan.get_package_id(geojson_id)
    else:
        country = org_id.split('-')[1]
        hkwargs = {'pnamed': 'json-repository', 'rnamed': country}
        ids = ckan.find_ids(hdx['packages'], **hkwargs)
        geojson_set_id = ids['pname']
        geojson_id = ids['rid']

    viz_url = '%s/dataset/%s' % (kwargs['remote'], three_dub_set_id)
    three_dub_r = ckan.fetch_resource(three_dub_id)

    # the first line of the 3w resource holds the field names; builtin next()
    # replaces the Python 2-only .next() method
    _fields = next(three_dub_r.iter_lines()).split(',')
    three_dub_fields = tup.underscorify(_fields) if sanitize else _fields

    if geojson_id:
        geojson_r = ckan.fetch_resource(geojson_id)
        geojson_fields = geojson_r.json()['features'][0]['properties'].keys()
    else:
        geojson_fields = []

    if verbose:
        print('3w fields:')
        pprint(three_dub_fields)
        print('geojson fields:')
        pprint(geojson_fields)

    def_where = tup.find(three_dub_fields, geojson_fields) or ''
    who_column = find_field(three_dub_fields, 'who', **kwargs)
    what_column = find_field(three_dub_fields, 'what', **kwargs)
    where_column = find_field(three_dub_fields, 'where', def_where, **kwargs)

    where_column_2 = find_field(geojson_fields, 'where', def_where, **kwargs)
    name_column = kwargs.get('where') or def_where

    image_sq = as_image_url(image_sq)
    image_rect = as_image_url(image_rect)

    data = {
        'name': org_id,
        'resource_id_1': three_dub_id,
        'resource_id_2': geojson_id,
        'topline_resource': topline_id,
        'datatype_1': kwargs.get('datatype_1') or 'datastore',
        'datatype_2': kwargs.get('datatype_2') or 'filestore',
        'org_url': extras['org_url'],
        'description': organization['description'],
        'title': organization['title'],
        'image_sq': image_sq,
        'image_rect': image_rect,
        'highlight_color': kwargs.get('color'),
        'dataset_id_1': three_dub_set_id,
        'dataset_id_2': geojson_set_id,
        'who_column': deref_field(three_dub_fields, who_column),
        'what_column': deref_field(three_dub_fields, what_column),
        'where_column': deref_field(three_dub_fields, where_column),
        'where_column_2': deref_field(geojson_fields, where_column_2),
        'map_district_name_column': deref_field(geojson_fields, name_column),
        'viz_data_link_url': viz_url,
        'visualization_select': kwargs.get('viz_type', '3W-dashboard'),
        'viz_title': kwargs.get('viz_title', "Who's doing what and where?"),
        'colors': [
            '#c6d5ed', '#95b5df', '#659ad2', '#026bb5',
            '#659ad2', '#213b68', '#101d4e', '#000035'],
        'use_org_color': True,
        'modified_at': int(time()),
    }

    control_sheet_data = [data[k] for k in control_sheet_keys]

    if verbose:
        print('\nCustom pages control sheet data:')
        print(control_sheet_data)

    return control_sheet_data
Esempio n. 17
0
def update(pid, **kwargs):
    """Updates a package (aka dataset)

    The current package is fetched, merged with the supplied options (for
    each key the last non-empty value wins, see `last` below) and sent back
    with `package_update`. The package id is printed at the end.

    Args:
        pid (str): Id of the package to update.
        kwargs (dict): Command options, read through `ft.Objectify`. Entries
            whose key is listed in `api.CKAN_KEYS` are forwarded to the CKAN
            constructor.

    Kwargs:
        quiet: A truthy value suppresses the request dump.
        title, name, source, description, caveats, methodology, type:
            Package field values.
        tags (str): Comma separated tag names.
        start, end: Dates, parsed with `parse` and formatted as %m/%d/%Y;
            `end` is only used together with `start`.
        location: Group name; must be one of `ckan.group_list()`.
        license_id: Must be the id of one of `ckan.license_list()`.
        private: When truthy, the package is privatized after the update.

    Exits the process with an error message for an unknown group or license,
    or when `package_show` raises `api.ValidationError`.
    """
    kw = ft.Objectify(kwargs, type='dataset')
    verbose = not kw.quiet
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    ckan = CKAN(**ckan_kwargs)

    license_list = ckan.license_list()
    groups = ckan.group_list()

    raw_tags = [t for t in kw.tags.split(',') if t] if kw.tags else []
    tags = [{'state': 'active', 'name': t} for t in raw_tags]

    if kw.start:
        start = parse(str(kw.start)).strftime('%m/%d/%Y')

        if kw.end:
            date = '%s-%s' % (start, parse(str(kw.end)).strftime('%m/%d/%Y'))
        else:
            date = start
    else:
        date = None

    if kw.location and kw.location in set(groups):
        group_list = [{'name': kw.location}]
    elif kw.location:
        sys.exit('group name: %s not found!' % kw.location)
    else:
        group_list = []

    if kw.license_id:
        license_ids = {entry['id'] for entry in license_list}

        if kw.license_id not in license_ids:
            sys.exit('license id: %s not found!' % kw.license_id)

    # NOTE(review): `methodology` falls back to 'Other' whenever
    # kw.methodology is not a key of `methods`, including when it was not
    # supplied at all; combined with the "last non-empty value wins" merge
    # below this looks like it replaces an existing methodology - confirm.
    package_kwargs = {
        'title': kw.title,
        'name': kw.name,
        'license_id': kw.license_id,
        'dataset_source': kw.source,
        'notes': kw.description or kw.title,
        'type': kw.type,
        'tags': tags,
        'groups': group_list,
        'dataset_date': date,
        'caveats': kw.caveats,
        'methodology': methods.get(kw.methodology, 'Other'),
        'methodology_other': methods.get(kw.methodology) or kw.methodology,
    }

    try:
        old_package = ckan.package_show(id=pid)
    except api.ValidationError as e:
        # sys.exit rather than the site-provided `exit`, which is meant for
        # the interactive shell and is missing when `site` is not loaded
        sys.exit(e)

    if any(package_kwargs.values()):
        def pred(key):
            """Merge every key."""
            return True

        def last(pair):
            """Return the last non-empty value of `pair`, else None."""
            non_empty = [value for value in pair if value]
            return non_empty[-1] if non_empty else None

        records = [old_package, package_kwargs]
        new_kwargs = pr.merge(records, pred=pred, op=last)

        if verbose:
            print('Submitting your package request.')
            pprint(new_kwargs)
            print('\n')

        package = ckan.package_update(**new_kwargs)
    else:
        package = old_package

    if kw.private:
        org = package['organization']
        ckan.package_privatize(org_id=org['id'], datasets=[package['id']])

    print(package['id'])
    print('\n')
Esempio n. 18
0
def customize(org_id, **kwargs):
    """Introspects custom organization values

    Looks up the organization's 3w, topline and geojson resources (unless
    their ids are supplied), reads the field names of the 3w and geojson
    resources, resolves the who/what/where columns and returns the values
    for the custom page control sheet.

    Args:
        org_id (str): Organization id; also used as the `name` entry. When
            no `geojson` option is given, its second '-' separated segment
            is used as the resource name to look for in the 'hdx'
            organization's packages.
        kwargs (dict): Command options. Entries whose key is listed in
            `api.CKAN_KEYS` are forwarded to the CKAN constructor; the whole
            dict is also forwarded to `find_field`.

    Kwargs:
        quiet: Required. A truthy value suppresses the field and result
            dumps.
        remote: Required. Base url used to build the visualization link.
        image_sq, image_rect: Either a url (any value containing 'http') or
            an id that is wrapped in a docs.google.com download url. A
            missing value is left as None.
        sanitize: When truthy, the 3w field names are passed through
            `tup.underscorify`.
        3w, geojson, topline: Resource ids; looked up with `find_ids` when
            omitted.
        where: Overrides the detected default for the district name column.
        datatype_1, datatype_2, color, viz_type, viz_title: Copied into the
            result, with defaults where the code provides them.

    Returns:
        list: The collected values, ordered by `control_sheet_keys`.

    Exits the process with status 1 if no 3w resource id can be determined.
    """
    def as_image_url(image):
        """Wrap a bare id in a Google Docs download url.

        Values that already contain 'http' are returned unchanged, and so
        is None (an option that was not supplied), which previously raised
        TypeError on the substring test.
        """
        if image is None or 'http' in image:
            return image

        return '%s/uc?id=%s&export=download' % (
            'https://docs.google.com', image)

    verbose = not kwargs['quiet']
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    image_sq = kwargs.get('image_sq')
    image_rect = kwargs.get('image_rect')
    sanitize = kwargs.get('sanitize')
    three_dub_id = kwargs.get('3w')
    geojson_id = kwargs.get('geojson')
    topline_id = kwargs.get('topline')

    ckan = CKAN(**ckan_kwargs)
    organization = ckan.organization_show(id=org_id, include_datasets=True)
    org_packages = organization['packages']
    hdx = ckan.organization_show(id='hdx', include_datasets=True)
    extras = {e['key']: e['value'] for e in organization['extras']}

    if three_dub_id:
        three_dub_set_id = ckan.get_package_id(three_dub_id)
    else:
        ids = ckan.find_ids(org_packages, pnamed='3w', ptagged='3w')
        three_dub_set_id = ids['pname']
        three_dub_id = ids['rid']

    if not three_dub_id:
        sys.exit(1)

    if not topline_id:
        topline_id = ckan.find_ids(org_packages, pnamed='topline')['rid']

    if geojson_id:
        geojson_set_id = ckan.get_package_id(geojson_id)
    else:
        country = org_id.split('-')[1]
        hkwargs = {'pnamed': 'json-repository', 'rnamed': country}
        ids = ckan.find_ids(hdx['packages'], **hkwargs)
        geojson_set_id = ids['pname']
        geojson_id = ids['rid']

    viz_url = '%s/dataset/%s' % (kwargs['remote'], three_dub_set_id)
    three_dub_r = ckan.fetch_resource(three_dub_id)

    # the first line of the 3w resource holds the field names; builtin next()
    # replaces the Python 2-only .next() method
    _fields = next(three_dub_r.iter_lines()).split(',')
    three_dub_fields = tup.underscorify(_fields) if sanitize else _fields

    if geojson_id:
        geojson_r = ckan.fetch_resource(geojson_id)
        geojson_fields = geojson_r.json()['features'][0]['properties'].keys()
    else:
        geojson_fields = []

    if verbose:
        print('3w fields:')
        pprint(three_dub_fields)
        print('geojson fields:')
        pprint(geojson_fields)

    def_where = tup.find(three_dub_fields, geojson_fields) or ''
    who_column = find_field(three_dub_fields, 'who', **kwargs)
    what_column = find_field(three_dub_fields, 'what', **kwargs)
    where_column = find_field(three_dub_fields, 'where', def_where, **kwargs)

    where_column_2 = find_field(geojson_fields, 'where', def_where, **kwargs)
    name_column = kwargs.get('where') or def_where

    image_sq = as_image_url(image_sq)
    image_rect = as_image_url(image_rect)

    data = {
        'name': org_id,
        'resource_id_1': three_dub_id,
        'resource_id_2': geojson_id,
        'topline_resource': topline_id,
        'datatype_1': kwargs.get('datatype_1') or 'datastore',
        'datatype_2': kwargs.get('datatype_2') or 'filestore',
        'org_url': extras['org_url'],
        'description': organization['description'],
        'title': organization['title'],
        'image_sq': image_sq,
        'image_rect': image_rect,
        'highlight_color': kwargs.get('color'),
        'dataset_id_1': three_dub_set_id,
        'dataset_id_2': geojson_set_id,
        'who_column': deref_field(three_dub_fields, who_column),
        'what_column': deref_field(three_dub_fields, what_column),
        'where_column': deref_field(three_dub_fields, where_column),
        'where_column_2': deref_field(geojson_fields, where_column_2),
        'map_district_name_column': deref_field(geojson_fields, name_column),
        'viz_data_link_url': viz_url,
        'visualization_select': kwargs.get('viz_type', '3W-dashboard'),
        'viz_title': kwargs.get('viz_title', "Who's doing what and where?"),
        'colors': [
            '#c6d5ed', '#95b5df', '#659ad2', '#026bb5', '#659ad2', '#213b68',
            '#101d4e', '#000035'
        ],
        'use_org_color': True,
        'modified_at': int(time()),
    }

    control_sheet_data = [data[k] for k in control_sheet_keys]

    if verbose:
        print('\nCustom pages control sheet data:')
        print(control_sheet_data)

    return control_sheet_data