def fetch(resource_id, **kwargs):
    """Download a filestore resource and print the path it was saved to

    Args:
        resource_id: id of the resource to download.
        **kwargs: command options. `quiet` and `destination` are required;
            `name_from_id` and `chunksize_bytes` are optional. Entries whose
            key is listed in `api.CKAN_KEYS` are forwarded to `CKAN`.

    Exits through `sys.exit` with an error message when the request raises
    `api.NotAuthorized`.
    """
    announce = not kwargs['quiet']
    destination = kwargs['destination']
    name_from_id = kwargs.get('name_from_id')
    chunksize = kwargs.get('chunksize_bytes')
    client_options = {
        key: value for key, value in kwargs.items() if key in api.CKAN_KEYS}
    client = CKAN(**client_options)

    try:
        response = client.fetch_resource(resource_id)
    except api.NotAuthorized as err:
        sys.exit('ERROR: %s\n' % str(err))

    # sys.exit raises, so getting here means the resource was fetched
    target = tup.make_filepath(
        destination,
        headers=response.headers,
        name_from_id=name_from_id,
        resource_id=resource_id)
    tio.write(target, response.iter_content, chunksize=chunksize)

    # save encoding to extended attributes
    attributes = xattr(target)

    if response.encoding:
        if announce:
            print(
                'saving encoding %s to extended attributes'
                % response.encoding)

        attributes['com.ckanny.encoding'] = response.encoding

    print(target)
def migrate(resource_id, **kwargs):
    """Copies a filestore resource from one ckan instance to another

    The resource is fetched from the source instance, written to a temporary
    file, uploaded to the destination instance, and the temporary file is
    removed afterwards.

    Args:
        resource_id: id of the resource to copy.
        **kwargs: command options. `src_remote`, `dest_remote`, `quiet` and
            `chunksize_bytes` are required. Entries whose key is listed in
            `api.CKAN_KEYS` are forwarded to both `CKAN` instances.

    Exits through `sys.exit` with an error message when the two remotes are
    equal, when fetching the resource fails, or when the upload returns a
    falsy result.
    """
    src_remote, dest_remote = kwargs['src_remote'], kwargs['dest_remote']

    if src_remote == dest_remote:
        msg = (
            'ERROR: `dest-remote` of %s is the same as `src-remote` of %s.\n'
            'The dest and src remotes must be different.\n' % (
                dest_remote, src_remote))

        sys.exit(msg)

    verbose = not kwargs['quiet']
    chunksize = kwargs['chunksize_bytes']
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    src_ckan = CKAN(remote=src_remote, **ckan_kwargs)
    dest_ckan = CKAN(remote=dest_remote, **ckan_kwargs)

    # Bound up front so the `finally` clause can tell whether a temp file was
    # actually created; otherwise a failed fetch would surface as a NameError
    # from `unlink(filepath)` instead of the intended error message.
    filepath = None

    try:
        r = src_ckan.fetch_resource(resource_id)

        # Only the path is needed; close the handle right away so it does not
        # leak. `delete=False` keeps the file on disk after closing.
        with NamedTemporaryFile(delete=False) as tmp:
            filepath = tmp.name
    except (api.NotAuthorized, Exception) as err:
        # both cases were handled identically in separate clauses before
        sys.exit('ERROR: %s\n' % str(err))
    else:
        tio.write(filepath, r.raw.read(), chunksize=chunksize)
        resource = dest_ckan.update_filestore(resource_id, filepath=filepath)

        if resource and verbose:
            print('Success! Resource %s updated.' % resource_id)
        elif not resource:
            sys.exit('Error uploading file!')
    finally:
        if filepath is not None:
            if verbose:
                print('Removing tempfile...')

            unlink(filepath)
def customize(org_id, **kwargs):
    """Introspects custom organization values

    Resolves the organization's 3w, topline and geojson resources (using the
    ids supplied in `kwargs` when present, otherwise searching the
    organization's packages and the `hdx` organization's packages), reads
    the field names of the 3w and geojson resources, and assembles the
    custom pages control sheet row.

    Args:
        org_id: id of the organization to inspect.
        **kwargs: command options. `quiet` and `remote` are required.
            Optional keys read here: `image_sq`, `image_rect`, `sanitize`,
            `3w`, `geojson`, `topline`, `where`, `datatype_1`, `datatype_2`,
            `color`, `viz_type`, `viz_title`. Entries whose key is listed in
            `api.CKAN_KEYS` are forwarded to `CKAN`; the full mapping is
            also forwarded to `find_field`.

    Returns:
        list: the values of the assembled data, ordered by
        `control_sheet_keys`.

    Exits through `sys.exit(1)` when no 3w resource id can be determined.
    """
    # NOTE(review): another `customize` with the same body is defined later
    # in this file and rebinds the name at import time -- confirm which copy
    # is meant to stay.

    def _image_url(image):
        """Expand a bare Google Docs file id into a download URL."""
        # The option may be absent (None); `'http' in None` would raise a
        # TypeError, so pass None through untouched.
        if image is None or 'http' in image:
            return image

        gdocs = 'https://docs.google.com'
        return '%s/uc?id=%s&export=download' % (gdocs, image)

    verbose = not kwargs['quiet']
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    image_sq = kwargs.get('image_sq')
    image_rect = kwargs.get('image_rect')
    sanitize = kwargs.get('sanitize')
    three_dub_id = kwargs.get('3w')
    geojson_id = kwargs.get('geojson')
    topline_id = kwargs.get('topline')

    ckan = CKAN(**ckan_kwargs)
    organization = ckan.organization_show(id=org_id, include_datasets=True)
    org_packages = organization['packages']
    hdx = ckan.organization_show(id='hdx', include_datasets=True)
    extras = {e['key']: e['value'] for e in organization['extras']}

    if three_dub_id:
        three_dub_set_id = ckan.get_package_id(three_dub_id)
    else:
        ids = ckan.find_ids(org_packages, pnamed='3w', ptagged='3w')
        three_dub_set_id = ids['pname']
        three_dub_id = ids['rid']

    if not three_dub_id:
        sys.exit(1)

    if not topline_id:
        topline_id = ckan.find_ids(org_packages, pnamed='topline')['rid']

    if geojson_id:
        geojson_set_id = ckan.get_package_id(geojson_id)
    else:
        # The second '-'-separated part of the org id is used as the
        # resource name to look for; an id without '-' raises IndexError.
        country = org_id.split('-')[1]
        hkwargs = {'pnamed': 'json-repository', 'rnamed': country}
        ids = ckan.find_ids(hdx['packages'], **hkwargs)
        geojson_set_id = ids['pname']
        geojson_id = ids['rid']

    viz_url = '%s/dataset/%s' % (kwargs['remote'], three_dub_set_id)
    three_dub_r = ckan.fetch_resource(three_dub_id)

    # The first line of the 3w resource is split on commas to get the field
    # names. The builtin `next` works on Python 2.6+ and 3, unlike `.next()`.
    _fields = next(three_dub_r.iter_lines()).split(',')
    three_dub_fields = tup.underscorify(_fields) if sanitize else _fields

    if geojson_id:
        geojson_r = ckan.fetch_resource(geojson_id)
        properties = geojson_r.json()['features'][0]['properties']
        # a real list (not a Python 3 keys view) so it prints and can be
        # indexed like the 3w field list
        geojson_fields = list(properties.keys())
    else:
        geojson_fields = []

    if verbose:
        print('3w fields:')
        pprint(three_dub_fields)
        print('geojson fields:')
        pprint(geojson_fields)

    def_where = tup.find(three_dub_fields, geojson_fields) or ''
    who_column = find_field(three_dub_fields, 'who', **kwargs)
    what_column = find_field(three_dub_fields, 'what', **kwargs)
    where_column = find_field(three_dub_fields, 'where', def_where, **kwargs)
    where_column_2 = find_field(geojson_fields, 'where', def_where, **kwargs)
    name_column = kwargs.get('where') or def_where

    image_sq = _image_url(image_sq)
    image_rect = _image_url(image_rect)

    data = {
        'name': org_id,
        'resource_id_1': three_dub_id,
        'resource_id_2': geojson_id,
        'topline_resource': topline_id,
        'datatype_1': kwargs.get('datatype_1') or 'datastore',
        'datatype_2': kwargs.get('datatype_2') or 'filestore',
        'org_url': extras['org_url'],
        'description': organization['description'],
        'title': organization['title'],
        'image_sq': image_sq,
        'image_rect': image_rect,
        'highlight_color': kwargs.get('color'),
        'dataset_id_1': three_dub_set_id,
        'dataset_id_2': geojson_set_id,
        'who_column': deref_field(three_dub_fields, who_column),
        'what_column': deref_field(three_dub_fields, what_column),
        'where_column': deref_field(three_dub_fields, where_column),
        'where_column_2': deref_field(geojson_fields, where_column_2),
        'map_district_name_column': deref_field(geojson_fields, name_column),
        'viz_data_link_url': viz_url,
        'visualization_select': kwargs.get('viz_type', '3W-dashboard'),
        'viz_title': kwargs.get('viz_title', "Who's doing what and where?"),
        'colors': [
            '#c6d5ed', '#95b5df', '#659ad2', '#026bb5',
            '#659ad2', '#213b68', '#101d4e', '#000035'],
        'use_org_color': True,
        'modified_at': int(time()),
    }

    control_sheet_data = [data[k] for k in control_sheet_keys]

    if verbose:
        print('\nCustom pages control sheet data:')
        print(control_sheet_data)

    return control_sheet_data
def update(resource_id, force=None, **kwargs):
    """Updates a datastore table based on the current filestore resource

    The resource is downloaded into a temporary file and hashed. The hash is
    compared with the one stored for the resource; the datastore is updated
    when the hash differs, when no hash is stored, or when `force` is set.
    If looking up the stored hash raises `api.NotFound`, the missing hash
    table package and/or resource is created first.

    Args:
        resource_id: id of the resource to update.
        force: when truthy, update even if the file hash is unchanged.
        **kwargs: command options. Keys read here: `quiet`, `chunk_bytes`
            (defaults to `api.CHUNKSIZE_BYTES`), and, only when the hash
            table has to be created, `hash_group` and `hash_table`. Entries
            whose key is listed in `api.CKAN_KEYS` are forwarded to `CKAN`;
            the full mapping, plus `encoding` and `content_type` taken from
            the response, is forwarded to `ckan.update_datastore`.

    Exits through `sys.exit` with an error message when the resource cannot
    be fetched or is not updated, and with status 0 when nothing changed and
    `force` is not set.
    """
    verbose = not kwargs.get('quiet')
    chunk_bytes = kwargs.get('chunk_bytes', api.CHUNKSIZE_BYTES)
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    hash_kwargs = {'chunksize': chunk_bytes, 'verbose': verbose}
    ckan = CKAN(**ckan_kwargs)

    try:
        r = ckan.fetch_resource(resource_id)
    except (api.NotFound, api.NotAuthorized) as err:
        sys.exit('ERROR: %s\n' % str(err))

    # The context manager closes the temporary file on every way out of the
    # block, including the sys.exit calls below.
    with SpooledTemporaryFile(suffix='.xlsx', mode='r+b') as f:
        write_kwargs = {
            'length': r.headers.get('content-length'),
            'chunksize': chunk_bytes
        }

        tio.write(f, r.iter_content, **write_kwargs)

        try:
            old_hash = ckan.get_hash(resource_id)
        except api.NotFound as err:
            # the first exception argument names what is missing
            item = err.args[0]['item']

            if item == 'package':
                orgs = ckan.organization_list(permission='admin_group')

                # The builtin `next` works on Python 2.6+ and 3, unlike the
                # `.next()` method. It raises StopIteration when no
                # organization is named `hash_group`.
                owner_org = next(
                    o['id'] for o in orgs
                    if o['display_name'] == kwargs['hash_group'])

                package_kwargs = {
                    'name': kwargs['hash_table'],
                    'owner_org': owner_org,
                    'package_creator': 'Hash Table',
                    'dataset_source': 'Multiple sources',
                    'notes': 'Datastore resource hash table'
                }

                ckan.hash_table_pack = ckan.package_create(**package_kwargs)

            # a freshly created package still needs its resource
            if item in {'package', 'resource'}:
                fileobj = StringIO('datastore_id,hash\n')
                create_kwargs = {'fileobj': fileobj, 'name': api.DEF_HASH_RES}
                table = kwargs['hash_table']
                resource = ckan.create_resource(table, **create_kwargs)
                ckan.hash_table_id = resource['id']

            ckan.create_hash_table(verbose)
            old_hash = ckan.get_hash(resource_id)

        new_hash = tio.hash_file(f, **hash_kwargs)

        # a missing stored hash always counts as a change
        changed = new_hash != old_hash if old_hash else True

        if verbose:
            print(get_message(changed, force))

        if not (changed or force):
            sys.exit(0)

        kwargs['encoding'] = r.encoding
        kwargs['content_type'] = r.headers['content-type']
        updated = ckan.update_datastore(resource_id, f, **kwargs)

        if updated and verbose:
            print('Success! Resource %s updated.' % resource_id)

        if updated and changed:
            ckan.update_hash_table(resource_id, new_hash, verbose)
        elif not updated:
            sys.exit('ERROR: resource %s not updated.' % resource_id)
def customize(org_id, **kwargs):
    """Introspects custom organization values

    Resolves the organization's 3w, topline and geojson resources (using the
    ids supplied in `kwargs` when present, otherwise searching the
    organization's packages and the `hdx` organization's packages), reads
    the field names of the 3w and geojson resources, and assembles the
    custom pages control sheet row.

    Args:
        org_id: id of the organization to inspect.
        **kwargs: command options. `quiet` and `remote` are required.
            Optional keys read here: `image_sq`, `image_rect`, `sanitize`,
            `3w`, `geojson`, `topline`, `where`, `datatype_1`, `datatype_2`,
            `color`, `viz_type`, `viz_title`. Entries whose key is listed in
            `api.CKAN_KEYS` are forwarded to `CKAN`; the full mapping is
            also forwarded to `find_field`.

    Returns:
        list: the values of the assembled data, ordered by
        `control_sheet_keys`.

    Exits through `sys.exit(1)` when no 3w resource id can be determined.
    """
    # NOTE(review): another `customize` with the same body is defined
    # earlier in this file; this later definition is the one left bound at
    # import time -- confirm whether the duplicate is intentional.

    def _image_url(image):
        """Expand a bare Google Docs file id into a download URL."""
        # The option may be absent (None); `'http' in None` would raise a
        # TypeError, so pass None through untouched.
        if image is None or 'http' in image:
            return image

        gdocs = 'https://docs.google.com'
        return '%s/uc?id=%s&export=download' % (gdocs, image)

    verbose = not kwargs['quiet']
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    image_sq = kwargs.get('image_sq')
    image_rect = kwargs.get('image_rect')
    sanitize = kwargs.get('sanitize')
    three_dub_id = kwargs.get('3w')
    geojson_id = kwargs.get('geojson')
    topline_id = kwargs.get('topline')

    ckan = CKAN(**ckan_kwargs)
    organization = ckan.organization_show(id=org_id, include_datasets=True)
    org_packages = organization['packages']
    hdx = ckan.organization_show(id='hdx', include_datasets=True)
    extras = {e['key']: e['value'] for e in organization['extras']}

    if three_dub_id:
        three_dub_set_id = ckan.get_package_id(three_dub_id)
    else:
        ids = ckan.find_ids(org_packages, pnamed='3w', ptagged='3w')
        three_dub_set_id = ids['pname']
        three_dub_id = ids['rid']

    if not three_dub_id:
        sys.exit(1)

    if not topline_id:
        topline_id = ckan.find_ids(org_packages, pnamed='topline')['rid']

    if geojson_id:
        geojson_set_id = ckan.get_package_id(geojson_id)
    else:
        # The second '-'-separated part of the org id is used as the
        # resource name to look for; an id without '-' raises IndexError.
        country = org_id.split('-')[1]
        hkwargs = {'pnamed': 'json-repository', 'rnamed': country}
        ids = ckan.find_ids(hdx['packages'], **hkwargs)
        geojson_set_id = ids['pname']
        geojson_id = ids['rid']

    viz_url = '%s/dataset/%s' % (kwargs['remote'], three_dub_set_id)
    three_dub_r = ckan.fetch_resource(three_dub_id)

    # The first line of the 3w resource is split on commas to get the field
    # names. The builtin `next` works on Python 2.6+ and 3, unlike `.next()`.
    _fields = next(three_dub_r.iter_lines()).split(',')
    three_dub_fields = tup.underscorify(_fields) if sanitize else _fields

    if geojson_id:
        geojson_r = ckan.fetch_resource(geojson_id)
        properties = geojson_r.json()['features'][0]['properties']
        # a real list (not a Python 3 keys view) so it prints and can be
        # indexed like the 3w field list
        geojson_fields = list(properties.keys())
    else:
        geojson_fields = []

    if verbose:
        print('3w fields:')
        pprint(three_dub_fields)
        print('geojson fields:')
        pprint(geojson_fields)

    def_where = tup.find(three_dub_fields, geojson_fields) or ''
    who_column = find_field(three_dub_fields, 'who', **kwargs)
    what_column = find_field(three_dub_fields, 'what', **kwargs)
    where_column = find_field(three_dub_fields, 'where', def_where, **kwargs)
    where_column_2 = find_field(geojson_fields, 'where', def_where, **kwargs)
    name_column = kwargs.get('where') or def_where

    image_sq = _image_url(image_sq)
    image_rect = _image_url(image_rect)

    data = {
        'name': org_id,
        'resource_id_1': three_dub_id,
        'resource_id_2': geojson_id,
        'topline_resource': topline_id,
        'datatype_1': kwargs.get('datatype_1') or 'datastore',
        'datatype_2': kwargs.get('datatype_2') or 'filestore',
        'org_url': extras['org_url'],
        'description': organization['description'],
        'title': organization['title'],
        'image_sq': image_sq,
        'image_rect': image_rect,
        'highlight_color': kwargs.get('color'),
        'dataset_id_1': three_dub_set_id,
        'dataset_id_2': geojson_set_id,
        'who_column': deref_field(three_dub_fields, who_column),
        'what_column': deref_field(three_dub_fields, what_column),
        'where_column': deref_field(three_dub_fields, where_column),
        'where_column_2': deref_field(geojson_fields, where_column_2),
        'map_district_name_column': deref_field(geojson_fields, name_column),
        'viz_data_link_url': viz_url,
        'visualization_select': kwargs.get('viz_type', '3W-dashboard'),
        'viz_title': kwargs.get('viz_title', "Who's doing what and where?"),
        'colors': [
            '#c6d5ed', '#95b5df', '#659ad2', '#026bb5',
            '#659ad2', '#213b68', '#101d4e', '#000035'
        ],
        'use_org_color': True,
        'modified_at': int(time()),
    }

    control_sheet_data = [data[k] for k in control_sheet_keys]

    if verbose:
        print('\nCustom pages control sheet data:')
        print(control_sheet_data)

    return control_sheet_data