def fetch(resource_id, **kwargs):
    """Download a filestore resource and save it to disk.

    ``kwargs`` must contain ``quiet`` and ``destination``; ``name_from_id``
    and ``chunksize_bytes`` are optional. Entries whose key appears in
    ``api.CKAN_KEYS`` are forwarded to the ``CKAN`` constructor.

    The path of the written file is printed at the end. If the response
    reports an encoding, it is stored in the file's extended attributes
    under ``com.ckanny.encoding``. Exits with an error message when the
    resource fetch raises ``api.NotAuthorized``.
    """
    quiet = kwargs['quiet']
    destination = kwargs['destination']
    client = CKAN(**{
        key: value for key, value in kwargs.items() if key in api.CKAN_KEYS})

    try:
        response = client.fetch_resource(resource_id)
    except api.NotAuthorized as err:
        sys.exit('ERROR: %s\n' % str(err))

    # Resolve the final path from the destination and the response headers.
    target = tup.make_filepath(
        destination,
        headers=response.headers,
        name_from_id=kwargs.get('name_from_id'),
        resource_id=resource_id)

    tio.write(
        target, response.iter_content,
        chunksize=kwargs.get('chunksize_bytes'))

    # Save the encoding to the extended attributes.
    attributes = xattr(target)
    encoding = response.encoding

    if encoding:
        if not quiet:
            print('saving encoding %s to extended attributes' % encoding)

        attributes['com.ckanny.encoding'] = encoding

    print(target)
def test_write(self):
    """Check the counts returned by ``io.write`` for several content sources.

    The same non-ASCII text is expected to report 20 when written to a
    ``StringIO`` and 28 when written to a ``TemporaryFile`` (presumably
    characters vs. encoded bytes). An ``IterStringIO`` written with
    ``chunksize=2`` and a streamed HTTP response body are checked as well.
    """
    text = StringIO("Iñtërnâtiônàližætiøn")
    nt.assert_equal(20, io.write(StringIO(), text))

    # Rewind so the same content can be written a second time.
    text.seek(0)
    nt.assert_equal(28, io.write(TemporaryFile(), text))

    chunked = io.IterStringIO(iter("Hello World"))
    nt.assert_equal(12, io.write(TemporaryFile(), chunked, chunksize=2))

    # Register a canned response and write its streamed body.
    page_url = "http://google.com"
    markup = '<!doctype html><html itemtype="http://schema.org/page">'
    responses.add(responses.GET, url=page_url, body=markup)
    reply = requests.get(page_url, stream=True)
    nt.assert_equal(55, io.write(TemporaryFile(), reply.iter_content))
def run():
    """Convert the CSV source named by the parsed ``args`` to QIF or OFX.

    Reads the free variable ``args`` (the parsed command-line options):

    * ``debug`` prints the parsed options and exits.
    * ``version`` prints the package version and exits.
    * otherwise the mapping module ``csv2ofx.mappings.<args.mapping>`` is
      loaded, a ``QIF`` or ``OFX`` converter is built depending on
      ``args.qif``, and the header, body and footer it generates are
      written to ``args.dest``.

    If writing raises ``TypeError`` the process exits with the error text,
    followed by a hint to retry with ``-c`` when ``args.collapse`` is unset.
    """
    if args.debug:
        pprint(dict(args._get_kwargs()))
        exit(0)

    if args.version:
        from . import __version__ as version
        print('v%s' % version)
        exit(0)

    mapping = import_module('csv2ofx.mappings.%s' % args.mapping).mapping

    # An explicitly requested account type always wins; only the fallback
    # depends on the output format. The previous single expression parsed as
    # `(account_type or 'Bank') if qif else 'CHECKING'`, which silently
    # discarded the requested type for OFX output.
    default_type = 'Bank' if args.qif else 'CHECKING'

    okwargs = {
        'def_type': args.account_type or default_type,
        'split_header': args.split,
        'start': parse(args.start),
        'end': parse(args.end)
    }

    cont = QIF(mapping, **okwargs) if args.qif else OFX(mapping, **okwargs)
    records = read_csv(args.source, has_header=cont.has_header)
    groups = cont.gen_groups(records, args.chunksize)
    trxns = cont.gen_trxns(groups, args.collapse)
    cleaned_trxns = cont.clean_trxns(trxns)
    data = utils.gen_data(cleaned_trxns)
    body = cont.gen_body(data)

    try:
        mtime = p.getmtime(args.source.name)
    except (AttributeError, OSError):
        # AttributeError: the source has no `name`. OSError: the name is not
        # a path on disk (e.g. '<stdin>'). Either way, fall back to "now".
        mtime = time.time()

    server_date = dt.fromtimestamp(mtime)
    header = cont.header(date=server_date, language=args.language)
    footer = cont.footer(date=server_date)
    content = it.chain([header, body, footer])
    kwargs = {'overwrite': args.overwrite, 'chunksize': args.chunksize}

    try:
        write(args.dest, IterStringIO(content), **kwargs)
    except TypeError as e:
        msg = str(e)

        if not args.collapse:
            # Leading space keeps the hint from running into the error text.
            msg += ' Try again with `-c` option.'

        exit(msg)
def migrate(resource_id, **kwargs):
    """Copies a filestore resource from one ckan instance to another

    ``kwargs`` must contain ``src_remote``, ``dest_remote``, ``quiet`` and
    ``chunksize_bytes``. Entries whose key appears in ``api.CKAN_KEYS`` are
    forwarded to both ``CKAN`` constructors.

    The resource is fetched from the source instance, written to a temporary
    file, and uploaded to the destination instance via ``update_filestore``.
    The temporary file is removed afterwards, whether or not the upload
    succeeded.

    Exits with an error message when the two remotes are equal, when the
    fetch or temporary-file creation fails, or when the upload returns a
    falsy result.
    """
    src_remote, dest_remote = kwargs['src_remote'], kwargs['dest_remote']

    if src_remote == dest_remote:
        msg = (
            'ERROR: `dest-remote` of %s is the same as `src-remote` of %s.\n'
            'The dest and src remotes must be different.\n' % (
                dest_remote, src_remote))

        sys.exit(msg)

    verbose = not kwargs['quiet']
    chunksize = kwargs['chunksize_bytes']
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    src_ckan = CKAN(remote=src_remote, **ckan_kwargs)
    dest_ckan = CKAN(remote=dest_remote, **ckan_kwargs)

    # Stays None until the temp file exists, so the cleanup in `finally`
    # cannot fail on an unbound name and mask the real error.
    filepath = None

    try:
        r = src_ckan.fetch_resource(resource_id)

        # Only the path is needed; close the handle right away so it is not
        # leaked. `delete=False` keeps the file on disk after closing.
        with NamedTemporaryFile(delete=False) as tmp:
            filepath = tmp.name
    except api.NotAuthorized as err:
        sys.exit('ERROR: %s\n' % str(err))
    except Exception as err:
        sys.exit('ERROR: %s\n' % str(err))
    else:
        tio.write(filepath, r.raw.read(), chunksize=chunksize)
        resource = dest_ckan.update_filestore(resource_id, filepath=filepath)

        if resource and verbose:
            print('Success! Resource %s updated.' % resource_id)
        elif not resource:
            sys.exit('Error uploading file!')
    finally:
        if filepath is not None:
            if verbose:
                print('Removing tempfile...')

            unlink(filepath)
def update(resource_id, force=None, **kwargs):
    """Updates a datastore table based on the current filestore resource

    The filestore resource is downloaded into a temporary file and hashed.
    The datastore is updated when the hash differs from the stored one, when
    no hash is stored yet, or when ``force`` is truthy; otherwise the process
    exits with status 0.

    If looking up the stored hash raises ``api.NotFound``, the missing
    package and/or resource backing the hash table is created first, using
    ``kwargs['hash_table']`` and ``kwargs['hash_group']``.

    Optional ``kwargs`` read here: ``quiet`` and ``chunk_bytes``. Entries
    whose key appears in ``api.CKAN_KEYS`` are forwarded to the ``CKAN``
    constructor. ``encoding`` and ``content_type`` are added to ``kwargs``
    before it is passed on to ``update_datastore``.

    Exits with an error message when the resource cannot be fetched or the
    datastore update returns a falsy result.
    """
    verbose = not kwargs.get('quiet')

    # NOTE(review): `fetch` and `migrate` read `chunksize_bytes`, while this
    # reads `chunk_bytes` -- confirm which key callers actually pass.
    chunk_bytes = kwargs.get('chunk_bytes', api.CHUNKSIZE_BYTES)
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    hash_kwargs = {'chunksize': chunk_bytes, 'verbose': verbose}
    ckan = CKAN(**ckan_kwargs)

    try:
        r = ckan.fetch_resource(resource_id)
    except (api.NotFound, api.NotAuthorized) as err:
        sys.exit('ERROR: %s\n' % str(err))

    # The context manager closes the temp file on every exit path, including
    # the `sys.exit` calls below.
    with SpooledTemporaryFile(suffix='.xlsx', mode='r+b') as f:
        write_kwargs = {
            'length': r.headers.get('content-length'),
            'chunksize': chunk_bytes
        }

        tio.write(f, r.iter_content, **write_kwargs)

        try:
            old_hash = ckan.get_hash(resource_id)
        except api.NotFound as err:
            # The error payload names what is missing.
            item = err.args[0]['item']

            if item == 'package':
                orgs = ckan.organization_list(permission='admin_group')

                # Builtin `next()` works on both Python 2.6+ and 3; the
                # `.next()` method exists only on Python 2. Raises
                # StopIteration when no organization matches.
                owner_org = next(
                    o['id'] for o in orgs
                    if o['display_name'] == kwargs['hash_group'])

                package_kwargs = {
                    'name': kwargs['hash_table'],
                    'owner_org': owner_org,
                    'package_creator': 'Hash Table',
                    'dataset_source': 'Multiple sources',
                    'notes': 'Datastore resource hash table'
                }

                ckan.hash_table_pack = ckan.package_create(**package_kwargs)

            # A missing package implies a missing resource as well.
            if item in {'package', 'resource'}:
                fileobj = StringIO('datastore_id,hash\n')
                create_kwargs = {'fileobj': fileobj, 'name': api.DEF_HASH_RES}
                table = kwargs['hash_table']
                resource = ckan.create_resource(table, **create_kwargs)
                ckan.hash_table_id = resource['id']

            ckan.create_hash_table(verbose)
            old_hash = ckan.get_hash(resource_id)

        new_hash = tio.hash_file(f, **hash_kwargs)

        # With no stored hash the resource always counts as changed.
        changed = new_hash != old_hash if old_hash else True

        if verbose:
            print(get_message(changed, force))

        if not (changed or force):
            sys.exit(0)

        kwargs['encoding'] = r.encoding
        kwargs['content_type'] = r.headers['content-type']
        updated = ckan.update_datastore(resource_id, f, **kwargs)

        if updated and verbose:
            print('Success! Resource %s updated.' % resource_id)

        if updated and changed:
            ckan.update_hash_table(resource_id, new_hash, verbose)
        elif not updated:
            sys.exit('ERROR: resource %s not updated.' % resource_id)