Esempio n. 1
0
def fetch(resource_id, **kwargs):
    """Download a filestore resource and save it to disk.

    Options are read from `kwargs`: `quiet` and `destination` are required,
    `name_from_id` and `chunksize_bytes` are optional, and every entry whose
    key is in `api.CKAN_KEYS` is passed on to the `CKAN` constructor.

    Exits with an error message if fetching raises `api.NotAuthorized`.
    On success the path of the written file is printed.
    """
    verbose = not kwargs['quiet']
    destination = kwargs['destination']
    name_from_id = kwargs.get('name_from_id')
    chunksize = kwargs.get('chunksize_bytes')
    ckan = CKAN(**{k: v for k, v in kwargs.items() if k in api.CKAN_KEYS})

    try:
        response = ckan.fetch_resource(resource_id)
    except api.NotAuthorized as err:
        sys.exit('ERROR: %s\n' % str(err))
    else:
        # The final path is derived from the requested destination, the
        # response headers and the resource id.
        saved_path = tup.make_filepath(
            destination,
            headers=response.headers,
            name_from_id=name_from_id,
            resource_id=resource_id)

        tio.write(saved_path, response.iter_content, chunksize=chunksize)

        # Record the response encoding (when there is one) in the file's
        # extended attributes.
        attrs = xattr(saved_path)
        encoding = response.encoding

        if encoding:
            if verbose:
                print('saving encoding %s to extended attributes' % encoding)

            attrs['com.ckanny.encoding'] = encoding

        print(saved_path)
Esempio n. 2
0
    def test_write(self):
        # Text source written first to an in-memory file, then (after
        # rewinding) to a temporary file.
        text = StringIO("Iñtërnâtiônàližætiøn")
        written = io.write(StringIO(), text)
        nt.assert_equal(20, written)

        text.seek(0)
        written = io.write(TemporaryFile(), text)
        nt.assert_equal(28, written)

        # Iterator-backed source written with an explicit chunk size.
        chunked = io.IterStringIO(iter("Hello World"))
        written = io.write(TemporaryFile(), chunked, chunksize=2)
        nt.assert_equal(12, written)

        # Registered HTTP response streamed through `iter_content`.
        url = "http://google.com"
        body = '<!doctype html><html itemtype="http://schema.org/page">'
        responses.add(responses.GET, url=url, body=body)
        response = requests.get(url, stream=True)
        written = io.write(TemporaryFile(), response.iter_content)
        nt.assert_equal(55, written)
Esempio n. 3
0
def run():
    """Convert the CSV source given on the command line to QIF or OFX.

    All options are read from the module-level `args` namespace:

    * `debug` pretty-prints the parsed arguments and exits.
    * `version` prints the package version and exits.
    * otherwise the mapping module named by `args.mapping` is loaded, the
      records from `args.source` are grouped, turned into transactions and
      cleaned, and the resulting header, body and footer are written to
      `args.dest`.

    If writing raises `TypeError`, the process exits with the error text
    (plus a hint to use `-c` when `args.collapse` is not set).
    """
    if args.debug:
        pprint(dict(args._get_kwargs()))
        exit(0)

    if args.version:
        from . import __version__ as version
        print('v%s' % version)
        exit(0)

    mapping = import_module('csv2ofx.mappings.%s' % args.mapping).mapping

    # An explicitly supplied account type must win for both formats. The
    # unparenthesized `a or 'Bank' if qif else 'CHECKING'` parses as
    # `(a or 'Bank') if qif else 'CHECKING'`, which silently dropped the
    # user's choice for OFX output.
    default_type = 'Bank' if args.qif else 'CHECKING'

    okwargs = {
        'def_type': args.account_type or default_type,
        'split_header': args.split,
        'start': parse(args.start),
        'end': parse(args.end)
    }

    cont = QIF(mapping, **okwargs) if args.qif else OFX(mapping, **okwargs)
    records = read_csv(args.source, has_header=cont.has_header)
    groups = cont.gen_groups(records, args.chunksize)
    trxns = cont.gen_trxns(groups, args.collapse)
    cleaned_trxns = cont.clean_trxns(trxns)
    data = utils.gen_data(cleaned_trxns)
    body = cont.gen_body(data)

    try:
        mtime = p.getmtime(args.source.name)
    except AttributeError:
        # The source has no `name` attribute, so fall back to the current
        # time.
        mtime = time.time()

    server_date = dt.fromtimestamp(mtime)
    header = cont.header(date=server_date, language=args.language)
    footer = cont.footer(date=server_date)
    content = it.chain([header, body, footer])
    kwargs = {'overwrite': args.overwrite, 'chunksize': args.chunksize}

    try:
        write(args.dest, IterStringIO(content), **kwargs)
    except TypeError as e:
        msg = str(e)

        if not args.collapse:
            # Leading space keeps the hint from running into the error text.
            msg += ' Try again with `-c` option.'

        exit(msg)
Esempio n. 4
0
File: main.py Progetto: Drey/csv2ofx
def run():
    """Convert the CSV source given on the command line to QIF or OFX.

    All options are read from the module-level `args` namespace:

    * `debug` pretty-prints the parsed arguments and exits.
    * `version` prints the package version and exits.
    * otherwise the mapping module named by `args.mapping` is loaded, the
      records from `args.source` are grouped, turned into transactions and
      cleaned, and the resulting header, body and footer are written to
      `args.dest`.

    If writing raises `TypeError`, the process exits with the error text
    (plus a hint to use `-c` when `args.collapse` is not set).
    """
    if args.debug:
        pprint(dict(args._get_kwargs()))
        exit(0)

    if args.version:
        from . import __version__ as version
        print('v%s' % version)
        exit(0)

    mapping = import_module('csv2ofx.mappings.%s' % args.mapping).mapping

    # An explicitly supplied account type must win for both formats. The
    # unparenthesized `a or 'Bank' if qif else 'CHECKING'` parses as
    # `(a or 'Bank') if qif else 'CHECKING'`, which silently dropped the
    # user's choice for OFX output.
    default_type = 'Bank' if args.qif else 'CHECKING'

    okwargs = {
        'def_type': args.account_type or default_type,
        'split_header': args.split,
        'start': parse(args.start),
        'end': parse(args.end)
    }

    cont = QIF(mapping, **okwargs) if args.qif else OFX(mapping, **okwargs)
    records = read_csv(args.source, has_header=cont.has_header)
    groups = cont.gen_groups(records, args.chunksize)
    trxns = cont.gen_trxns(groups, args.collapse)
    cleaned_trxns = cont.clean_trxns(trxns)
    data = utils.gen_data(cleaned_trxns)
    body = cont.gen_body(data)

    try:
        mtime = p.getmtime(args.source.name)
    except AttributeError:
        # The source has no `name` attribute, so fall back to the current
        # time.
        mtime = time.time()

    server_date = dt.fromtimestamp(mtime)
    header = cont.header(date=server_date, language=args.language)
    footer = cont.footer(date=server_date)
    content = it.chain([header, body, footer])
    kwargs = {'overwrite': args.overwrite, 'chunksize': args.chunksize}

    try:
        write(args.dest, IterStringIO(content), **kwargs)
    except TypeError as e:
        msg = str(e)

        if not args.collapse:
            # Leading space keeps the hint from running into the error text.
            msg += ' Try again with `-c` option.'

        exit(msg)
Esempio n. 5
0
def migrate(resource_id, **kwargs):
    """Copies a filestore resource from one ckan instance to another

    The resource is fetched from the source instance, written to a
    temporary file, and uploaded to the destination instance with
    `update_filestore`. The temporary file is removed afterwards.

    Args:
        resource_id: id of the resource to copy.
        **kwargs: options. `src_remote`, `dest_remote`, `quiet` and
            `chunksize_bytes` are required; entries whose key is in
            `api.CKAN_KEYS` are passed to both `CKAN` constructors.

    Raises:
        SystemExit: if the two remotes are equal, if fetching the resource
            or creating the temporary file fails, or if the upload returns
            a falsy result.
    """
    src_remote, dest_remote = kwargs['src_remote'], kwargs['dest_remote']

    if src_remote == dest_remote:
        msg = (
            'ERROR: `dest-remote` of %s is the same as `src-remote` of %s.\n'
            'The dest and src remotes must be different.\n' % (
                dest_remote, src_remote))

        sys.exit(msg)

    verbose = not kwargs['quiet']
    chunksize = kwargs['chunksize_bytes']
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    src_ckan = CKAN(remote=src_remote, **ckan_kwargs)
    dest_ckan = CKAN(remote=dest_remote, **ckan_kwargs)

    # Stays None until the temp file exists, so the cleanup below never
    # touches an unbound name when the fetch itself fails.
    filepath = None

    try:
        r = src_ckan.fetch_resource(resource_id)

        # Only the path is needed; close the handle right away.
        with NamedTemporaryFile(delete=False) as tmp:
            filepath = tmp.name
    except api.NotAuthorized as err:
        sys.exit('ERROR: %s\n' % str(err))
    except Exception as err:
        sys.exit('ERROR: %s\n' % str(err))
    else:
        tio.write(filepath, r.raw.read(), chunksize=chunksize)
        resource = dest_ckan.update_filestore(resource_id, filepath=filepath)

        if resource and verbose:
            print('Success! Resource %s updated.' % resource_id)
        elif not resource:
            sys.exit('Error uploading file!')
    finally:
        if filepath is not None:
            if verbose:
                print('Removing tempfile...')

            unlink(filepath)
Esempio n. 6
0
def update(resource_id, force=None, **kwargs):
    """Updates a datastore table based on the current filestore resource

    The resource is fetched and written to a temporary file, and the hash
    of that file is compared with the value returned by `ckan.get_hash`.
    The datastore is updated when the hashes differ, when no previous hash
    is available, or when `force` is truthy.

    Args:
        resource_id: id of the resource to fetch and update.
        force: when truthy, update even if the file hash is unchanged.
        **kwargs: options. `quiet` and `chunk_bytes` are read here;
            `hash_table` (and `hash_group`, when the package is missing)
            is required if the hash package/resource has to be created;
            entries whose key is in `api.CKAN_KEYS` are passed to `CKAN`;
            and the whole mapping, with `encoding` and `content_type`
            added, is forwarded to `ckan.update_datastore`.

    Raises:
        SystemExit: if fetching raises `api.NotFound` or
            `api.NotAuthorized`, if nothing changed and `force` is falsy
            (exit status 0), or if the datastore update returns a falsy
            result.
        StopIteration: if no organization has a `display_name` equal to
            `kwargs['hash_group']` when the hash package must be created.
    """
    verbose = not kwargs.get('quiet')
    chunk_bytes = kwargs.get('chunk_bytes', api.CHUNKSIZE_BYTES)
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    hash_kwargs = {'chunksize': chunk_bytes, 'verbose': verbose}
    ckan = CKAN(**ckan_kwargs)

    try:
        r = ckan.fetch_resource(resource_id)
    except (api.NotFound, api.NotAuthorized) as err:
        sys.exit('ERROR: %s\n' % str(err))
    else:
        f = SpooledTemporaryFile(suffix='.xlsx', mode='r+b')
        write_kwargs = {
            'length': r.headers.get('content-length'),
            'chunksize': chunk_bytes
        }

        tio.write(f, r.iter_content, **write_kwargs)

        try:
            old_hash = ckan.get_hash(resource_id)
        except api.NotFound as err:
            # The exception payload names what is missing: the hash table
            # package or the resource inside it.
            item = err.args[0]['item']

            if item == 'package':
                orgs = ckan.organization_list(permission='admin_group')

                # The `next()` builtin works on Python 2.6+ and Python 3;
                # the generator's `.next()` method exists on Python 2 only.
                owner_org = next(
                    o['id'] for o in orgs
                    if o['display_name'] == kwargs['hash_group'])

                package_kwargs = {
                    'name': kwargs['hash_table'],
                    'owner_org': owner_org,
                    'package_creator': 'Hash Table',
                    'dataset_source': 'Multiple sources',
                    'notes': 'Datastore resource hash table'
                }

                ckan.hash_table_pack = ckan.package_create(**package_kwargs)

            if item in {'package', 'resource'}:
                # Seed the new hash resource with a header-only CSV.
                fileobj = StringIO('datastore_id,hash\n')
                create_kwargs = {'fileobj': fileobj, 'name': api.DEF_HASH_RES}
                table = kwargs['hash_table']
                resource = ckan.create_resource(table, **create_kwargs)
                ckan.hash_table_id = resource['id']

            ckan.create_hash_table(verbose)
            old_hash = ckan.get_hash(resource_id)

        new_hash = tio.hash_file(f, **hash_kwargs)

        # A missing previous hash always counts as a change.
        changed = new_hash != old_hash if old_hash else True

        if verbose:
            print(get_message(changed, force))

        if not (changed or force):
            sys.exit(0)

        kwargs['encoding'] = r.encoding
        kwargs['content_type'] = r.headers['content-type']
        updated = ckan.update_datastore(resource_id, f, **kwargs)

        if updated and verbose:
            print('Success! Resource %s updated.' % resource_id)

        if updated and changed:
            ckan.update_hash_table(resource_id, new_hash, verbose)
        elif not updated:
            sys.exit('ERROR: resource %s not updated.' % resource_id)
Esempio n. 7
0
def update(resource_id, force=None, **kwargs):
    """Updates a datastore table based on the current filestore resource

    The resource is fetched and written to a temporary file, and the hash
    of that file is compared with the value returned by `ckan.get_hash`.
    The datastore is updated when the hashes differ, when no previous hash
    is available, or when `force` is truthy.

    Args:
        resource_id: id of the resource to fetch and update.
        force: when truthy, update even if the file hash is unchanged.
        **kwargs: options. `quiet` and `chunk_bytes` are read here;
            `hash_table` (and `hash_group`, when the package is missing)
            is required if the hash package/resource has to be created;
            entries whose key is in `api.CKAN_KEYS` are passed to `CKAN`;
            and the whole mapping, with `encoding` and `content_type`
            added, is forwarded to `ckan.update_datastore`.

    Raises:
        SystemExit: if fetching raises `api.NotFound` or
            `api.NotAuthorized`, if nothing changed and `force` is falsy
            (exit status 0), or if the datastore update returns a falsy
            result.
        StopIteration: if no organization has a `display_name` equal to
            `kwargs['hash_group']` when the hash package must be created.
    """
    verbose = not kwargs.get('quiet')
    chunk_bytes = kwargs.get('chunk_bytes', api.CHUNKSIZE_BYTES)
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    hash_kwargs = {'chunksize': chunk_bytes, 'verbose': verbose}
    ckan = CKAN(**ckan_kwargs)

    try:
        r = ckan.fetch_resource(resource_id)
    except (api.NotFound, api.NotAuthorized) as err:
        sys.exit('ERROR: %s\n' % str(err))
    else:
        f = SpooledTemporaryFile(suffix='.xlsx', mode='r+b')
        write_kwargs = {
            'length': r.headers.get('content-length'),
            'chunksize': chunk_bytes
        }

        tio.write(f, r.iter_content, **write_kwargs)

        try:
            old_hash = ckan.get_hash(resource_id)
        except api.NotFound as err:
            # The exception payload names what is missing: the hash table
            # package or the resource inside it.
            item = err.args[0]['item']

            if item == 'package':
                orgs = ckan.organization_list(permission='admin_group')

                # The `next()` builtin works on Python 2.6+ and Python 3;
                # the generator's `.next()` method exists on Python 2 only.
                owner_org = next(
                    o['id'] for o in orgs
                    if o['display_name'] == kwargs['hash_group'])

                package_kwargs = {
                    'name': kwargs['hash_table'],
                    'owner_org': owner_org,
                    'package_creator': 'Hash Table',
                    'dataset_source': 'Multiple sources',
                    'notes': 'Datastore resource hash table'
                }

                ckan.hash_table_pack = ckan.package_create(**package_kwargs)

            if item in {'package', 'resource'}:
                # Seed the new hash resource with a header-only CSV.
                fileobj = StringIO('datastore_id,hash\n')
                create_kwargs = {'fileobj': fileobj, 'name': api.DEF_HASH_RES}
                table = kwargs['hash_table']
                resource = ckan.create_resource(table, **create_kwargs)
                ckan.hash_table_id = resource['id']

            ckan.create_hash_table(verbose)
            old_hash = ckan.get_hash(resource_id)

        new_hash = tio.hash_file(f, **hash_kwargs)

        # A missing previous hash always counts as a change.
        changed = new_hash != old_hash if old_hash else True

        if verbose:
            print(get_message(changed, force))

        if not (changed or force):
            sys.exit(0)

        kwargs['encoding'] = r.encoding
        kwargs['content_type'] = r.headers['content-type']
        updated = ckan.update_datastore(resource_id, f, **kwargs)

        if updated and verbose:
            print('Success! Resource %s updated.' % resource_id)

        if updated and changed:
            ckan.update_hash_table(resource_id, new_hash, verbose)
        elif not updated:
            sys.exit('ERROR: resource %s not updated.' % resource_id)