Ejemplo n.º 1
0
    def test_csv(self):
        """Verify CSV reading: explicit headers, generated headers,
        sanitized bad files, and fixed-width text files."""
        filepath = p.join(io.DATA_DIR, "test.csv")
        header = ["some_date", "sparse_data", "some_value", "unicode_test"]

        # Bug fix: the 'U' (universal newlines) mode flag was deprecated in
        # Python 3 and removed in 3.11, where "rU" raises ValueError. Text
        # mode already performs newline translation.
        with open(filepath, "r", encoding="utf-8") as f:
            records = io._read_csv(f, header)
            nt.assert_equal(self.sheet0_alt, next(records))

        # Without a header row, columns are auto-named column_1, column_2, ...
        filepath = p.join(io.DATA_DIR, "no_header_row.csv")
        records = io.read_csv(filepath, has_header=False)
        expected = {"column_1": "1", "column_2": "2", "column_3": "3"}
        nt.assert_equal(expected, next(records))

        # sanitize + first_row/first_col skip the junk surrounding the data.
        filepath = p.join(io.DATA_DIR, "test_bad.csv")
        kwargs = {"sanitize": True, "first_row": 1, "first_col": 1}
        records = io.read_csv(filepath, **kwargs)
        nt.assert_equal(self.sheet0_alt, next(records))

        # Fixed-width file: `widths` holds the starting column of each field.
        filepath = p.join(io.DATA_DIR, "fixed_w_header.txt")
        widths = [0, 18, 29, 33, 38, 50]
        records = io.read_fixed_fmt(filepath, widths, has_header=True)
        expected = {
            "News Paper": "Chicago Reader",
            "Founded": "1971-01-01",
            "Int": "40",
            "Bool": "True",
            "Float": "1.0",
            "Timestamp": "04:14:001971-01-01T04:14:00",
        }

        nt.assert_equal(expected, next(records))
Ejemplo n.º 2
0
    def test_encoding_detection(self):
        """The reader should recover the real encoding of a latin-1 file,
        whether opened in binary mode or given a wrong encoding hint."""
        filepath = p.join(io.DATA_DIR, "latin1.csv")

        for read_kwargs in ({"mode": "rb"}, {"encoding": "ascii"}):
            records = io.read_csv(filepath, **read_kwargs)
            nt.assert_equal(self.row1, next(records))
            nt.assert_equal(self.row2, next(records))
Ejemplo n.º 3
0
def fetch_data(config):
    """Fetches realtime data and generates records.

    Pulls the CKAN resource identified by ``config['RID']``, parses the
    download as CSV or Excel based on its extension, and returns only the
    records matching the hard-coded constraints below.
    """
    ckan = CKAN(config['ENDPOINT'], apikey=config['API_KEY'])
    # r = ckan.fetch_resource(config['RID'])  # if using ckanutils
    resource = ckan.action.resource_show(id=config['RID'])
    url = resource.get('perma_link') or resource.get('url')
    r = requests.get(url, stream=True)

    # CKAN reports auth failures via an x-ckan-error header on a redirect.
    if any('403' in h.headers.get('x-ckan-error', '') for h in r.history):
        raise NotAuthorized(
            'Access to fetch resource %s was denied.' % config['RID'])

    try:
        ext = splitext(url)[1].split('.')[1]
    except IndexError:
        # URL has no extension; infer one from the response content type.
        ext = cv.ctype2ext(r.headers['Content-Type'])

    if ext == 'csv':
        records = io.read_csv(r.raw, sanitize=True, encoding=r.encoding)
    elif ext in {'xls', 'xlsx'}:
        # Excel parsing needs a seekable file, so buffer the full download.
        r = requests.get(url)
        f = SpooledTemporaryFile()
        f.write(r.content)
        records = io.read_xls(f, sanitize=True, encoding=r.encoding)
    else:
        # Bug fix: interpolate the offending extension (the original raised
        # the raw '%s' template) and separate the two sentences with a space.
        msg = 'Filetype `%s` unsupported. ' % ext
        msg += 'Please view tabutils.io documentation for assistance.'
        raise TypeError(msg)

    constraints = [('adm0_name', 'a'), ('mp_month', '3'), ('mp_year', '2015')]

    filterer = lambda x: all(x[k].lower().startswith(v) for k, v in constraints)
    return it.ifilter(filterer, records)
Ejemplo n.º 4
0
def fetch_data(config):
    """Fetches realtime data and generates records.

    Pulls the CKAN resource identified by ``config['RID']``, parses the
    download as CSV or Excel based on its extension, and returns only the
    records matching the hard-coded constraints below.
    """
    ckan = CKAN(config['ENDPOINT'], apikey=config['API_KEY'])
    # r = ckan.fetch_resource(config['RID'])  # if using ckanutils
    resource = ckan.action.resource_show(id=config['RID'])
    url = resource.get('perma_link') or resource.get('url')
    r = requests.get(url, stream=True)

    # CKAN reports auth failures via an x-ckan-error header on a redirect.
    if any('403' in h.headers.get('x-ckan-error', '') for h in r.history):
        raise NotAuthorized('Access to fetch resource %s was denied.' %
                            config['RID'])

    try:
        ext = splitext(url)[1].split('.')[1]
    except IndexError:
        # URL has no extension; infer one from the response content type.
        ext = cv.ctype2ext(r.headers['Content-Type'])

    if ext == 'csv':
        records = io.read_csv(r.raw, sanitize=True, encoding=r.encoding)
    elif ext in {'xls', 'xlsx'}:
        # Excel parsing needs a seekable file, so buffer the full download.
        r = requests.get(url)
        f = SpooledTemporaryFile()
        f.write(r.content)
        records = io.read_xls(f, sanitize=True, encoding=r.encoding)
    else:
        # Bug fix: interpolate the offending extension (the original raised
        # the raw '%s' template) and separate the two sentences with a space.
        msg = 'Filetype `%s` unsupported. ' % ext
        msg += 'Please view tabutils.io documentation for assistance.'
        raise TypeError(msg)

    constraints = [('adm0_name', 'a'), ('mp_month', '3'), ('mp_year', '2015')]

    filterer = lambda x: all(x[k].lower().startswith(v)
                             for k, v in constraints)
    return it.ifilter(filterer, records)
Ejemplo n.º 5
0
def run():
    """Convert the CSV named by the CLI arguments into a QIF or OFX
    document and write it to the destination."""
    if args.debug:
        pprint(dict(args._get_kwargs()))
        exit(0)

    if args.version:
        from . import __version__ as version
        print('v%s' % version)
        exit(0)

    # Mappings are looked up dynamically by name under csv2ofx.mappings.
    mapping = import_module('csv2ofx.mappings.%s' % args.mapping).mapping

    # NOTE(review): the conditional expression binds loosest, so this reads
    # as (args.account_type or 'Bank') if args.qif else 'CHECKING' — i.e. a
    # user-supplied account type is ignored for OFX output. Confirm intended.
    okwargs = {
        'def_type': args.account_type or 'Bank' if args.qif else 'CHECKING',
        'split_header': args.split,
        'start': parse(args.start),
        'end': parse(args.end)
    }

    cont = QIF(mapping, **okwargs) if args.qif else OFX(mapping, **okwargs)
    records = read_csv(args.source, has_header=cont.has_header)
    groups = cont.gen_groups(records, args.chunksize)
    trxns = cont.gen_trxns(groups, args.collapse)
    cleaned_trxns = cont.clean_trxns(trxns)
    data = utils.gen_data(cleaned_trxns)
    body = cont.gen_body(data)

    try:
        mtime = p.getmtime(args.source.name)
    except AttributeError:
        # args.source is an in-memory stream with no .name; fall back to now.
        mtime = time.time()

    server_date = dt.fromtimestamp(mtime)
    header = cont.header(date=server_date, language=args.language)
    footer = cont.footer(date=server_date)
    content = it.chain([header, body, footer])
    kwargs = {'overwrite': args.overwrite, 'chunksize': args.chunksize}

    try:
        write(args.dest, IterStringIO(content), **kwargs)
    except TypeError as e:
        msg = str(e)

        if not args.collapse:
            # Bug fix: leading space so the hint doesn't run into the
            # original error text.
            msg += ' Try again with `-c` option.'

        exit(msg)
Ejemplo n.º 6
0
Archivo: main.py Proyecto: Drey/csv2ofx
def run():
    """Convert the CSV named by the CLI arguments into a QIF or OFX
    document and write it to the destination."""
    if args.debug:
        pprint(dict(args._get_kwargs()))
        exit(0)

    if args.version:
        from . import __version__ as version
        print('v%s' % version)
        exit(0)

    # Mappings are looked up dynamically by name under csv2ofx.mappings.
    mapping = import_module('csv2ofx.mappings.%s' % args.mapping).mapping

    # NOTE(review): the conditional expression binds loosest, so this reads
    # as (args.account_type or 'Bank') if args.qif else 'CHECKING' — i.e. a
    # user-supplied account type is ignored for OFX output. Confirm intended.
    okwargs = {
        'def_type': args.account_type or 'Bank' if args.qif else 'CHECKING',
        'split_header': args.split,
        'start': parse(args.start),
        'end': parse(args.end)
    }

    cont = QIF(mapping, **okwargs) if args.qif else OFX(mapping, **okwargs)
    records = read_csv(args.source, has_header=cont.has_header)
    groups = cont.gen_groups(records, args.chunksize)
    trxns = cont.gen_trxns(groups, args.collapse)
    cleaned_trxns = cont.clean_trxns(trxns)
    data = utils.gen_data(cleaned_trxns)
    body = cont.gen_body(data)

    try:
        mtime = p.getmtime(args.source.name)
    except AttributeError:
        # args.source is an in-memory stream with no .name; fall back to now.
        mtime = time.time()

    server_date = dt.fromtimestamp(mtime)
    header = cont.header(date=server_date, language=args.language)
    footer = cont.footer(date=server_date)
    content = it.chain([header, body, footer])
    kwargs = {'overwrite': args.overwrite, 'chunksize': args.chunksize}

    try:
        write(args.dest, IterStringIO(content), **kwargs)
    except TypeError as e:
        msg = str(e)

        if not args.collapse:
            # Bug fix: leading space so the hint doesn't run into the
            # original error text.
            msg += ' Try again with `-c` option.'

        exit(msg)
Ejemplo n.º 7
0
    def test_newline_json(self):
        """Round-trip newline-delimited JSON: records2json emits it and
        read_json consumes it."""
        expected = {
            "sepal_width": "3.5",
            "petal_width": "0.2",
            "species": "Iris-setosa",
            "sepal_length": "5.1",
            "petal_length": "1.4",
        }

        records = io.read_csv(p.join(io.DATA_DIR, "iris.csv"))
        json = cv.records2json(records, newline=True)
        nt.assert_equal(expected, loads(next(json)))

        records = io.read_json(p.join(io.DATA_DIR, "newline.json"), newline=True)
        nt.assert_equal({"a": 2, "b": 3}, next(records))
Ejemplo n.º 8
0
    def test_fill(self):
        """Exercise ft.fill, which patches missing values in a row using the
        previous row (or a fixed value / a designated fill_key column)."""
        # Three data rows against a 3-column header: row 1 has an extra
        # field, row 2 is short, row 3 mixes types.
        content = 'column_a,column_b,column_c\n'
        content += '1,27,,too long!\n,too short!\n0,mixed types.uh oh,17'
        f = StringIO(content)
        records = io.read_csv(f)
        previous = {}
        current = next(records)
        # The surplus field of the over-long row is dropped.
        expected = {'column_a': '1', 'column_b': '27', 'column_c': ''}
        nt.assert_equal(expected, current)

        # ft.fill yields the filled row's (key, value) pairs first, then one
        # final dict of per-column fill counts — hence islice(length) + next.
        length = len(current)
        filled = ft.fill(previous, current, value=0)
        previous = dict(it.islice(filled, length))
        count = next(filled)
        # Only column_c was empty, so only it was filled (once).
        nt.assert_equal(count, {'column_a': 0, 'column_b': 0, 'column_c': 1})

        expected = {'column_a': '1', 'column_b': '27', 'column_c': 0}
        nt.assert_equal(expected, previous)

        current = next(records)

        # The short row: column_a empty, column_c entirely absent (None).
        expected = {
            'column_a': '',
            'column_b': u"too short!",
            'column_c': None,
        }

        nt.assert_equal(expected, current)

        # fill_key makes missing values copy from column_b of the same row;
        # passing the prior `count` accumulates the fill totals.
        kwargs = {'fill_key': 'column_b', 'count': count}
        filled = ft.fill(previous, current, **kwargs)
        previous = dict(it.islice(filled, length))
        count = next(filled)
        nt.assert_equal({'column_a': 1, 'column_b': 0, 'column_c': 2}, count)

        # Both missing columns were replaced with column_b's value.
        expected = {
            'column_a': u"too short!",
            'column_b': u"too short!",
            'column_c': u"too short!",
        }

        nt.assert_equal(expected, previous)
Ejemplo n.º 9
0
def gen_data(location=None, **kwargs):
    """Generates records from csv.

    Opens the file at ``location``, reads it as sanitized CSV using the
    detected encoding, and returns the normalized record stream.
    """
    source, _ext, detected_encoding = get_file(location, **kwargs)
    raw_records = io.read_csv(source, sanitize=True, encoding=detected_encoding)
    return normalize(raw_records)
Ejemplo n.º 10
0
 def test_utf8(self):
     """Read a sanitized UTF-8 CSV and check the first and third rows."""
     reader = io.read_csv(p.join(io.DATA_DIR, "utf8.csv"), sanitize=True)
     nt.assert_equal(self.row1, next(reader))
     nt.assert_equal(self.row3, next(reader))
Ejemplo n.º 11
0
 def test_kwargs(self):
     """Extra keyword arguments pass through to the underlying CSV reader."""
     reader = io.read_csv(p.join(io.DATA_DIR, "utf8.csv"), delimiter=",")
     nt.assert_equal(self.row1, next(reader))
Ejemplo n.º 12
0
 def test_utf16_little(self):
     """Read a UTF-16 little-endian encoded CSV."""
     reader = io.read_csv(
         p.join(io.DATA_DIR, "utf16_little.csv"), encoding="utf-16-le")
     nt.assert_equal(self.row1, next(reader))
     nt.assert_equal(self.row3, next(reader))
Ejemplo n.º 13
0
 def test_latin1(self):
     """Read a Latin-1 encoded CSV."""
     reader = io.read_csv(p.join(io.DATA_DIR, "latin1.csv"), encoding="latin-1")
     for expected_row in (self.row1, self.row2):
         nt.assert_equal(expected_row, next(reader))