def test_csv(self):
    """Read CSV files: explicit header, no header, sanitized, and fixed-width."""
    filepath = p.join(io.DATA_DIR, "test.csv")
    header = ["some_date", "sparse_data", "some_value", "unicode_test"]

    # Bug fix: "rU" (universal-newline flag) was deprecated since Python 3.4
    # and removed in 3.11, where it raises ValueError. Plain "r" already
    # uses universal-newline translation.
    with open(filepath, "r", encoding="utf-8") as f:
        records = io._read_csv(f, header)
        nt.assert_equal(self.sheet0_alt, next(records))

    # A file without a header row gets auto-generated column names.
    filepath = p.join(io.DATA_DIR, "no_header_row.csv")
    records = io.read_csv(filepath, has_header=False)
    expected = {"column_1": "1", "column_2": "2", "column_3": "3"}
    nt.assert_equal(expected, next(records))

    # Sanitizing plus row/column offsets recovers a clean sheet from a
    # messy file.
    filepath = p.join(io.DATA_DIR, "test_bad.csv")
    kwargs = {"sanitize": True, "first_row": 1, "first_col": 1}
    records = io.read_csv(filepath, **kwargs)
    nt.assert_equal(self.sheet0_alt, next(records))

    # Fixed-width text file with a header row.
    filepath = p.join(io.DATA_DIR, "fixed_w_header.txt")
    widths = [0, 18, 29, 33, 38, 50]
    records = io.read_fixed_fmt(filepath, widths, has_header=True)
    expected = {
        "News Paper": "Chicago Reader",
        "Founded": "1971-01-01",
        "Int": "40",
        "Bool": "True",
        "Float": "1.0",
        "Timestamp": "04:14:001971-01-01T04:14:00",
    }
    nt.assert_equal(expected, next(records))
def test_encoding_detection(self):
    """Auto-detected and explicitly-declared encodings yield the same rows."""
    path = p.join(io.DATA_DIR, "latin1.csv")

    # Binary mode forces the reader to sniff the encoding; an explicit
    # (wrong) 'ascii' declaration must also recover the same records.
    for extra in ({"mode": "rb"}, {"encoding": "ascii"}):
        rows = io.read_csv(path, **extra)
        nt.assert_equal(self.row1, next(rows))
        nt.assert_equal(self.row2, next(rows))
def fetch_data(config):
    """Fetch realtime data from a CKAN resource and generate filtered records.

    Args:
        config (dict): requires 'ENDPOINT', 'API_KEY', and 'RID' keys.

    Returns:
        Iterator of record dicts matching the hard-coded constraints.

    Raises:
        NotAuthorized: if access to the resource is denied.
        TypeError: if the resource filetype is unsupported.
    """
    ckan = CKAN(config['ENDPOINT'], apikey=config['API_KEY'])
    # r = ckan.fetch_resource(config['RID'])  # if using ckanutils
    resource = ckan.action.resource_show(id=config['RID'])
    url = resource.get('perma_link') or resource.get('url')
    r = requests.get(url, stream=True)

    if any('403' in h.headers.get('x-ckan-error', '') for h in r.history):
        raise NotAuthorized(
            'Access to fetch resource %s was denied.' % config['RID'])

    try:
        ext = splitext(url)[1].split('.')[1]
    except IndexError:
        # URL has no usable extension; fall back to the Content-Type header.
        ext = cv.ctype2ext(r.headers['Content-Type'])

    if ext == 'csv':
        records = io.read_csv(r.raw, sanitize=True, encoding=r.encoding)
    elif ext in {'xls', 'xlsx'}:
        r = requests.get(url)
        f = SpooledTemporaryFile()
        f.write(r.content)
        records = io.read_xls(f, sanitize=True, encoding=r.encoding)
    else:
        # Bug fix: the original never interpolated `ext` into the '%s'
        # placeholder and was missing a space between the two sentences.
        msg = 'Filetype `%s` unsupported. ' % ext
        msg += 'Please view tabutils.io documentation for assistance.'
        raise TypeError(msg)

    constraints = [('adm0_name', 'a'), ('mp_month', '3'), ('mp_year', '2015')]
    filterer = lambda x: all(x[k].lower().startswith(v) for k, v in constraints)
    return it.ifilter(filterer, records)
def fetch_data(config):
    """Fetch realtime data from a CKAN resource and generate filtered records.

    Args:
        config (dict): requires 'ENDPOINT', 'API_KEY', and 'RID' keys.

    Returns:
        Iterator of record dicts matching the hard-coded constraints.

    Raises:
        NotAuthorized: if access to the resource is denied.
        TypeError: if the resource filetype is unsupported.
    """
    ckan = CKAN(config['ENDPOINT'], apikey=config['API_KEY'])
    # r = ckan.fetch_resource(config['RID'])  # if using ckanutils
    resource = ckan.action.resource_show(id=config['RID'])
    url = resource.get('perma_link') or resource.get('url')
    r = requests.get(url, stream=True)

    if any('403' in h.headers.get('x-ckan-error', '') for h in r.history):
        raise NotAuthorized(
            'Access to fetch resource %s was denied.' % config['RID'])

    try:
        ext = splitext(url)[1].split('.')[1]
    except IndexError:
        # URL has no usable extension; fall back to the Content-Type header.
        ext = cv.ctype2ext(r.headers['Content-Type'])

    if ext == 'csv':
        records = io.read_csv(r.raw, sanitize=True, encoding=r.encoding)
    elif ext in {'xls', 'xlsx'}:
        r = requests.get(url)
        f = SpooledTemporaryFile()
        f.write(r.content)
        records = io.read_xls(f, sanitize=True, encoding=r.encoding)
    else:
        # Bug fix: the original never interpolated `ext` into the '%s'
        # placeholder and was missing a space between the two sentences.
        msg = 'Filetype `%s` unsupported. ' % ext
        msg += 'Please view tabutils.io documentation for assistance.'
        raise TypeError(msg)

    constraints = [('adm0_name', 'a'), ('mp_month', '3'), ('mp_year', '2015')]
    filterer = lambda x: all(x[k].lower().startswith(v) for k, v in constraints)
    return it.ifilter(filterer, records)
def run():
    """CLI entry point: convert a CSV source into QIF or OFX output."""
    if args.debug:
        pprint(dict(args._get_kwargs()))
        exit(0)

    if args.version:
        from . import __version__ as version
        print('v%s' % version)
        exit(0)

    mapping = import_module('csv2ofx.mappings.%s' % args.mapping).mapping

    okwargs = {
        # Bug fix: the conditional expression binds looser than `or`, so the
        # original parsed as `(args.account_type or 'Bank') if args.qif else
        # 'CHECKING'`, silently discarding a user-supplied account type for
        # OFX output. The parentheses restore the intended default-per-format.
        'def_type': args.account_type or ('Bank' if args.qif else 'CHECKING'),
        'split_header': args.split,
        'start': parse(args.start),
        'end': parse(args.end),
    }

    cont = QIF(mapping, **okwargs) if args.qif else OFX(mapping, **okwargs)
    records = read_csv(args.source, has_header=cont.has_header)
    groups = cont.gen_groups(records, args.chunksize)
    trxns = cont.gen_trxns(groups, args.collapse)
    cleaned_trxns = cont.clean_trxns(trxns)
    data = utils.gen_data(cleaned_trxns)
    body = cont.gen_body(data)

    try:
        mtime = p.getmtime(args.source.name)
    except AttributeError:
        # `source` may be an in-memory stream with no backing file name.
        mtime = time.time()

    server_date = dt.fromtimestamp(mtime)
    header = cont.header(date=server_date, language=args.language)
    footer = cont.footer(date=server_date)
    content = it.chain([header, body, footer])
    kwargs = {'overwrite': args.overwrite, 'chunksize': args.chunksize}

    try:
        write(args.dest, IterStringIO(content), **kwargs)
    except TypeError as e:
        # Bug fix: the hint was concatenated directly onto the error message
        # with no separating space ("...errorTry again...").
        msg = str(e)

        if not args.collapse:
            msg += ' Try again with `-c` option.'

        exit(msg)
def test_newline_json(self):
    """Round-trip records through newline-delimited JSON."""
    expected = {
        "petal_length": "1.4",
        "petal_width": "0.2",
        "sepal_length": "5.1",
        "sepal_width": "3.5",
        "species": "Iris-setosa",
    }

    # records2json with newline=True emits one JSON object per line.
    rows = io.read_csv(p.join(io.DATA_DIR, "iris.csv"))
    serialized = cv.records2json(rows, newline=True)
    nt.assert_equal(expected, loads(next(serialized)))

    # read_json with newline=True parses such a stream back to records.
    parsed = io.read_json(p.join(io.DATA_DIR, "newline.json"), newline=True)
    nt.assert_equal({"a": 2, "b": 3}, next(parsed))
def test_fill(self):
    """Exercise ft.fill on rows containing blank and malformed values."""
    text = 'column_a,column_b,column_c\n'
    text += '1,27,,too long!\n,too short!\n0,mixed types.uh oh,17'
    records = io.read_csv(StringIO(text))

    prev = {}
    row = next(records)
    nt.assert_equal({'column_a': '1', 'column_b': '27', 'column_c': ''}, row)

    width = len(row)

    # First pass: blanks are replaced by a literal 0; the generator yields
    # the filled items first, then a per-column fill count.
    filled = ft.fill(prev, row, value=0)
    prev = dict(it.islice(filled, width))
    count = next(filled)
    nt.assert_equal(count, {'column_a': 0, 'column_b': 0, 'column_c': 1})
    nt.assert_equal({'column_a': '1', 'column_b': '27', 'column_c': 0}, prev)

    row = next(records)
    expected = {'column_a': '', 'column_b': u"too short!", 'column_c': None}
    nt.assert_equal(expected, row)

    # Second pass: fill from another column's value, carrying the running
    # count forward.
    filled = ft.fill(prev, row, fill_key='column_b', count=count)
    prev = dict(it.islice(filled, width))
    count = next(filled)
    nt.assert_equal({'column_a': 1, 'column_b': 0, 'column_c': 2}, count)
    expected = {
        'column_a': u"too short!",
        'column_b': u"too short!",
        'column_c': u"too short!",
    }
    nt.assert_equal(expected, prev)
def gen_data(location=None, **kwargs):
    """Generate normalized records from a csv source.

    Args:
        location: passed through to `get_file` to resolve the csv source.
        **kwargs: extra options forwarded to `get_file`.

    Returns:
        The normalized records.
    """
    handle, _ext, enc = get_file(location, **kwargs)
    raw = io.read_csv(handle, sanitize=True, encoding=enc)
    return normalize(raw)
def test_utf8(self):
    """Sanitized UTF-8 input decodes transparently."""
    rows = io.read_csv(p.join(io.DATA_DIR, "utf8.csv"), sanitize=True)

    for expected in (self.row1, self.row3):
        nt.assert_equal(expected, next(rows))
def test_kwargs(self):
    """Extra csv keyword args (e.g. delimiter) pass through to the reader."""
    path = p.join(io.DATA_DIR, "utf8.csv")
    rows = io.read_csv(path, delimiter=",")
    nt.assert_equal(self.row1, next(rows))
def test_utf16_little(self):
    """Explicit little-endian UTF-16 decoding."""
    path = p.join(io.DATA_DIR, "utf16_little.csv")
    rows = io.read_csv(path, encoding="utf-16-le")

    for expected in (self.row1, self.row3):
        nt.assert_equal(expected, next(rows))
def test_latin1(self):
    """Explicit latin-1 decoding."""
    path = p.join(io.DATA_DIR, "latin1.csv")
    rows = io.read_csv(path, encoding="latin-1")

    for expected in (self.row1, self.row2):
        nt.assert_equal(expected, next(rows))