def test_csv(self):
    """Test for reading csv files"""
    filepath = p.join(io.DATA_DIR, 'test.csv')
    header = ['some_date', 'sparse_data', 'some_value', 'unicode_test']

    with open(filepath, 'r', encoding='utf-8') as f:
        records = io._read_csv(f, header)
        nt.assert_equal(self.sheet0_alt, next(records))

    filepath = p.join(io.DATA_DIR, 'no_header_row.csv')
    records = io.read_csv(filepath, has_header=False)
    expected = {'column_1': '1', 'column_2': '2', 'column_3': '3'}
    nt.assert_equal(expected, next(records))

    filepath = p.join(io.DATA_DIR, 'test_bad.csv')
    kwargs = {'sanitize': True, 'first_row': 1, 'first_col': 1}
    records = io.read_csv(filepath, **kwargs)
    nt.assert_equal(self.sheet0_alt, next(records))

    filepath = p.join(io.DATA_DIR, 'fixed_w_header.txt')
    widths = [0, 18, 29, 33, 38, 50]
    records = io.read_fixed_fmt(filepath, widths, has_header=True)

    expected = {
        'News Paper': 'Chicago Reader',
        'Founded': '1971-01-01',
        'Int': '40',
        'Bool': 'True',
        'Float': '1.0',
        'Timestamp': '04:14:001971-01-01T04:14:00'}

    nt.assert_equal(expected, next(records))
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('spreadsheet.csv')
        >>> conf = {
        ...     'url': url, 'sanitize': True, 'skip_rows': 0,
        ...     'encoding': ENCODING}
        >>> objconf = Objectify(conf)
        >>> result = parser(None, objconf, stream={})
        >>> next(result)['mileage'] == '7213'
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        first_row, custom_header = objconf.skip_rows, objconf.col_names
        renamed = {'first_row': first_row, 'custom_header': custom_header}

        f = fetch(decode=True, **objconf)
        rkwargs = merge([objconf, renamed])
        stream = auto_close(read_csv(f, **rkwargs), f)

    return stream
def parser(_, objconf, skip, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.lib.utils import Objectify
        >>>
        >>> url = get_path('spreadsheet.csv')
        >>> conf = {'url': url, 'sanitize': True, 'skip_rows': 0}
        >>> objconf = Objectify(conf)
        >>> result, skip = parser(None, objconf, False, stream={})
        >>> next(result)['mileage'] == '7213'
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        first_row, custom_header = objconf.skip_rows, objconf.col_names
        renamed = {'first_row': first_row, 'custom_header': custom_header}

        response = urlopen(url)
        encoding = utils.get_response_encoding(response, objconf.encoding)
        rkwargs = utils.combine_dicts(objconf, renamed)
        rkwargs['encoding'] = encoding
        stream = read_csv(response, **rkwargs)

    return stream, skip
def test_bytes_io(self):
    """Test for reading BytesIO"""
    with open(p.join(io.DATA_DIR, "utf8.csv"), "rb") as f:
        b = BytesIO(f.read())
        records = io.read_csv(b, sanitize=True)
        nt.assert_equal(self.row1, next(records))
        nt.assert_equal(self.row2, next(records))
def test_wrong_encoding_detection(self):
    """Test for properly detecting the encoding of a file opened with
    the wrong encoding
    """
    filepath = p.join(io.DATA_DIR, 'latin1.csv')
    records = io.read_csv(filepath, encoding='ascii')
    nt.assert_equal(self.row1, next(records))
    nt.assert_equal(self.row2, next(records))
def test_bytes_encoding_detection(self):
    """Test for properly detecting the encoding of a file opened in
    bytes mode
    """
    filepath = p.join(io.DATA_DIR, 'latin1.csv')
    records = io.read_csv(filepath, mode='rb')
    nt.assert_equal(self.row1, next(records))
    nt.assert_equal(self.row2, next(records))
def test_urlopen_utf8(self):
    """Test for reading utf-8 files"""
    filepath = p.join(io.DATA_DIR, 'utf8.csv')

    with closing(urlopen('file://%s' % filepath)) as response:
        f = response.fp
        records = io.read_csv(f)
        row = next(it.islice(records, 1, 2))
        nt.assert_equal(self.utf8_row, row)
def test_urlopen_latin1(self):
    """Test for reading latin-1 files"""
    filepath = p.join(io.DATA_DIR, 'latin1.csv')

    with closing(urlopen('file://%s' % filepath)) as response:
        f = response.fp
        records = io.read_csv(f, encoding='latin-1')
        row = next(it.islice(records, 1, 2))
        nt.assert_equal(self.latin_row, row)
def test_windows(self):
    """Test for reading windows-1252 files"""
    filepath = p.join(io.DATA_DIR, "windows1252.csv")

    # based on my testing, when excel for mac saves a csv file as
    # 'Windows-1252', you have to open with 'mac-roman' in order
    # to properly read it
    records = io.read_csv(filepath, encoding="mac-roman")
    nt.assert_equal(self.row1, next(records))
    nt.assert_equal(self.row4, next(records))
def test_csv_last_row(self):
    """Test for reading csv files with last_row option"""
    filepath = p.join(io.DATA_DIR, "iris.csv")

    expected = {
        "sepal_width": "3.5",
        "petal_width": "0.2",
        "species": "Iris-setosa",
        "sepal_length": "5.1",
        "petal_length": "1.4",
    }

    records = list(io.read_csv(filepath))
    nt.assert_equal(expected, records[0])
    nt.assert_equal(150, len(records))

    records = list(io.read_csv(filepath, last_row=10))
    nt.assert_equal(expected, records[0])
    nt.assert_equal(10, len(records))

    records = list(io.read_csv(filepath, last_row=-50))
    nt.assert_equal(expected, records[0])
    nt.assert_equal(100, len(records))
def test_opened_files(self):
    """Test for reading open files"""
    filepath = p.join(io.DATA_DIR, 'test.csv')
    header = ['some_date', 'sparse_data', 'some_value', 'unicode_test']

    with open(filepath, encoding='utf-8') as f:
        records = io._read_csv(f, header)  # pylint: disable=W0212
        nt.assert_equal(self.sheet0_alt, next(records))

    f = open(filepath, encoding='utf-8')

    try:
        records = io.read_csv(f, sanitize=True)
        nt.assert_equal(self.sheet0_alt, next(records))
    finally:
        f.close()

    f = open(filepath, 'rU', newline=None)

    try:
        records = io.read_csv(f, sanitize=True)
        nt.assert_equal(self.sheet0_alt, next(records))
    finally:
        f.close()

    filepath = p.join(io.DATA_DIR, 'test.xlsx')

    with open(filepath, 'r+b') as f:
        records = io.read_xls(f, sanitize=True, sheet=0)
        nt.assert_equal(self.sheet0, next(records))

    f = open(filepath, 'r+b')

    try:
        records = io.read_xls(f, sanitize=True, sheet=0)
        nt.assert_equal(self.sheet0, next(records))
    finally:
        f.close()
def convertir(filename):
    """Convert a semicolon-delimited CSV file to OFX and print each line."""
    ofx = OFX(mapping)
    records = read_csv(filename, delimiter=';')
    groups = ofx.gen_groups(records)
    trxns = ofx.gen_trxns(groups)
    cleaned_trxns = ofx.clean_trxns(trxns)
    data = utils.gen_data(cleaned_trxns)

    header = ofx.header()
    body = ofx.gen_body(data)
    footer = ofx.footer()
    content = it.chain(header, body, footer)

    for line in IterStringIO(content):
        print(line)
def test_newline_json(self):  # pylint: disable=R0201
    """Test for reading newline delimited JSON files"""
    expected = {
        'sepal_width': '3.5', 'petal_width': '0.2',
        'species': 'Iris-setosa', 'sepal_length': '5.1',
        'petal_length': '1.4'}

    filepath = p.join(io.DATA_DIR, 'iris.csv')
    records = io.read_csv(filepath)
    json = cv.records2json(records, newline=True)
    nt.assert_equal(expected, loads(next(json)))

    filepath = p.join(io.DATA_DIR, 'newline.json')
    records = io.read_json(filepath, newline=True)
    nt.assert_equal({'a': 2, 'b': 3}, next(records))
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x[0])['mileage'])
        ...     url = get_path('spreadsheet.csv')
        ...     conf = {'url': url, 'sanitize': True, 'skip_rows': 0}
        ...     objconf = Objectify(conf)
        ...     d = async_parser(None, objconf, False, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        7213
    """
    if skip:
        stream = kwargs['stream']
    else:
        # TODO: write function to extract encoding from response
        url = utils.get_abspath(objconf.url)
        response = yield io.async_url_open(url)
        first_row, custom_header = objconf.skip_rows, objconf.col_names
        renamed = {'first_row': first_row, 'custom_header': custom_header}
        rkwargs = utils.combine_dicts(objconf, renamed)
        rkwargs['encoding'] = objconf.encoding
        stream = read_csv(response, **rkwargs)

    result = (stream, skip)
    return_value(result)
def async_parser(_, objconf, skip=False, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x)['mileage'])
        ...     url = get_path('spreadsheet.csv')
        ...     conf = {
        ...         'url': url, 'sanitize': True, 'skip_rows': 0,
        ...         'encoding': ENCODING}
        ...     objconf = Objectify(conf)
        ...     d = async_parser(None, objconf, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        7213
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = get_abspath(objconf.url)
        r = yield io.async_url_open(url)
        first_row, custom_header = objconf.skip_rows, objconf.col_names
        renamed = {'first_row': first_row, 'custom_header': custom_header}
        rkwargs = merge([objconf, renamed])
        stream = auto_close(read_csv(r, **rkwargs), r)

    return_value(stream)
def gen_ofx(input_path, output_path, is_credit=False):
    """Convert a DBS bank (or credit card) CSV export to an OFX file."""
    if is_credit:
        from csv2ofx.mappings.DBS_credit import mapping
    else:
        from csv2ofx.mappings.DBS import mapping

    ofx = OFX(mapping)
    records = read_csv(input_path)
    groups = ofx.gen_groups(records)
    trxns = ofx.gen_trxns(groups)
    cleaned_trxns = ofx.clean_trxns(trxns)
    data = utils.gen_data(cleaned_trxns)
    content = it.chain([ofx.header(), ofx.gen_body(data), ofx.footer()])

    with open(output_path, "w") as myfile:
        for line in IterStringIO(content):
            myfile.write(line.decode("utf-8"))
def test_fill(self):
    content = 'column_a,column_b,column_c\n'
    content += '1,27,,too long!\n,too short!\n0,mixed types.uh oh,17'
    f = StringIO(content)
    records = io.read_csv(f)
    previous = {}
    current = next(records)
    expected = {'column_a': '1', 'column_b': '27', 'column_c': ''}
    nt.assert_equal(expected, current)

    length = len(current)
    filled = ft.fill(previous, current, value=0)
    previous = dict(it.islice(filled, length))
    count = next(filled)
    nt.assert_equal(count, {'column_a': 0, 'column_b': 0, 'column_c': 1})

    expected = {'column_a': '1', 'column_b': '27', 'column_c': 0}
    nt.assert_equal(expected, previous)

    current = next(records)

    expected = {
        'column_a': '',
        'column_b': u"too short!",
        'column_c': None,
    }

    nt.assert_equal(expected, current)

    kwargs = {'fill_key': 'column_b', 'count': count}
    filled = ft.fill(previous, current, **kwargs)
    previous = dict(it.islice(filled, length))
    count = next(filled)
    nt.assert_equal({'column_a': 1, 'column_b': 0, 'column_c': 2}, count)

    expected = {
        'column_a': u"too short!",
        'column_b': u"too short!",
        'column_c': u"too short!",
    }

    nt.assert_equal(expected, previous)
def write_ofx(self, ofxfile):
    """ write out ofxfile from DataFrame """
    mapping = {
        'account': 'account',
        'date': itemgetter('date'),
        'payee': itemgetter('title'),
        'amount': itemgetter('amount'),
    }

    ofx = OFX(mapping)
    data = self._df.to_csv(quoting=csv.QUOTE_ALL)
    records = read_csv(StringIO(data))
    groups = ofx.gen_groups(records)
    cleaned_trxns = ofx.clean_trxns(groups)
    data = utils.gen_data(cleaned_trxns)
    content = it.chain([ofx.header(), ofx.gen_body(data), ofx.footer()])

    with open(ofxfile, 'wb') as f:
        for line in IterStringIO(content):
            f.write(line)
def test_utf16_big(self):
    """Test for reading utf-16BE files"""
    filepath = p.join(io.DATA_DIR, 'utf16_big.csv')
    records = io.read_csv(filepath, encoding='utf-16-be')
    nt.assert_equal(self.row1, next(records))
    nt.assert_equal(self.row3, next(records))
def run():  # noqa: C901
    """Parses the CLI options and runs the main program"""
    if args.debug:
        pprint(dict(args._get_kwargs()))  # pylint: disable=W0212
        exit(0)

    if args.version:
        from . import __version__ as version
        print("v%s" % version)
        exit(0)

    if args.list_mappings:
        print(", ".join(MODULES))
        exit(0)

    if args.custom:
        name = p.splitext(p.split(args.custom)[1])[0]
        found = find_module(name, [p.dirname(p.abspath(args.custom))])
        module = load_module(name, *found)
    else:
        module = import_module("csv2ofx.mappings.%s" % args.mapping)

    mapping = module.mapping

    okwargs = {
        "def_type": args.account_type or "Bank" if args.qif else "CHECKING",
        "split_header": args.split,
        "start": parse(args.start) if args.start else None,
        "end": parse(args.end) if args.end else None,
    }

    cont = QIF(mapping, **okwargs) if args.qif else OFX(mapping, **okwargs)
    source = open(args.source, encoding=args.encoding) if args.source else stdin

    try:
        records = read_csv(source, has_header=cont.has_header)
        groups = cont.gen_groups(records, args.chunksize)
        trxns = cont.gen_trxns(groups, args.collapse)
        cleaned_trxns = cont.clean_trxns(trxns)
        data = utils.gen_data(cleaned_trxns)
        body = cont.gen_body(data)

        if args.server_date:
            server_date = parse(args.server_date)
        else:
            try:
                mtime = p.getmtime(source.name)
            except AttributeError:
                mtime = time.time()

            server_date = dt.fromtimestamp(mtime)

        header = cont.header(date=server_date, language=args.language)
        footer = cont.footer(date=server_date)
        filtered = filter(None, [header, body, footer])
        content = it.chain.from_iterable(filtered)
        kwargs = {
            "overwrite": args.overwrite,
            "chunksize": args.chunksize,
            "encoding": args.encoding,
        }
    except:
        source.close()
        raise

    dest = open(args.dest, "w", encoding=args.encoding) if args.dest else stdout

    try:
        res = write(dest, IterStringIO(content), **kwargs)
    except KeyError as err:
        msg = "Field %s is missing from file. Check `mapping` option." % err
    except TypeError as err:
        msg = "No data to write. %s. " % str(err)

        if args.collapse:
            msg += "Check `start` and `end` options."
        else:
            msg += "Try again with `-c` option."
    except Exception as err:  # pylint: disable=broad-except
        msg = 1
        traceback.print_exc()
    else:
        msg = 0 if res else "No data to write. Check `start` and `end` options."
    finally:
        exit(msg)
        source.close() if args.source else None
        dest.close() if args.dest else None
def test_utf8(self):
    """Test for reading utf-8 files"""
    filepath = p.join(io.DATA_DIR, 'utf8.csv')
    records = io.read_csv(filepath, sanitize=True)
    nt.assert_equal(self.row1, next(records))
    nt.assert_equal(self.row3, next(records))
def test_latin1(self):
    """Test for reading latin-1 files"""
    filepath = p.join(io.DATA_DIR, 'latin1.csv')
    records = io.read_csv(filepath, encoding='latin-1')
    nt.assert_equal(self.row1, next(records))
    nt.assert_equal(self.row2, next(records))
def test_kwargs(self):
    """Test for passing kwargs while reading csv files"""
    filepath = p.join(io.DATA_DIR, 'utf8.csv')
    kwargs = {'delimiter': ','}
    records = io.read_csv(filepath, **kwargs)
    nt.assert_equal(self.row1, next(records))
from io import open, StringIO
from meza import io

fs_base = '/Users/Sam/Source/categorisation/'
training_url = fs_base + 'categorised-txns-training-set.csv'

records = io.read_csv(training_url)
f = StringIO()
def csv2records(string, has_header=True, delimiter=","):
    """Parse a CSV string into a list of record dicts."""
    return list(
        read_csv(StringIO(string), has_header=has_header, delimiter=delimiter))
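# A minimal usage sketch for the csv2records helper above (hypothetical data;
# assumes `from io import StringIO` and `from meza.io import read_csv` are in
# scope, as the helper itself requires). meza's read_csv yields each row as a
# dict of strings keyed by the header row, so the helper returns a list of dicts.
rows = csv2records("name,qty\nwidget,3\ngadget,7\n")
assert rows[0] == {'name': 'widget', 'qty': '3'}
assert rows[1] == {'name': 'gadget', 'qty': '7'}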
def test_utf16_little(self):
    """Test for reading utf-16LE files"""
    filepath = p.join(io.DATA_DIR, 'utf16_little.csv')
    records = io.read_csv(filepath, encoding='utf-16-le')
    nt.assert_equal(self.row1, next(records))
    nt.assert_equal(self.row3, next(records))