Example #1
    def test_csv(self):
        filepath = p.join(io.DATA_DIR, 'test.csv')
        header = ['some_date', 'sparse_data', 'some_value', 'unicode_test']

        with open(filepath, 'r', encoding='utf-8') as f:
            records = io._read_csv(f, header)
            nt.assert_equal(self.sheet0_alt, next(records))

        filepath = p.join(io.DATA_DIR, 'no_header_row.csv')
        records = io.read_csv(filepath, has_header=False)
        expected = {'column_1': '1', 'column_2': '2', 'column_3': '3'}
        nt.assert_equal(expected, next(records))

        filepath = p.join(io.DATA_DIR, 'test_bad.csv')
        kwargs = {'sanitize': True, 'first_row': 1, 'first_col': 1}
        records = io.read_csv(filepath, **kwargs)
        nt.assert_equal(self.sheet0_alt, next(records))

        filepath = p.join(io.DATA_DIR, 'fixed_w_header.txt')
        widths = [0, 18, 29, 33, 38, 50]
        records = io.read_fixed_fmt(filepath, widths, has_header=True)
        expected = {
            'News Paper': 'Chicago Reader',
            'Founded': '1971-01-01',
            'Int': '40',
            'Bool': 'True',
            'Float': '1.0',
            'Timestamp': '04:14:001971-01-01T04:14:00'
        }

        nt.assert_equal(expected, next(records))
Example #2
    def test_csv(self):
        filepath = p.join(io.DATA_DIR, 'test.csv')
        header = ['some_date', 'sparse_data', 'some_value', 'unicode_test']

        with open(filepath, 'r', encoding='utf-8') as f:  # the old 'U' mode flag was removed in Python 3.11
            records = io._read_csv(f, header)
            nt.assert_equal(self.sheet0_alt, next(records))

        filepath = p.join(io.DATA_DIR, 'no_header_row.csv')
        records = io.read_csv(filepath, has_header=False)
        expected = {'column_1': '1', 'column_2': '2', 'column_3': '3'}
        nt.assert_equal(expected, next(records))

        filepath = p.join(io.DATA_DIR, 'test_bad.csv')
        kwargs = {'sanitize': True, 'first_row': 1, 'first_col': 1}
        records = io.read_csv(filepath, **kwargs)
        nt.assert_equal(self.sheet0_alt, next(records))

        filepath = p.join(io.DATA_DIR, 'fixed_w_header.txt')
        widths = [0, 18, 29, 33, 38, 50]
        records = io.read_fixed_fmt(filepath, widths, has_header=True)
        expected = {
            'News Paper': 'Chicago Reader',
            'Founded': '1971-01-01',
            'Int': '40',
            'Bool': 'True',
            'Float': '1.0',
            'Timestamp': '04:14:001971-01-01T04:14:00'}

        nt.assert_equal(expected, next(records))
Example #3
    def test_csv(self):
        """Test for reading csv files"""
        filepath = p.join(io.DATA_DIR, 'no_header_row.csv')
        records = io.read_csv(filepath, has_header=False)
        expected = {'column_1': '1', 'column_2': '2', 'column_3': '3'}
        nt.assert_equal(expected, next(records))

        filepath = p.join(io.DATA_DIR, 'test_bad.csv')
        kwargs = {'sanitize': True, 'first_row': 1, 'first_col': 1}
        records = io.read_csv(filepath, **kwargs)
        nt.assert_equal(self.sheet0_alt, next(records))

        filepath = p.join(io.DATA_DIR, 'fixed_w_header.txt')
        widths = [0, 18, 29, 33, 38, 50]
        records = io.read_fixed_fmt(filepath, widths, has_header=True)
        expected = {
            'News Paper': 'Chicago Reader',
            'Founded': '1971-01-01',
            'Int': '40',
            'Bool': 'True',
            'Float': '1.0',
            'Timestamp': '04:14:001971-01-01T04:14:00'
        }

        nt.assert_equal(expected, next(records))
Example #4
    def test_csv(self):
        """Test for reading csv files"""
        filepath = p.join(io.DATA_DIR, "no_header_row.csv")
        records = io.read_csv(filepath, has_header=False)
        expected = {"column_1": "1", "column_2": "2", "column_3": "3"}
        nt.assert_equal(expected, next(records))

        filepath = p.join(io.DATA_DIR, "test_bad.csv")
        kwargs = {"sanitize": True, "first_row": 1, "first_col": 1}
        records = io.read_csv(filepath, **kwargs)
        nt.assert_equal(self.sheet0_alt, next(records))

        filepath = p.join(io.DATA_DIR, "fixed_w_header.txt")
        widths = [0, 18, 29, 33, 38, 50]
        records = io.read_fixed_fmt(filepath, widths, has_header=True)
        expected = {
            "News Paper": "Chicago Reader",
            "Founded": "1971-01-01",
            "Int": "40",
            "Bool": "True",
            "Float": "1.0",
            "Timestamp": "04:14:001971-01-01T04:14:00",
        }

        nt.assert_equal(expected, next(records))
Example #5
    def test_encoding_detection(self):
        filepath = p.join(io.DATA_DIR, 'latin1.csv')
        records = io.read_csv(filepath, mode='rb')
        nt.assert_equal(self.row1, next(records))
        nt.assert_equal(self.row2, next(records))

        records = io.read_csv(filepath, encoding='ascii')
        nt.assert_equal(self.row1, next(records))
        nt.assert_equal(self.row2, next(records))
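Both calls above return the same rows: opening the file in bytes mode lets meza detect the encoding itself, and even a wrong explicit encoding ('ascii' for a latin-1 file) is detected and recovered from. A minimal standalone sketch of the bytes-mode variant, assuming a latin-1 encoded 'latin1.csv' in the working directory:

    from meza import io

    # 'rb' hands meza raw bytes, so it can sniff the encoding instead of
    # trusting a caller-supplied one
    records = io.read_csv('latin1.csv', mode='rb')
    print(next(records))  # first data row as a dict of strings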
Example #6
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('spreadsheet.csv')
        >>> conf = {
        ...     'url': url, 'sanitize': True, 'skip_rows': 0,
        ...     'encoding': ENCODING}
        >>> objconf = Objectify(conf)
        >>> result = parser(None, objconf, stream={})
        >>> next(result)['mileage'] == '7213'
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        first_row, custom_header = objconf.skip_rows, objconf.col_names
        renamed = {'first_row': first_row, 'custom_header': custom_header}

        f = fetch(decode=True, **objconf)
        rkwargs = merge([objconf, renamed])
        stream = auto_close(read_csv(f, **rkwargs), f)

    return stream
Example #7
def parser(_, objconf, skip, **kwargs):
    """ Parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.lib.utils import Objectify
        >>>
        >>> url = get_path('spreadsheet.csv')
        >>> conf = {'url': url, 'sanitize': True, 'skip_rows': 0}
        >>> objconf = Objectify(conf)
        >>> result, skip = parser(None, objconf, False, stream={})
        >>> next(result)['mileage'] == '7213'
        True
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = utils.get_abspath(objconf.url)
        first_row, custom_header = objconf.skip_rows, objconf.col_names
        renamed = {'first_row': first_row, 'custom_header': custom_header}
        response = urlopen(url)
        encoding = utils.get_response_encoding(response, objconf.encoding)
        rkwargs = utils.combine_dicts(objconf, renamed)
        rkwargs['encoding'] = encoding
        stream = read_csv(response, **rkwargs)

    return stream, skip
Example #8
    def test_bytes_io(self):
        """Test for reading BytesIO"""
        with open(p.join(io.DATA_DIR, "utf8.csv"), "rb") as f:
            b = BytesIO(f.read())
            records = io.read_csv(b, sanitize=True)
            nt.assert_equal(self.row1, next(records))
            nt.assert_equal(self.row2, next(records))
Example #9
    def test_urlopen_latin1(self):
        filepath = p.join(io.DATA_DIR, 'latin1.csv')

        with closing(urlopen('file://%s' % filepath)) as response:
            f = response.fp
            records = io.read_csv(f, encoding='latin-1')
            row = next(it.islice(records, 1, 2))
            nt.assert_equal(self.latin_row, row)
Example #10
    def test_wrong_encoding_detection(self):
        """Test for properly detecting the encoding of a file opened with the
        wrong encoding
        """
        filepath = p.join(io.DATA_DIR, 'latin1.csv')
        records = io.read_csv(filepath, encoding='ascii')
        nt.assert_equal(self.row1, next(records))
        nt.assert_equal(self.row2, next(records))
Example #11
    def test_bytes_encoding_detection(self):
        """Test for properly detecting the encoding of a file opened in bytes
        mode
        """
        filepath = p.join(io.DATA_DIR, 'latin1.csv')
        records = io.read_csv(filepath, mode='rb')
        nt.assert_equal(self.row1, next(records))
        nt.assert_equal(self.row2, next(records))
Example #12
    def test_urlopen_utf8(self):
        filepath = p.join(io.DATA_DIR, 'utf8.csv')

        with closing(urlopen('file://%s' % filepath)) as response:
            f = response.fp
            records = io.read_csv(f)
            row = next(it.islice(records, 1, 2))
            nt.assert_equal(self.utf8_row, row)
Example #13
    def test_urlopen_utf8(self):
        """Test for reading utf-8 files"""
        filepath = p.join(io.DATA_DIR, "utf8.csv")

        with closing(urlopen("file://{}".format(filepath))) as response:
            f = response.fp
            records = io.read_csv(f)
            row = next(it.islice(records, 1, 2))
            nt.assert_equal(self.utf8_row, row)
Example #14
    def test_urlopen_latin1(self):
        """Test for reading latin-1 files"""
        filepath = p.join(io.DATA_DIR, "latin1.csv")

        with closing(urlopen("file://{}".format(filepath))) as response:
            f = response.fp
            records = io.read_csv(f, encoding="latin-1")
            row = next(it.islice(records, 1, 2))
            nt.assert_equal(self.latin_row, row)
Example #15
    def test_urlopen_utf8(self):
        """Test for reading utf-8 files"""
        filepath = p.join(io.DATA_DIR, 'utf8.csv')

        with closing(urlopen('file://%s' % filepath)) as response:
            f = response.fp
            records = io.read_csv(f)
            row = next(it.islice(records, 1, 2))
            nt.assert_equal(self.utf8_row, row)
Example #16
    def test_urlopen_latin1(self):
        """Test for reading latin-1 files"""
        filepath = p.join(io.DATA_DIR, 'latin1.csv')

        with closing(urlopen('file://%s' % filepath)) as response:
            f = response.fp
            records = io.read_csv(f, encoding='latin-1')
            row = next(it.islice(records, 1, 2))
            nt.assert_equal(self.latin_row, row)
Example #17
    def test_windows(self):
        """Test for reading windows-1252 files"""
        filepath = p.join(io.DATA_DIR, "windows1252.csv")

        # based on my testing, when excel for mac saves a csv file as
        # 'Windows-1252', you have to open with 'mac-roman' in order
        # to properly read it
        records = io.read_csv(filepath, encoding="mac-roman")
        nt.assert_equal(self.row1, next(records))
        nt.assert_equal(self.row4, next(records))
Example #18
    def test_newline_json(self):
        expected = {
            'sepal_width': '3.5', 'petal_width': '0.2', 'species':
            'Iris-setosa', 'sepal_length': '5.1', 'petal_length': '1.4'}

        filepath = p.join(io.DATA_DIR, 'iris.csv')
        records = io.read_csv(filepath)
        json = cv.records2json(records, newline=True)
        nt.assert_equal(expected, loads(next(json)))

        filepath = p.join(io.DATA_DIR, 'newline.json')
        records = io.read_json(filepath, newline=True)
        nt.assert_equal({'a': 2, 'b': 3}, next(records))
Example #19
    def test_csv_last_row(self):
        """Test for reading csv files with last_row option"""
        filepath = p.join(io.DATA_DIR, "iris.csv")
        expected = {
            "sepal_width": "3.5",
            "petal_width": "0.2",
            "species": "Iris-setosa",
            "sepal_length": "5.1",
            "petal_length": "1.4",
        }

        records = list(io.read_csv(filepath))
        nt.assert_equal(expected, records[0])
        nt.assert_equal(150, len(records))

        records = list(io.read_csv(filepath, last_row=10))
        nt.assert_equal(expected, records[0])
        nt.assert_equal(10, len(records))

        records = list(io.read_csv(filepath, last_row=-50))
        nt.assert_equal(expected, records[0])
        nt.assert_equal(100, len(records))
Example #20
    def test_opened_files(self):
        """Test for reading open files"""
        filepath = p.join(io.DATA_DIR, 'test.csv')
        header = ['some_date', 'sparse_data', 'some_value', 'unicode_test']

        with open(filepath, encoding='utf-8') as f:
            records = io._read_csv(f, header)  # pylint: disable=W0212
            nt.assert_equal(self.sheet0_alt, next(records))

        f = open(filepath, encoding='utf-8')

        try:
            records = io.read_csv(f, sanitize=True)
            nt.assert_equal(self.sheet0_alt, next(records))
        finally:
            f.close()

        f = open(filepath, 'r', newline=None)  # universal newlines; the 'U' flag was removed in Python 3.11

        try:
            records = io.read_csv(f, sanitize=True)
            nt.assert_equal(self.sheet0_alt, next(records))
        finally:
            f.close()

        filepath = p.join(io.DATA_DIR, 'test.xlsx')

        with open(filepath, 'r+b') as f:
            records = io.read_xls(f, sanitize=True, sheet=0)
            nt.assert_equal(self.sheet0, next(records))

        f = open(filepath, 'r+b')

        try:
            records = io.read_xls(f, sanitize=True, sheet=0)
            nt.assert_equal(self.sheet0, next(records))
        finally:
            f.close()
Example #21
def convertir(filename):
    ofx = OFX(mapping)
    records = read_csv(filename, delimiter=';')
    groups = ofx.gen_groups(records)
    trxns = ofx.gen_trxns(groups)
    cleaned_trxns = ofx.clean_trxns(trxns)
    data = utils.gen_data(cleaned_trxns)
    header = ofx.header()
    body = ofx.gen_body(data)
    footer = ofx.footer()
    content = it.chain(header, body, footer)

    for line in IterStringIO(content):
        print(line)
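IterStringIO yields its chunks as bytes (the other OFX-writing examples below either decode them or open their output file in binary mode), so the print calls above emit bytes objects; decode them if plain text output is wanted. A hypothetical invocation, assuming a semicolon-delimited bank export named 'releve.csv':

    convertir('releve.csv')  # prints the generated OFX content as bytes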
Example #22
    def test_newline_json(self):  # pylint: disable=R0201
        """Test for reading newline delimited JSON files"""
        expected = {
            'sepal_width': '3.5', 'petal_width': '0.2', 'species':
            'Iris-setosa', 'sepal_length': '5.1', 'petal_length': '1.4'}

        filepath = p.join(io.DATA_DIR, 'iris.csv')
        records = io.read_csv(filepath)
        json = cv.records2json(records, newline=True)
        nt.assert_equal(expected, loads(next(json)))

        filepath = p.join(io.DATA_DIR, 'newline.json')
        records = io.read_json(filepath, newline=True)
        nt.assert_equal({'a': 2, 'b': 3}, next(records))
Example #23
def async_parser(_, objconf, skip, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Tuple(Iter[dict], bool): Tuple of (stream, skip)

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from riko.lib.utils import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x[0])['mileage'])
        ...     url = get_path('spreadsheet.csv')
        ...     conf = {'url': url, 'sanitize': True, 'skip_rows': 0}
        ...     objconf = Objectify(conf)
        ...     d = async_parser(None, objconf, False, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        7213
    """
    if skip:
        stream = kwargs['stream']
    else:
        # TODO: write function to extract encoding from response
        url = utils.get_abspath(objconf.url)
        response = yield io.async_url_open(url)
        first_row, custom_header = objconf.skip_rows, objconf.col_names
        renamed = {'first_row': first_row, 'custom_header': custom_header}
        rkwargs = utils.combine_dicts(objconf, renamed)
        rkwargs['encoding'] = objconf.encoding
        stream = read_csv(response, **rkwargs)

    result = (stream, skip)
    return_value(result)
Example #24
    def test_opened_files(self):
        """Test for reading open files"""
        filepath = p.join(io.DATA_DIR, "test.csv")

        with open(filepath, encoding="utf-8") as f:
            records = io.read_csv(f, sanitize=True)
            nt.assert_equal(self.sheet0_alt, next(records))

        f = open(filepath, encoding="utf-8")

        try:
            records = io.read_csv(f, sanitize=True)
            nt.assert_equal(self.sheet0_alt, next(records))
        finally:
            f.close()

        f = open(filepath, "rU", newline=None)

        try:
            records = io.read_csv(f, sanitize=True)
            nt.assert_equal(self.sheet0_alt, next(records))
        finally:
            f.close()

        filepath = p.join(io.DATA_DIR, "test.xlsx")

        with open(filepath, "r+b") as f:
            records = io.read_xls(f, sanitize=True, sheet=0)
            nt.assert_equal(self.sheet0, next(records))

        f = open(filepath, "r+b")

        try:
            records = io.read_xls(f, sanitize=True, sheet=0)
            nt.assert_equal(self.sheet0, next(records))
        finally:
            f.close()
Example #25
def async_parser(_, objconf, skip=False, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x)['mileage'])
        ...     url = get_path('spreadsheet.csv')
        ...     conf = {
        ...         'url': url, 'sanitize': True, 'skip_rows': 0,
        ...         'encoding': ENCODING}
        ...     objconf = Objectify(conf)
        ...     d = async_parser(None, objconf, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        7213
    """
    if skip:
        stream = kwargs['stream']
    else:
        url = get_abspath(objconf.url)
        r = yield io.async_url_open(url)
        first_row, custom_header = objconf.skip_rows, objconf.col_names
        renamed = {'first_row': first_row, 'custom_header': custom_header}
        rkwargs = merge([objconf, renamed])
        stream = auto_close(read_csv(r, **rkwargs), r)

    return_value(stream)
Example #26
def gen_ofx(input_path, output_path, is_credit=False):
    if is_credit:
        from csv2ofx.mappings.DBS_credit import mapping
    else:
        from csv2ofx.mappings.DBS import mapping
    ofx = OFX(mapping)
    records = read_csv(input_path)
    groups = ofx.gen_groups(records)
    trxns = ofx.gen_trxns(groups)
    cleaned_trxns = ofx.clean_trxns(trxns)
    data = utils.gen_data(cleaned_trxns)
    content = it.chain([ofx.header(), ofx.gen_body(data), ofx.footer()])
    with open(output_path, "w") as myfile:
        for line in IterStringIO(content):
            myfile.write(line.decode("utf-8"))
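A hypothetical pair of calls, assuming DBS bank and credit-card CSV exports at the paths shown; the second call switches to the credit-card mapping via is_credit:

    gen_ofx('dbs_statement.csv', 'dbs_statement.ofx')
    gen_ofx('dbs_card.csv', 'dbs_card.ofx', is_credit=True)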
Example #27
    def test_csv(self):
        """Test for reading csv files"""
        filepath = p.join(io.DATA_DIR, 'no_header_row.csv')
        records = io.read_csv(filepath, has_header=False)
        expected = {'column_1': '1', 'column_2': '2', 'column_3': '3'}
        nt.assert_equal(expected, next(records))

        filepath = p.join(io.DATA_DIR, 'test_bad.csv')
        kwargs = {'sanitize': True, 'first_row': 1, 'first_col': 1}
        records = io.read_csv(filepath, **kwargs)
        nt.assert_equal(self.sheet0_alt, next(records))

        filepath = p.join(io.DATA_DIR, 'fixed_w_header.txt')
        widths = [0, 18, 29, 33, 38, 50]
        records = io.read_fixed_fmt(filepath, widths, has_header=True)
        expected = {
            'News Paper': 'Chicago Reader',
            'Founded': '1971-01-01',
            'Int': '40',
            'Bool': 'True',
            'Float': '1.0',
            'Timestamp': '04:14:001971-01-01T04:14:00'}

        nt.assert_equal(expected, next(records))
Example #28
    def test_newline_json(self):  # pylint: disable=R0201
        """Test for reading newline delimited JSON files"""
        expected = {
            "sepal_width": "3.5",
            "petal_width": "0.2",
            "species": "Iris-setosa",
            "sepal_length": "5.1",
            "petal_length": "1.4",
        }

        filepath = p.join(io.DATA_DIR, "iris.csv")
        records = io.read_csv(filepath)
        json = cv.records2json(records, newline=True)
        nt.assert_equal(expected, loads(next(json)))

        filepath = p.join(io.DATA_DIR, "newline.json")
        records = io.read_json(filepath, newline=True)
        nt.assert_equal({"a": 2, "b": 3}, next(records))
Example #29
    def test_fill(self):
        content = 'column_a,column_b,column_c\n'
        content += '1,27,,too long!\n,too short!\n0,mixed types.uh oh,17'
        f = StringIO(content)
        records = io.read_csv(f)
        previous = {}
        current = next(records)
        expected = {'column_a': '1', 'column_b': '27', 'column_c': ''}
        nt.assert_equal(expected, current)

        length = len(current)
        filled = ft.fill(previous, current, value=0)
        previous = dict(it.islice(filled, length))
        count = next(filled)
        nt.assert_equal(count, {'column_a': 0, 'column_b': 0, 'column_c': 1})

        expected = {'column_a': '1', 'column_b': '27', 'column_c': 0}
        nt.assert_equal(expected, previous)

        current = next(records)

        expected = {
            'column_a': '',
            'column_b': u"too short!",
            'column_c': None,
        }

        nt.assert_equal(expected, current)

        kwargs = {'fill_key': 'column_b', 'count': count}
        filled = ft.fill(previous, current, **kwargs)
        previous = dict(it.islice(filled, length))
        count = next(filled)
        nt.assert_equal({'column_a': 1, 'column_b': 0, 'column_c': 2}, count)

        expected = {
            'column_a': u"too short!",
            'column_b': u"too short!",
            'column_c': u"too short!",
        }

        nt.assert_equal(expected, previous)
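The consumption pattern above is the part worth noting: ft.fill is a generator that first yields the current record's key/value pairs (with gaps replaced), then yields a running per-column count of how many values have been filled, which is why the test slices off length items before calling next(filled). A minimal sketch of that protocol, reusing the test module's ft and it aliases and assuming fill treats empty strings as gaps (as the assertions above show):

    previous, current = {}, {'a': '1', 'b': ''}
    filled = ft.fill(previous, current, value='0')
    record = dict(it.islice(filled, len(current)))  # {'a': '1', 'b': '0'}
    fill_count = next(filled)                       # {'a': 0, 'b': 1}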
Example #30
    def test_fill(self):
        content = "column_a,column_b,column_c\n"
        content += "1,27,,too long!\n,too short!\n0,mixed types.uh oh,17"
        f = StringIO(content)
        records = io.read_csv(f)
        previous = {}
        current = next(records)
        expected = {"column_a": "1", "column_b": "27", "column_c": ""}
        nt.assert_equal(expected, current)

        length = len(current)
        filled = ft.fill(previous, current, value=0)
        previous = dict(it.islice(filled, length))
        count = next(filled)
        nt.assert_equal(count, {"column_a": 0, "column_b": 0, "column_c": 1})

        expected = {"column_a": "1", "column_b": "27", "column_c": 0}
        nt.assert_equal(expected, previous)

        current = next(records)

        expected = {
            "column_a": "",
            "column_b": "too short!",
            "column_c": None,
        }

        nt.assert_equal(expected, current)

        kwargs = {"fill_key": "column_b", "count": count}
        filled = ft.fill(previous, current, **kwargs)
        previous = dict(it.islice(filled, length))
        count = next(filled)
        nt.assert_equal({"column_a": 1, "column_b": 0, "column_c": 2}, count)

        expected = {
            "column_a": "too short!",
            "column_b": "too short!",
            "column_c": "too short!",
        }

        nt.assert_equal(expected, previous)
Example #31
    def write_ofx(self, ofxfile):
        """ write out ofxfile from DataFrame """
        mapping = {
            'account': 'account',
            'date': itemgetter('date'),
            'payee': itemgetter('title'),
            'amount': itemgetter('amount'),
            }

        ofx = OFX(mapping)
        data = self._df.to_csv(quoting=csv.QUOTE_ALL)
        records = read_csv(StringIO(data))
        groups = ofx.gen_groups(records)
        cleaned_trxns = ofx.clean_trxns(groups)
        data = utils.gen_data(cleaned_trxns)

        content = it.chain([ofx.header(), ofx.gen_body(data), ofx.footer()])

        with open(ofxfile, 'wb') as f:
            for line in IterStringIO(content):
                f.write(line)
Example #32
    def test_kwargs(self):
        filepath = p.join(io.DATA_DIR, 'utf8.csv')
        kwargs = {'delimiter': ','}
        records = io.read_csv(filepath, **kwargs)
        nt.assert_equal(self.row1, next(records))
Example #33
    def test_utf16_big(self):
        filepath = p.join(io.DATA_DIR, 'utf16_big.csv')
        records = io.read_csv(filepath, encoding='utf-16-be')
        nt.assert_equal(self.row1, next(records))
        nt.assert_equal(self.row3, next(records))
Example #34
    def test_utf16_little(self):
        filepath = p.join(io.DATA_DIR, 'utf16_little.csv')
        records = io.read_csv(filepath, encoding='utf-16-le')
        nt.assert_equal(self.row1, next(records))
        nt.assert_equal(self.row3, next(records))
Example #35
    def test_wrong_encoding_detection(self):
        filepath = p.join(io.DATA_DIR, 'latin1.csv')
        records = io.read_csv(filepath, encoding='ascii')
        nt.assert_equal(self.row1, next(records))
        nt.assert_equal(self.row2, next(records))
Example #36
def run():  # noqa: C901
    """Parses the CLI options and runs the main program
    """
    if args.debug:
        pprint(dict(args._get_kwargs()))  # pylint: disable=W0212
        exit(0)

    if args.version:
        from . import __version__ as version

        print("v%s" % version)
        exit(0)

    if args.list_mappings:
        print(", ".join(MODULES))
        exit(0)

    if args.custom:
        name = p.splitext(p.split(args.custom)[1])[0]
        found = find_module(name, [p.dirname(p.abspath(args.custom))])
        module = load_module(name, *found)
    else:
        module = import_module("csv2ofx.mappings.%s" % args.mapping)

    mapping = module.mapping

    okwargs = {
        "def_type": args.account_type or "Bank" if args.qif else "CHECKING",
        "split_header": args.split,
        "start": parse(args.start) if args.start else None,
        "end": parse(args.end) if args.end else None,
    }

    cont = QIF(mapping, **okwargs) if args.qif else OFX(mapping, **okwargs)
    source = open(args.source, encoding=args.encoding) if args.source else stdin

    try:
        records = read_csv(source, has_header=cont.has_header)
        groups = cont.gen_groups(records, args.chunksize)
        trxns = cont.gen_trxns(groups, args.collapse)
        cleaned_trxns = cont.clean_trxns(trxns)
        data = utils.gen_data(cleaned_trxns)
        body = cont.gen_body(data)

        if args.server_date:
            server_date = parse(args.server_date)
        else:
            try:
                mtime = p.getmtime(source.name)
            except AttributeError:
                mtime = time.time()

            server_date = dt.fromtimestamp(mtime)

        header = cont.header(date=server_date, language=args.language)
        footer = cont.footer(date=server_date)
        filtered = filter(None, [header, body, footer])
        content = it.chain.from_iterable(filtered)
        kwargs = {"overwrite": args.overwrite, "chunksize": args.chunksize, "encoding": args.encoding}
    except:
        source.close()
        raise

    dest = open(args.dest, "w", encoding=args.encoding) if args.dest else stdout

    try:
        res = write(dest, IterStringIO(content), **kwargs)
    except KeyError as err:
        msg = "Field %s is missing from file. Check `mapping` option." % err
    except TypeError as err:
        msg = "No data to write. %s. " % str(err)

        if args.collapse:
            msg += "Check `start` and `end` options."
        else:
            msg += "Try again with `-c` option."
    except Exception as err:  # pylint: disable=broad-except
        msg = 1
        traceback.print_exc()
    else:
        msg = 0 if res else "No data to write. Check `start` and `end` options."
    finally:
        # close the files before exiting; exit() raises SystemExit, so any
        # statement placed after it in this block would never run
        source.close() if args.source else None
        dest.close() if args.dest else None
        exit(msg)
Example #37
    def test_utf8(self):
        """Test for reading utf-8 files"""
        filepath = p.join(io.DATA_DIR, 'utf8.csv')
        records = io.read_csv(filepath, sanitize=True)
        nt.assert_equal(self.row1, next(records))
        nt.assert_equal(self.row3, next(records))
Example #38
    def test_latin1(self):
        """Test for reading latin-1 files"""
        filepath = p.join(io.DATA_DIR, 'latin1.csv')
        records = io.read_csv(filepath, encoding='latin-1')
        nt.assert_equal(self.row1, next(records))
        nt.assert_equal(self.row2, next(records))
Example #39
    def test_kwargs(self):
        """Test for passing kwargs while reading csv files"""
        filepath = p.join(io.DATA_DIR, 'utf8.csv')
        kwargs = {'delimiter': ','}
        records = io.read_csv(filepath, **kwargs)
        nt.assert_equal(self.row1, next(records))
Example #40
from io import open, StringIO
from meza import io

fs_base = '/Users/Sam/Source/categorisation/'
training_url = fs_base + 'categorised-txns-training-set.csv'

records = io.read_csv(training_url)
f = StringIO()
Example #41
from io import StringIO

from meza.io import read_csv


def csv2records(string, has_header=True, delimiter=","):
    return list(
        read_csv(StringIO(string), has_header=has_header, delimiter=delimiter))
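A usage sketch: as the tests above show, meza's read_csv yields one dict per data row, keyed by the header and with every value as a string, so this helper turns a CSV string straight into a list of records. The sample data is illustrative:

    rows = csv2records('a,b\n1,2\n3,4\n')
    assert rows == [{'a': '1', 'b': '2'}, {'a': '3', 'b': '4'}]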
Example #42
    def test_utf16_little(self):
        """Test for reading utf-16LE files"""
        filepath = p.join(io.DATA_DIR, 'utf16_little.csv')
        records = io.read_csv(filepath, encoding='utf-16-le')
        nt.assert_equal(self.row1, next(records))
        nt.assert_equal(self.row3, next(records))