def test_reader_func(self):
        """
        Check that :code:`fixed.reader` produces the same header and rows
        for the fixed-width example file as the pre-converted CSV fixture.
        """
        # Expected values come from the already-converted CSV fixture.
        with open('examples/testfixed_converted.csv') as converted_f:
            expected_rows = csv.reader(converted_f)
            expected_header = next(expected_rows)
            expected_data = list(expected_rows)

        # Actual values: parse the fixed-width file using its schema file.
        with open('examples/testfixed') as fixed_f, \
                open('examples/testfixed_schema.csv') as schema_f:
            actual_reader = fixed.reader(fixed_f, schema_f)
            actual_data = list(actual_reader)

        self.assertEqual(expected_header, actual_reader.fieldnames)
        self.assertEqual(expected_data, actual_data)
Example #2
0
File: fixed.py Project: 01-/agate
    def __init__(self, f, schema_f):
        """
        Store the data file and build the field list from a schema CSV.

        :param f:
            The file object to be read; stored as :code:`self.file`.
        :param schema_f:
            A file object containing the schema as CSV. Its header row must
            be exactly :code:`column`, :code:`start`, :code:`length`.
        :raises ValueError:
            If the schema header is not the expected three columns.
        """
        from agate import csv

        self.file = f
        self.fields = []

        schema_rows = csv.reader(schema_f)
        expected_header = ['column', 'start', 'length']

        if next(schema_rows) != expected_header:
            raise ValueError('Schema must contain exactly three columns: "column", "start", and "length".')

        # One Field per schema row: name, then integer start and length.
        self.fields.extend(
            Field(entry[0], int(entry[1]), int(entry[2])) for entry in schema_rows
        )
Example #3
0
    def __init__(self, f, schema_f):
        """
        Keep a reference to the data file and load field definitions from
        the schema CSV.

        :param f:
            The file object to be read; kept on :code:`self.file`.
        :param schema_f:
            A file object holding the schema as CSV, whose first row must be
            :code:`column`, :code:`start`, :code:`length`.
        :raises ValueError:
            If the first schema row differs from the expected header.
        """
        from agate import csv

        self.file = f
        self.fields = []

        schema_reader = csv.reader(schema_f)
        schema_header = next(schema_reader)

        header_ok = schema_header == ['column', 'start', 'length']

        if not header_ok:
            raise ValueError(
                'Schema must contain exactly three columns: "column", "start", and "length".'
            )

        # Each remaining row describes one field; start and length are
        # converted to integers.
        for definition in schema_reader:
            name = definition[0]
            start = int(definition[1])
            length = int(definition[2])
            self.fields.append(Field(name, start, length))
Example #4
0
def from_csv(cls, path, column_names=None, column_types=None, row_names=None, skip_lines=0, header=True, sniff_limit=0, encoding='utf-8', **kwargs):
    """
    Build a new table from CSV data.

    Parsing is done with agate's own CSV reader, which supplies encoding
    support for both Python 2 and Python 3. Any extra :code:`kwargs` are
    forwarded to that reader.

    :param path:
        A file path, or a file-like object (anything with a :code:`read`
        attribute), to read the CSV data from.
    :param column_names:
        See :meth:`.Table.__init__`.
    :param column_types:
        See :meth:`.Table.__init__`.
    :param row_names:
        See :meth:`.Table.__init__`.
    :param skip_lines:
        Either an integer count of lines to drop from the top of the file,
        or a sequence of zero-based line indexes to drop.
    :param header:
        If `True`, the first row of the CSV is treated as the header row and
        removed from the data. When `column_names` is also given, the row is
        still removed but `column_names` takes precedence.
    :param sniff_limit:
        Number of characters of the content handed to the dialect sniffer.
        `None` sniffs the entire content; the default of 0 disables
        sniffing.
    :param encoding:
        Character encoding used when opening `path` as a file path. A
        file-like object is assumed to have been opened with the correct
        encoding already.
    :raises ValueError:
        If `skip_lines` is neither a sequence nor an int.
    """
    from agate import csv
    from agate.table import Table

    # Read every line up front, from either the supplied handle or a file we open.
    if hasattr(path, 'read'):
        raw_lines = path.readlines()
    else:
        with io.open(path, encoding=encoding) as handle:
            raw_lines = handle.readlines()

    # Drop the requested lines before parsing.
    if utils.issequence(skip_lines):
        kept_lines = [text for index, text in enumerate(raw_lines) if index not in skip_lines]
    elif isinstance(skip_lines, int):
        kept_lines = raw_lines[skip_lines:]
    else:
        raise ValueError('skip_lines argument must be an int or sequence')

    contents = ''.join(kept_lines)

    # Work out what (if anything) to hand to the dialect sniffer.
    if sniff_limit is None:
        sniff_sample = contents
    elif sniff_limit > 0:
        sniff_sample = contents[:sniff_limit]
    else:
        sniff_sample = None

    if sniff_sample is not None:
        kwargs['dialect'] = csv.Sniffer().sniff(sniff_sample)

    if six.PY2:
        contents = contents.encode('utf-8')

    buffer = six.StringIO(contents)
    rows = list(csv.reader(buffer, header=header, **kwargs))

    # The header row is always removed; it only supplies the column names
    # when the caller did not provide them.
    if header:
        first_row = rows.pop(0)

        if column_names is None:
            column_names = first_row

    return Table(rows, column_names, column_types, row_names=row_names)
Example #5
0
def from_csv(cls, path, column_names=None, column_types=None, row_names=None, skip_lines=0, header=True, sniff_limit=0,
             encoding='utf-8', row_limit=None, **kwargs):
    """
    Build a new table from CSV data.

    Parsing is done with agate's own CSV reader, which supplies encoding
    support for both Python 2 and Python 3. Any extra :code:`kwargs` are
    forwarded to that reader.

    :param path:
        A file path, or a file-like object, to read the CSV data from. A
        file-like object must be seekable, and under Python 2 it should be
        opened in binary mode (`rb`).
    :param column_names:
        See :meth:`.Table.__init__`.
    :param column_types:
        See :meth:`.Table.__init__`.
    :param row_names:
        See :meth:`.Table.__init__`.
    :param skip_lines:
        How many lines to discard from the top of the file.
    :param header:
        If :code:`True`, the first row of the CSV is treated as the column
        names. When :code:`column_names` is also given, that row is still
        consumed but :code:`column_names` takes precedence.
    :param sniff_limit:
        Number of characters of the content handed to the dialect sniffer.
        None sniffs the entire content; the default of 0 disables sniffing.
    :param encoding:
        Character encoding of the CSV file. A file handle is assumed to have
        been opened with the correct encoding already.
    :param row_limit:
        Maximum number of data rows to read; None reads them all.
    :raises ValueError:
        If :code:`skip_lines` is not an int.
    """
    from agate import csv
    from agate.table import Table

    # Only close handles this method opened itself.
    owns_handle = False

    try:
        if hasattr(path, 'read'):
            handle = path
        elif six.PY2:
            handle = open(path, 'Urb')
            owns_handle = True
        else:
            handle = io.open(path, encoding=encoding)
            owns_handle = True

        if not isinstance(skip_lines, int):
            raise ValueError('skip_lines argument must be an int')

        remaining = skip_lines

        while remaining > 0:
            handle.readline()
            remaining -= 1

        # Buffer the rest of the input so it can be both sniffed and parsed.
        buffered = six.StringIO(handle.read())

        if sniff_limit is None:
            kwargs['dialect'] = csv.Sniffer().sniff(buffered.getvalue())
        elif sniff_limit > 0:
            sample = buffered.getvalue()[:sniff_limit]
            kwargs['dialect'] = csv.Sniffer().sniff(sample)

        if six.PY2:
            kwargs['encoding'] = encoding

        reader = csv.reader(buffered, header=header, **kwargs)

        # The header row is always consumed; it only supplies the column
        # names when the caller did not provide them.
        if header:
            first_row = next(reader)

            if column_names is None:
                column_names = first_row

        data_rows = reader if row_limit is None else itertools.islice(reader, row_limit)
        rows = tuple(data_rows)

    finally:
        if owns_handle:
            handle.close()

    return Table(rows, column_names, column_types, row_names=row_names)
Example #6
0
def from_csv(
    cls,
    path,
    column_names=None,
    column_types=None,
    row_names=None,
    skip_lines=0,
    header=True,
    sniff_limit=0,
    encoding="utf-8",
    **kwargs
):
    """
    Create a new table from a CSV.

    This method uses agate's builtin CSV reader, which supplies encoding
    support for both Python 2 and Python 3.

    :code:`kwargs` will be passed through to the CSV reader.

    :param path:
        Filepath or file-like object from which to read CSV data. If a file-like
        object is specified, it must be seekable. A file-like object passed
        in by the caller is never closed by this method.
    :param column_names:
        See :meth:`.Table.__init__`.
    :param column_types:
        See :meth:`.Table.__init__`.
    :param row_names:
        See :meth:`.Table.__init__`.
    :param skip_lines:
        The number of lines to skip from the top of the file.
    :param header:
        If :code:`True`, the first row of the CSV is assumed to contain column
        names. If :code:`header` and :code:`column_names` are both specified
        then a row will be skipped, but :code:`column_names` will be used.
    :param sniff_limit:
        Limit CSV dialect sniffing to the specified number of characters read
        from the file. Set to None to sniff the entire file. Defaults to 0
        (no sniffing).
    :param encoding:
        Character encoding of the CSV file. Note: if passing in a file
        handle it is assumed you have already opened it with the correct
        encoding specified.
    :raises ValueError:
        If :code:`skip_lines` is not an int. This is checked before any file
        is opened.
    """
    # Validate arguments first so that a bad call never opens a file.
    if not isinstance(skip_lines, int):
        raise ValueError("skip_lines argument must be an int")

    from agate import csv
    from agate.table import Table

    # Only close handles this method opened itself.
    close = False

    if hasattr(path, "read"):
        f = path
    else:
        f = io.open(path, encoding=encoding)
        close = True

    try:
        for _ in range(skip_lines):
            f.readline()

        # Remember where the data starts so sniffing does not consume it.
        start = f.tell()

        if sniff_limit is None:
            kwargs["dialect"] = csv.Sniffer().sniff(f.read())
        elif sniff_limit > 0:
            kwargs["dialect"] = csv.Sniffer().sniff(f.read(sniff_limit))

        f.seek(start)

        # On Python 2 the remaining text is re-encoded as UTF-8 into an
        # in-memory buffer for the reader. Keep ``f`` bound to the real
        # handle so the ``finally`` below closes the file, not the buffer.
        if six.PY2:
            source = six.StringIO(f.read().encode("utf-8"))
        else:
            source = f

        reader = csv.reader(source, header=header, **kwargs)

        if header:
            if column_names is None:
                column_names = next(reader)
            else:
                next(reader)

        rows = tuple(reader)
    finally:
        # Runs on success and on any error (sniffing, parsing, ...), so an
        # owned file handle is never leaked.
        if close:
            f.close()

    return Table(rows, column_names, column_types, row_names=row_names)