Beispiel #1
0
    def from_csv(cls, path, column_names=None, column_types=None, row_names=None, header=True, **kwargs):
        """
        Build a table from the contents of a CSV. Parsing is done by agate's
        builtin CSV reader, which supports unicode on both Python 2 and
        Python 3.

        Any extra `kwargs` are forwarded unchanged to the CSV reader.

        :param path:
            Either a filepath to open, or an already-open file-like object
            (anything exposing a `read` attribute) to read CSV data from.
        :param column_names:
            See :meth:`Table.__init__`.
        :param column_types:
            See :meth:`Table.__init__`.
        :param row_names:
            See :meth:`Table.__init__`.
        :param header:
            If `True`, the first row of the CSV is assumed to contain headers
            and is removed from the data. When `column_names` is also given,
            the first row is still removed, but `column_names` takes
            precedence over it.
        """
        # Anything with a `read` attribute is treated as an open file handle;
        # everything else is treated as a path that must be opened here.
        if not hasattr(path, 'read'):
            with open(path) as handle:
                records = list(csv.reader(handle, **kwargs))
        else:
            records = list(csv.reader(path, **kwargs))

        if header:
            # The header row is always consumed; it only becomes the column
            # names when the caller did not supply their own.
            first_row = records.pop(0)

            if column_names is None:
                column_names = first_row

        return Table(records, column_names, column_types, row_names=row_names)
Beispiel #2
0
    def from_csv(cls,
                 path,
                 column_names=None,
                 column_types=None,
                 row_names=None,
                 header=True,
                 **kwargs):
        """
        Create a new table for a CSV. This method uses agate's builtin
        CSV reader, which supports unicode on both Python 2 and Python 3.

        If any rows are shorter than the longest row, `None`s will be added to
        fill gaps. Padding is applied before the header row is removed, so a
        short header row is padded as well.

        `kwargs` will be passed through to the CSV reader.

        :param path:
            Filepath or file-like object from which to read CSV data.
        :param column_names:
            See :meth:`Table.__init__`.
        :param column_types:
            See :meth:`Table.__init__`.
        :param row_names:
            See :meth:`Table.__init__`.
        :param header:
            If `True`, the first row of the CSV is assumed to contain headers
            and will be skipped. If `header` and `column_names` are both
            specified then a row will be skipped, but `column_names` will be
            used.
        :raises ValueError:
            If `header` is `True` but the CSV contains no rows at all.
        """
        if hasattr(path, 'read'):
            rows = list(csv.reader(path, **kwargs))
        else:
            with open(path) as f:
                rows = list(csv.reader(f, **kwargs))

        # An empty CSV cannot supply a header row. Fail with a clear message
        # instead of the opaque "max() arg is an empty sequence" error that
        # the padding step used to produce for empty input.
        if header and not rows:
            raise ValueError('CSV contains no rows, but a header row was expected.')

        # Pad ragged rows in place so every row has the same number of cells.
        # Guarded because max() of an empty sequence raises ValueError.
        if rows:
            max_length = max(len(row) for row in rows)

            for row in rows:
                missing = max_length - len(row)

                if missing:
                    row.extend([None] * missing)

        if header:
            if column_names is None:
                column_names = rows.pop(0)
            else:
                rows.pop(0)

        return Table(rows, column_names, column_types, row_names=row_names)
Beispiel #3
0
 def test_reader_alias(self):
     # Rows expected from examples/test.csv, in file order (header first).
     expected_rows = [
         ['one', 'two', 'three'],
         ['1', '4', 'a'],
         ['2', '3', 'b'],
         ['', '2', u'👍'],
     ]

     with open('examples/test.csv') as handle:
         rows = csv_py2.reader(handle, encoding='utf-8')

         # Pull one row at a time so a mismatch is reported at the first
         # offending row.
         for expected in expected_rows:
             self.assertEqual(next(rows), expected)
Beispiel #4
0
 def test_reader_alias(self):
     # Check that the module-level `reader` alias parses the example file
     # into lists of strings, row by row.
     with open('examples/test.csv') as csv_file:
         parsed = csv_py2.reader(csv_file, encoding='utf-8')

         for wanted in (
             ['one', 'two', 'three'],
             ['1', '4', 'a'],
             ['2', '3', 'b'],
             ['', '2', u'👍'],
         ):
             self.assertEqual(next(parsed), wanted)
Beispiel #5
0
    def from_csv(cls, path, column_info=None, row_names=None, header=True, **kwargs):
        """
        Create a new table for a CSV. This method uses agate's builtin
        CSV reader, which supports unicode on both Python 2 and Python 3.

        ``kwargs`` will be passed through to the CSV reader.

        :param path:
            Filepath or file-like object from which to read CSV data.
        :param column_info:
            May be any valid input to :meth:`Table.__init__` or an instance of
            :class:`.TypeTester`. Or, None, in which case a generic
            :class:`.TypeTester` will be created.
        :param row_names:
            See :meth:`Table.__init__`.
        :param header:
            If `True`, the first row of the CSV is assumed to contain headers
            and will be skipped.
        :raises ValueError:
            If explicit `column_info` is given and its length does not match
            the number of columns found in the CSV.
        """
        if column_info is None:
            column_info = TypeTester()

        # A TypeTester means the column definitions must be inferred from the
        # data; anything else is taken as explicit column definitions.
        use_inference = isinstance(column_info, TypeTester)

        if hasattr(path, 'read'):
            rows = list(csv.reader(path, **kwargs))
        else:
            with open(path) as f:
                rows = list(csv.reader(f, **kwargs))

        if header:
            column_names = rows.pop(0)
        else:
            # No header row: use one placeholder name per column of the
            # first data row.
            column_names = [None] * len(rows[0])

        if use_inference:
            column_info = column_info.run(rows, column_names)
        elif len(column_names) != len(column_info):
            # The mismatch can go either way, so report both counts rather
            # than claiming the CSV always has "more" columns.
            raise ValueError(
                'CSV contains %i columns, but %i columns were specified.'
                % (len(column_names), len(column_info))
            )

        return Table(rows, column_info, row_names=row_names)
Beispiel #6
0
    def from_csv(cls, path, column_names=None, column_types=None, row_names=None, header=True, sniff_limit=0, encoding='utf-8', **kwargs):
        """
        Build a table from the contents of a CSV.

        Parsing is done by agate's builtin CSV reader, which supplies
        encoding support for both Python 2 and Python 3.

        Any extra :code:`kwargs` are forwarded to the CSV reader.

        :param path:
            Filepath or file-like object from which to read CSV data.
        :param column_names:
            See :meth:`.Table.__init__`.
        :param column_types:
            See :meth:`.Table.__init__`.
        :param row_names:
            See :meth:`.Table.__init__`.
        :param header:
            If `True`, the first row of the CSV is assumed to contain headers
            and is removed from the data. When `column_names` is also given,
            the first row is still removed, but `column_names` is used.
        :param sniff_limit:
            Limit CSV dialect sniffing to the specified number of bytes. Set to
            None to sniff the entire file. Defaults to 0 or no sniffing.
        :param encoding:
            Character encoding of the CSV file. Only applied when `path` is a
            filepath; a file handle is assumed to have been opened with the
            correct encoding already.
        """
        # Read everything up front: the full text is needed both for dialect
        # sniffing and for parsing.
        if not hasattr(path, 'read'):
            with io.open(path, encoding=encoding) as handle:
                text = handle.read()
        else:
            text = path.read()

        # `None` means "sniff the whole text"; a positive number sniffs only
        # that leading slice; anything else disables sniffing.
        sniff_everything = sniff_limit is None

        if sniff_everything or sniff_limit > 0:
            sample = text if sniff_everything else text[:sniff_limit]
            kwargs['dialect'] = csv.Sniffer().sniff(sample)

        if six.PY2:
            text = text.encode('utf-8')

        reader = csv.reader(six.StringIO(text), header=header, **kwargs)
        records = list(reader)

        if header:
            # The first row is always consumed; it only supplies the column
            # names when none were passed in.
            first_row = records.pop(0)

            if column_names is None:
                column_names = first_row

        return Table(records, column_names, column_types, row_names=row_names)
Beispiel #7
0
    def test_writer_alias(self):
        # Round-trip check: rows written through the `writer` alias must come
        # back unchanged through the `reader` alias.
        rows_to_write = [
            ['a', 'b', 'c'],
            ['1', '2', '3'],
            ['4', '5', u'ʤ'],
        ]
        # Copy before writing so the expectations are independent of the
        # lists handed to the writer.
        expected_rows = [list(row) for row in rows_to_write]

        buffer = six.StringIO()
        writer = csv_py2.writer(buffer, encoding='utf-8')
        self.assertEqual(writer._eight_bit, True)

        for row in rows_to_write:
            writer.writerow(row)

        reread = six.StringIO(buffer.getvalue())
        reader = csv_py2.reader(reread, encoding='utf-8')

        for expected in expected_rows:
            self.assertEqual(next(reader), expected)
Beispiel #8
0
    def test_writer_alias(self):
        # Write three rows with the `writer` alias, then parse the produced
        # text with the `reader` alias and compare row by row.
        sink = six.StringIO()
        csv_writer = csv_py2.writer(sink, encoding='utf-8')
        self.assertEqual(csv_writer._eight_bit, True)

        for outgoing in (['a', 'b', 'c'], ['1', '2', '3'], ['4', '5', u'ʤ']):
            csv_writer.writerow(outgoing)

        source = six.StringIO(sink.getvalue())
        csv_reader = csv_py2.reader(source, encoding='utf-8')

        for incoming in (['a', 'b', 'c'], ['1', '2', '3'], ['4', '5', u'ʤ']):
            self.assertEqual(next(csv_reader), incoming)
Beispiel #9
0
    def test_writer_alias(self):
        # Round-trip check for the `writer` / `reader` aliases, including a
        # non-ASCII cell in the last row.
        payload = [
            ["a", "b", "c"],
            ["1", "2", "3"],
            ["4", "5", u"ʤ"],
        ]
        # Snapshot the expectations before anything is handed to the writer.
        expectations = [list(cells) for cells in payload]

        out_stream = six.StringIO()
        writer = csv_py2.writer(out_stream, encoding="utf-8")
        self.assertEqual(writer._eight_bit, True)

        for cells in payload:
            writer.writerow(cells)

        in_stream = six.StringIO(out_stream.getvalue())
        reader = csv_py2.reader(in_stream, encoding="utf-8")

        for cells in expectations:
            self.assertEqual(next(reader), cells)
Beispiel #10
0
    def from_csv(cls, path, column_names=None, column_types=None, row_names=None, header=True, sniff_limit=0, **kwargs):
        """
        Build a table from the contents of a CSV. Parsing is done by agate's
        builtin CSV reader, which supports unicode on both Python 2 and
        Python 3.

        Any extra `kwargs` are forwarded to the CSV reader.

        :param path:
            Filepath or file-like object from which to read CSV data.
        :param column_names:
            See :meth:`Table.__init__`.
        :param column_types:
            See :meth:`Table.__init__`.
        :param row_names:
            See :meth:`Table.__init__`.
        :param header:
            If `True`, the first row of the CSV is assumed to contain headers
            and is removed from the data. When `column_names` is also given,
            the first row is still removed, but `column_names` is used.
        :param sniff_limit:
            Limit CSV dialect sniffing to the specified number of bytes. Set to
            None to sniff the entire file. Defaults to 0 or no sniffing.
        """
        # The whole text is read first because it feeds both the dialect
        # sniffer and the parser.
        if not hasattr(path, 'read'):
            with open(path) as source:
                raw = source.read()
        else:
            raw = path.read()

        # None -> sniff the full text; positive -> sniff only a prefix;
        # otherwise leave the dialect alone.
        whole_file = sniff_limit is None

        if whole_file or sniff_limit > 0:
            sample = raw if whole_file else raw[:sniff_limit]
            kwargs['dialect'] = csv.Sniffer().sniff(sample)

        parsed = csv.reader(StringIO(raw), header=header, **kwargs)
        rows = list(parsed)

        if header:
            # Always drop the first row; keep it as the column names only if
            # the caller supplied none.
            leading_row = rows.pop(0)

            if column_names is None:
                column_names = leading_row

        return Table(rows, column_names, column_types, row_names=row_names)
Beispiel #11
0
    def from_csv(cls,
                 path,
                 column_names=None,
                 column_types=None,
                 row_names=None,
                 skip_lines=0,
                 header=True,
                 sniff_limit=0,
                 encoding='utf-8',
                 **kwargs):
        """
        Create a new table from a CSV.

        This method uses agate's builtin CSV reader, which supplies encoding
        support for both Python 2 and Python 3.

        :code:`kwargs` will be passed through to the CSV reader.

        :param path:
            Filepath or file-like object from which to read CSV data.
        :param column_names:
            See :meth:`.Table.__init__`.
        :param column_types:
            See :meth:`.Table.__init__`.
        :param row_names:
            See :meth:`.Table.__init__`.
        :param skip_lines:
            Either a single number indicating the number of lines to skip from
            the top of the file or a sequence of line indexes to skip where the
            first line is index 0. Skipping happens on raw text lines, before
            any CSV parsing or header handling.
        :param header:
            If `True`, the first row of the CSV is assumed to contain headers
            and will be skipped. If `header` and `column_names` are both
            specified then a row will be skipped, but `column_names` will be
            used.
        :param sniff_limit:
            Limit CSV dialect sniffing to the specified number of bytes. Set to
            None to sniff the entire file. Defaults to 0 or no sniffing.
        :param encoding:
            Character encoding of the CSV file. Note: if passing in a file
            handle it is assumed you have already opened it with the correct
            encoding specified.
        :raises ValueError:
            If `skip_lines` is neither an int nor a sequence.
        """
        if hasattr(path, 'read'):
            lines = path.readlines()
        else:
            with io.open(path, encoding=encoding) as f:
                lines = f.readlines()

        if utils.issequence(skip_lines):
            # Build the lookup once: membership tests against a set are O(1),
            # whereas testing against the raw sequence rescans it for every
            # line of the file.
            skipped_indexes = set(skip_lines)
            lines = [
                line for i, line in enumerate(lines)
                if i not in skipped_indexes
            ]
        elif isinstance(skip_lines, int):
            lines = lines[skip_lines:]
        else:
            raise ValueError('skip_lines argument must be an int or sequence')

        contents = ''.join(lines)

        # `None` sniffs the whole remaining text, a positive limit sniffs only
        # that prefix, and anything else leaves the dialect untouched.
        if sniff_limit is None:
            kwargs['dialect'] = csv.Sniffer().sniff(contents)
        elif sniff_limit > 0:
            kwargs['dialect'] = csv.Sniffer().sniff(contents[:sniff_limit])

        if six.PY2:
            contents = contents.encode('utf-8')

        rows = list(csv.reader(six.StringIO(contents), header=header,
                               **kwargs))

        if header:
            if column_names is None:
                column_names = rows.pop(0)
            else:
                rows.pop(0)

        return Table(rows, column_names, column_types, row_names=row_names)