def from_csv(cls, path, column_names=None, column_types=None, row_names=None, header=True, **kwargs):
    """
    Create a new table from a CSV file or file-like object.

    This method uses agate's builtin CSV reader, which supports unicode
    on both Python 2 and Python 3. Any ``kwargs`` are forwarded to the
    CSV reader.

    :param path: Filepath or file-like object from which to read CSV data.
    :param column_names: See :meth:`Table.__init__`.
    :param column_types: See :meth:`Table.__init__`.
    :param row_names: See :meth:`Table.__init__`.
    :param header: If `True`, the first row of the CSV is assumed to contain
        headers and will be skipped. If `header` and `column_names` are both
        specified then a row will be skipped, but `column_names` will be used.
    """
    if hasattr(path, 'read'):
        rows = list(csv.reader(path, **kwargs))
    else:
        with open(path) as f:
            rows = list(csv.reader(f, **kwargs))

    if header:
        # The header row is always consumed; it only becomes the column
        # names when the caller did not supply their own.
        first_row = rows.pop(0)

        if column_names is None:
            column_names = first_row

    return Table(rows, column_names, column_types, row_names=row_names)
def from_csv(cls, path, column_names=None, column_types=None, row_names=None, header=True, **kwargs):
    """
    Create a new table from a CSV file or file-like object.

    This method uses agate's builtin CSV reader, which supports unicode
    on both Python 2 and Python 3. If any rows are shorter than the
    longest row, `None`s will be added to fill the gaps. ``kwargs`` will
    be passed through to the CSV reader.

    :param path: Filepath or file-like object from which to read CSV data.
    :param column_names: See :meth:`Table.__init__`.
    :param column_types: See :meth:`Table.__init__`.
    :param row_names: See :meth:`Table.__init__`.
    :param header: If `True`, the first row of the CSV is assumed to contain
        headers and will be skipped. If `header` and `column_names` are both
        specified then a row will be skipped, but `column_names` will be used.
    """
    if hasattr(path, 'read'):
        rows = list(csv.reader(path, **kwargs))
    else:
        with open(path) as f:
            rows = list(csv.reader(f, **kwargs))

    # Guard against a completely empty file: max() raises ValueError on
    # an empty sequence, and pop(0) raises IndexError on an empty list.
    if rows:
        max_length = max(map(len, rows))

        # Pad short rows with None so every row has the same width.
        for row in rows:
            if len(row) < max_length:
                row.extend([None] * (max_length - len(row)))

        if header:
            if column_names is None:
                column_names = rows.pop(0)
            else:
                rows.pop(0)

    return Table(rows, column_names, column_types, row_names=row_names)
def test_reader_alias(self):
    """The ``reader`` alias decodes UTF-8 CSV rows, including emoji."""
    expected_rows = [
        ['one', 'two', 'three'],
        ['1', '4', 'a'],
        ['2', '3', 'b'],
        ['', '2', u'\U0001f44d'],
    ]

    with open('examples/test.csv') as f:
        parsed = csv_py2.reader(f, encoding='utf-8')

        for expected in expected_rows:
            self.assertEqual(next(parsed), expected)
def from_csv(cls, path, column_info=None, row_names=None, header=True, **kwargs):
    """
    Create a new table from a CSV file or file-like object.

    This method uses agate's builtin CSV reader, which supports unicode
    on both Python 2 and Python 3. ``kwargs`` will be passed through to
    the CSV reader.

    :param path: Filepath or file-like object from which to read CSV data.
    :param column_info: May be any valid input to :meth:`Table.__init__` or
        an instance of :class:`.TypeTester`. Or, None, in which case a
        generic :class:`.TypeTester` will be created.
    :param row_names: See :meth:`Table.__init__`.
    :param header: If `True`, the first row of the CSV is assumed to contain
        headers and will be skipped.

    :raises ValueError: If explicit `column_info` was given and its length
        does not match the number of columns in the CSV.
    """
    if column_info is None:
        column_info = TypeTester()

    use_inference = isinstance(column_info, TypeTester)

    if hasattr(path, 'read'):
        rows = list(csv.reader(path, **kwargs))
    else:
        with open(path) as f:
            rows = list(csv.reader(f, **kwargs))

    if header:
        column_names = rows.pop(0)
    else:
        # Guard against an empty file: rows[0] would raise IndexError.
        column_names = [None] * len(rows[0]) if rows else []

    if use_inference:
        column_info = column_info.run(rows, column_names)
    else:
        if len(column_names) != len(column_info):
            # The mismatch can go either way (too many OR too few columns),
            # so report both counts instead of claiming "more columns".
            raise ValueError(
                'CSV contains %d columns but %d were specified.' % (len(column_names), len(column_info))
            )

    return Table(rows, column_info, row_names=row_names)
def from_csv(cls, path, column_names=None, column_types=None, row_names=None, header=True, sniff_limit=0, encoding='utf-8', **kwargs):
    """
    Create a new table from a CSV file or file-like object.

    This method uses agate's builtin CSV reader, which supplies encoding
    support for both Python 2 and Python 3. :code:`kwargs` will be passed
    through to the CSV reader.

    :param path: Filepath or file-like object from which to read CSV data.
    :param column_names: See :meth:`.Table.__init__`.
    :param column_types: See :meth:`.Table.__init__`.
    :param row_names: See :meth:`.Table.__init__`.
    :param header: If `True`, the first row of the CSV is assumed to contain
        headers and will be skipped. If `header` and `column_names` are both
        specified then a row will be skipped, but `column_names` will be used.
    :param sniff_limit: Limit CSV dialect sniffing to the specified number of
        bytes. Set to None to sniff the entire file. Defaults to 0, or no
        sniffing.
    :param encoding: Character encoding of the CSV file. Note: if passing in
        a file handle it is assumed you have already opened it with the
        correct encoding specified.
    """
    if hasattr(path, 'read'):
        contents = path.read()
    else:
        with io.open(path, encoding=encoding) as f:
            contents = f.read()

    if sniff_limit is None:
        kwargs['dialect'] = csv.Sniffer().sniff(contents)
    elif sniff_limit > 0:
        kwargs['dialect'] = csv.Sniffer().sniff(contents[:sniff_limit])

    if six.PY2:
        contents = contents.encode('utf-8')

    # BUG FIX: `header` must not be forwarded to the CSV reader --
    # csv.reader() does not accept a `header` keyword and raises TypeError.
    # The header row is handled below instead.
    rows = list(csv.reader(six.StringIO(contents), **kwargs))

    if header:
        if column_names is None:
            column_names = rows.pop(0)
        else:
            rows.pop(0)

    return Table(rows, column_names, column_types, row_names=row_names)
def test_writer_alias(self):
    """Rows written via the ``writer`` alias round-trip through ``reader``."""
    rows = [
        ['a', 'b', 'c'],
        ['1', '2', '3'],
        ['4', '5', u'\u02a4'],
    ]

    output = six.StringIO()
    writer = csv_py2.writer(output, encoding='utf-8')

    # The py2 writer should operate in eight-bit mode for utf-8.
    self.assertEqual(writer._eight_bit, True)

    for row in rows:
        writer.writerow(row)

    written = six.StringIO(output.getvalue())
    parsed = csv_py2.reader(written, encoding='utf-8')

    for expected in rows:
        self.assertEqual(next(parsed), expected)
def test_writer_alias(self):
    """Writing with the ``writer`` alias then reading back preserves rows."""
    output = six.StringIO()
    writer = csv_py2.writer(output, encoding='utf-8')
    self.assertEqual(writer._eight_bit, True)

    expected_rows = (
        ['a', 'b', 'c'],
        ['1', '2', '3'],
        ['4', '5', u'\u02a4'],
    )

    for row in expected_rows:
        writer.writerow(row)

    # Re-parse the serialized output and confirm it matches what was written.
    parsed = csv_py2.reader(six.StringIO(output.getvalue()), encoding='utf-8')

    for expected in expected_rows:
        self.assertEqual(next(parsed), expected)
def from_csv(cls, path, column_names=None, column_types=None, row_names=None, header=True, sniff_limit=0, **kwargs):
    """
    Create a new table from a CSV file or file-like object.

    This method uses agate's builtin CSV reader, which supports unicode
    on both Python 2 and Python 3. ``kwargs`` will be passed through to
    the CSV reader.

    :param path: Filepath or file-like object from which to read CSV data.
    :param column_names: See :meth:`Table.__init__`.
    :param column_types: See :meth:`Table.__init__`.
    :param row_names: See :meth:`Table.__init__`.
    :param header: If `True`, the first row of the CSV is assumed to contain
        headers and will be skipped. If `header` and `column_names` are both
        specified then a row will be skipped, but `column_names` will be used.
    :param sniff_limit: Limit CSV dialect sniffing to the specified number of
        bytes. Set to None to sniff the entire file. Defaults to 0, or no
        sniffing.
    """
    if hasattr(path, 'read'):
        contents = path.read()
    else:
        with open(path) as f:
            contents = f.read()

    if sniff_limit is None:
        kwargs['dialect'] = csv.Sniffer().sniff(contents)
    elif sniff_limit > 0:
        kwargs['dialect'] = csv.Sniffer().sniff(contents[:sniff_limit])

    # BUG FIX: `header` must not be forwarded to the CSV reader --
    # csv.reader() does not accept a `header` keyword and raises TypeError.
    # The header row is handled below instead.
    rows = list(csv.reader(StringIO(contents), **kwargs))

    if header:
        if column_names is None:
            column_names = rows.pop(0)
        else:
            rows.pop(0)

    return Table(rows, column_names, column_types, row_names=row_names)
def from_csv(cls, path, column_names=None, column_types=None, row_names=None, skip_lines=0, header=True, sniff_limit=0, encoding='utf-8', **kwargs):
    """
    Create a new table from a CSV file or file-like object.

    This method uses agate's builtin CSV reader, which supplies encoding
    support for both Python 2 and Python 3. :code:`kwargs` will be passed
    through to the CSV reader.

    :param path: Filepath or file-like object from which to read CSV data.
    :param column_names: See :meth:`.Table.__init__`.
    :param column_types: See :meth:`.Table.__init__`.
    :param row_names: See :meth:`.Table.__init__`.
    :param skip_lines: Either a single number indicating the number of lines
        to skip from the top of the file or a sequence of line indexes to
        skip where the first line is index 0.
    :param header: If `True`, the first row of the CSV is assumed to contain
        headers and will be skipped. If `header` and `column_names` are both
        specified then a row will be skipped, but `column_names` will be used.
    :param sniff_limit: Limit CSV dialect sniffing to the specified number of
        bytes. Set to None to sniff the entire file. Defaults to 0, or no
        sniffing.
    :param encoding: Character encoding of the CSV file. Note: if passing in
        a file handle it is assumed you have already opened it with the
        correct encoding specified.

    :raises ValueError: If `skip_lines` is neither an int nor a sequence.
    """
    if hasattr(path, 'read'):
        lines = path.readlines()
    else:
        with io.open(path, encoding=encoding) as f:
            lines = f.readlines()

    # Line skipping happens on raw text, before dialect sniffing, so that
    # preamble junk does not confuse the Sniffer.
    if utils.issequence(skip_lines):
        lines = [line for i, line in enumerate(lines) if i not in skip_lines]
        contents = ''.join(lines)
    elif isinstance(skip_lines, int):
        contents = ''.join(lines[skip_lines:])
    else:
        raise ValueError('skip_lines argument must be an int or sequence')

    if sniff_limit is None:
        kwargs['dialect'] = csv.Sniffer().sniff(contents)
    elif sniff_limit > 0:
        kwargs['dialect'] = csv.Sniffer().sniff(contents[:sniff_limit])

    if six.PY2:
        contents = contents.encode('utf-8')

    # BUG FIX: `header` must not be forwarded to the CSV reader --
    # csv.reader() does not accept a `header` keyword and raises TypeError.
    # The header row is handled below instead.
    rows = list(csv.reader(six.StringIO(contents), **kwargs))

    if header:
        if column_names is None:
            column_names = rows.pop(0)
        else:
            rows.pop(0)

    return Table(rows, column_names, column_types, row_names=row_names)