def read_csv(filepath_or_buffer, sep=",", header=0, index_col=None,
             names=None, skiprows=None, na_values=None, parse_dates=False,
             date_parser=None, nrows=None, iterator=False, chunksize=None,
             skip_footer=0, converters=None, verbose=False, delimiter=None,
             encoding=None):
    """
    Read a delimited text file into a parsed result using ``TextParser``.

    Parameters
    ----------
    filepath_or_buffer : str or file-like
        Path to the file, or any object with a ``read`` method.
    sep : str, default ','
        Field delimiter. Ignored when ``delimiter`` is given.
    header : int, default 0
        Row number to use as column names.
    index_col : int or sequence, optional
        Column(s) to use as the row index.
    names : sequence, optional
        Column names to use.
    skiprows : sequence, optional
        Row numbers to skip.
    na_values : sequence, optional
        Additional strings to recognize as NA.
    parse_dates : bool, default False
        Attempt to parse dates. Forced to True when ``date_parser`` is given.
    date_parser : callable, optional
        Function used to parse dates.
    nrows : int, optional
        Number of rows to read; when given, a single chunk is returned.
    iterator : bool, default False
        Return the parser itself instead of parsed data.
    chunksize : int, optional
        Rows per chunk; when given, the parser itself is returned.
    skip_footer : int, default 0
        Rows at the bottom of the file to skip.
    converters : dict, optional
        Per-column converter functions.
    verbose : bool, default False
        Print extra parsing information.
    delimiter : str, optional
        Alternative to ``sep``; takes precedence when not None.
    encoding : str, optional
        Text encoding used when opening the file.

    Returns
    -------
    Parsed result from ``TextParser.get_chunk``, or the parser itself when
    ``iterator`` or ``chunksize`` is requested.
    """
    # An explicit ``delimiter`` argument overrides ``sep``.
    if delimiter is not None:
        sep = delimiter

    # Supplying a custom date parser implies date parsing.
    if date_parser is not None:
        parse_dates = True

    if hasattr(filepath_or_buffer, "read"):
        # Already an open file-like object; use it as-is.
        handle = filepath_or_buffer
    else:
        try:
            # Prefer universal newline mode.
            handle = com._get_handle(filepath_or_buffer, "U",
                                     encoding=encoding)
        except Exception:  # pragma: no cover
            handle = com._get_handle(filepath_or_buffer, "r",
                                     encoding=encoding)

    parser = TextParser(handle, header=header, index_col=index_col,
                        names=names, na_values=na_values,
                        parse_dates=parse_dates, date_parser=date_parser,
                        skiprows=skiprows, delimiter=sep,
                        chunksize=chunksize, skip_footer=skip_footer,
                        converters=converters, verbose=verbose,
                        encoding=encoding)

    if nrows is not None:
        return parser.get_chunk(nrows)
    if chunksize or iterator:
        return parser
    return parser.get_chunk()
def read_csv(filepath_or_buffer, sep=',', header=0, index_col=None,
             names=None, skiprows=None, na_values=None, parse_dates=False,
             date_parser=None, nrows=None, iterator=False, chunksize=None,
             skip_footer=0, converters=None, verbose=False, delimiter=None,
             encoding=None):
    """
    Read a delimited text file and return parsed data.

    The heavy lifting is delegated to ``TextParser``; this function only
    resolves the input handle and normalizes a few arguments first.

    Returns the parser object itself when ``iterator`` or ``chunksize`` is
    requested, a chunk of ``nrows`` rows when ``nrows`` is given, and the
    fully parsed result otherwise.
    """
    # Resolve the input to an open file-like object.
    if hasattr(filepath_or_buffer, 'read'):
        f = filepath_or_buffer
    else:
        try:
            # universal newline mode first
            f = com._get_handle(filepath_or_buffer, 'U', encoding=encoding)
        except Exception:  # pragma: no cover
            f = com._get_handle(filepath_or_buffer, 'r', encoding=encoding)

    # ``delimiter`` wins over ``sep`` when both are supplied.
    if delimiter is not None:
        sep = delimiter

    # A custom date parser only makes sense with date parsing enabled.
    if date_parser is not None:
        parse_dates = True

    parser_kwargs = dict(header=header,
                         index_col=index_col,
                         names=names,
                         na_values=na_values,
                         parse_dates=parse_dates,
                         date_parser=date_parser,
                         skiprows=skiprows,
                         delimiter=sep,
                         chunksize=chunksize,
                         skip_footer=skip_footer,
                         converters=converters,
                         verbose=verbose,
                         encoding=encoding)
    parser = TextParser(f, **parser_kwargs)

    if nrows is not None:
        return parser.get_chunk(nrows)
    elif chunksize or iterator:
        return parser
    return parser.get_chunk()
def _read(cls, filepath_or_buffer, kwds):
    """
    Generic reader of line files.

    Resolves ``filepath_or_buffer`` (path, URL, or open file-like object) to
    a readable handle, normalizes a few keyword arguments, then constructs
    ``cls`` and returns either the parser itself (iterator/chunked use) or
    a parsed chunk.

    Fixes over the previous revision: removed the unused
    ``from io import TextIOWrapper`` import and renamed the local ``bytes``
    variable, which shadowed the builtin.
    """
    encoding = kwds.get('encoding', None)

    if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
        from urllib2 import urlopen
        filepath_or_buffer = urlopen(filepath_or_buffer)
        if py3compat.PY3:  # pragma: no cover
            if encoding:
                errors = 'strict'
            else:
                # No encoding given: assume UTF-8 and replace bad bytes.
                errors = 'replace'
                encoding = 'utf-8'
            raw = filepath_or_buffer.read()
            filepath_or_buffer = StringIO(raw.decode(encoding, errors))

    if hasattr(filepath_or_buffer, 'read'):
        f = filepath_or_buffer
    else:
        try:
            # universal newline mode
            f = com._get_handle(filepath_or_buffer, 'U', encoding=encoding)
        except Exception:  # pragma: no cover
            f = com._get_handle(filepath_or_buffer, 'r', encoding=encoding)

    # A custom date parser implies date parsing, unless the caller already
    # passed a non-bool ``parse_dates`` spec (e.g. a list of columns).
    if kwds.get('date_parser', None) is not None:
        if isinstance(kwds['parse_dates'], bool):
            kwds['parse_dates'] = True

    # Extract some of the arguments (pass chunksize on).
    kwds.pop('filepath_or_buffer')
    iterator = kwds.pop('iterator')
    nrows = kwds.pop('nrows')
    chunksize = kwds.get('chunksize', None)

    # Create the parser.
    parser = cls(f, **kwds)

    if nrows is not None:
        return parser.get_chunk(nrows)
    elif chunksize or iterator:
        return parser
    return parser.get_chunk()
def _read(cls, filepath_or_buffer, kwds):
    """
    Generic reader of line files.

    Turns ``filepath_or_buffer`` — a path, URL, or open file-like object —
    into a readable handle, adjusts keyword arguments, and builds ``cls``.
    Returns the parser itself when ``iterator``/``chunksize`` is requested,
    a chunk of ``nrows`` rows when ``nrows`` is given, otherwise the fully
    parsed result.

    Cleanups: dropped the unused ``TextIOWrapper`` import and stopped
    shadowing the builtin ``bytes`` with a local variable.
    """
    encoding = kwds.get('encoding', None)

    if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
        from urllib2 import urlopen
        filepath_or_buffer = urlopen(filepath_or_buffer)
        if py3compat.PY3:  # pragma: no cover
            if encoding:
                errors = 'strict'
            else:
                # Default to UTF-8 with replacement when no encoding given.
                errors = 'replace'
                encoding = 'utf-8'
            raw = filepath_or_buffer.read()
            filepath_or_buffer = StringIO(raw.decode(encoding, errors))

    if hasattr(filepath_or_buffer, 'read'):
        f = filepath_or_buffer
    else:
        try:
            # universal newline mode
            f = com._get_handle(filepath_or_buffer, 'U', encoding=encoding)
        except Exception:  # pragma: no cover
            f = com._get_handle(filepath_or_buffer, 'r', encoding=encoding)

    # A supplied date parser forces date parsing on, but only when
    # ``parse_dates`` is a plain bool (a column list is left untouched).
    if kwds.get('date_parser', None) is not None:
        if isinstance(kwds['parse_dates'], bool):
            kwds['parse_dates'] = True

    # Extract some of the arguments (pass chunksize on).
    kwds.pop('filepath_or_buffer')
    iterator = kwds.pop('iterator')
    nrows = kwds.pop('nrows')
    chunksize = kwds.get('chunksize', None)

    # Create the parser.
    parser = cls(f, **kwds)

    if nrows is not None:
        return parser.get_chunk(nrows)
    elif chunksize or iterator:
        return parser
    return parser.get_chunk()
def my_to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
              cols=None, header=True, index=True, index_label=None,
              mode='w', nanRep=None, encoding=None, quoting=None,
              line_terminator='\n', write_dtypes=None):
    """
    Write DataFrame to a comma-separated values (csv) file

    Parameters
    ----------
    path_or_buf : string or file handle / StringIO
        File path
    sep : character, default ","
        Field delimiter for the output file.
    na_rep : string, default ''
        Missing data representation
    float_format : string, default None
        Format string for floating point numbers
    cols : sequence, optional
        Columns to write
    header : boolean or list of string, default True
        Write out column names. If a list of string is given it is
        assumed to be aliases for the column names
    index : boolean, default True
        Write row names (index)
    index_label : string or sequence, or False, default None
        Column label for index column(s) if desired. If None is given, and
        `header` and `index` are True, then the index names are used. A
        sequence should be given if the DataFrame uses MultiIndex. If
        False do not print fields for index names.
        Use index_label=False for easier importing in R
    nanRep : deprecated, use na_rep
    mode : Python write mode, default 'w'
    encoding : string, optional
        a string representing the encoding to use if the contents are
        non-ascii, for python versions prior to 3
    line_terminator : string, default '\\n'
        The newline character or character sequence to use in the output
        file
    quoting : optional constant from csv module
        defaults to csv.QUOTE_MINIMAL
    """
    if nanRep is not None:  # pragma: no cover
        import warnings
        warnings.warn("nanRep is deprecated, use na_rep", FutureWarning)
        na_rep = nanRep

    # BUG FIX: this previously tested ``hasattr(path_or_buf, 'read')``.
    # We are writing, so a write-only buffer (no ``read`` attribute) was
    # wrongly treated as a path and handed to ``com._get_handle``. Testing
    # for ``write`` correctly identifies already-open output objects.
    if hasattr(path_or_buf, 'write'):
        f = path_or_buf
        close = False
    else:
        f = com._get_handle(path_or_buf, mode, encoding=encoding)
        close = True

    if quoting is None:
        quoting = csv.QUOTE_MINIMAL

    try:
        # Unicode-aware writer only needed when an explicit encoding is
        # requested; otherwise the stdlib csv writer suffices.
        if encoding is not None:
            csvout = com.UnicodeWriter(f, lineterminator=line_terminator,
                                       delimiter=sep, encoding=encoding,
                                       quoting=quoting)
        else:
            csvout = csv.writer(f, lineterminator=line_terminator,
                                delimiter=sep, quoting=quoting)
        self._helper_csv(csvout, na_rep=na_rep,
                         float_format=float_format, cols=cols,
                         header=header, index=index,
                         index_label=index_label,
                         write_dtypes=write_dtypes)
    finally:
        # Only close handles we opened ourselves.
        if close:
            f.close()