Example #1
0
def read_csv(
    filepath_or_buffer,
    sep=",",
    header=0,
    index_col=None,
    names=None,
    skiprows=None,
    na_values=None,
    parse_dates=False,
    date_parser=None,
    nrows=None,
    iterator=False,
    chunksize=None,
    skip_footer=0,
    converters=None,
    verbose=False,
    delimiter=None,
    encoding=None,
):
    """Read a delimited text file into a parsed result.

    Accepts either a filesystem path or an open file-like object.
    ``delimiter`` takes precedence over ``sep`` when both are given,
    and supplying ``date_parser`` switches date parsing on.  Returns
    the parser object itself when ``iterator`` or ``chunksize`` is
    requested, otherwise a fully parsed chunk.
    """
    # File-like objects (anything exposing .read) are used directly;
    # otherwise treat the argument as a path and open it here.
    if hasattr(filepath_or_buffer, "read"):
        handle = filepath_or_buffer
    else:
        try:
            # Prefer universal newline mode; fall back to a plain
            # text read if that mode is unavailable.
            handle = com._get_handle(filepath_or_buffer, "U", encoding=encoding)
        except Exception:  # pragma: no cover
            handle = com._get_handle(filepath_or_buffer, "r", encoding=encoding)

    # An explicit delimiter argument overrides sep.
    effective_sep = sep if delimiter is None else delimiter

    # A user-supplied date parser implies date parsing.
    wants_dates = True if date_parser is not None else parse_dates

    parser = TextParser(
        handle,
        header=header,
        index_col=index_col,
        names=names,
        na_values=na_values,
        parse_dates=wants_dates,
        date_parser=date_parser,
        skiprows=skiprows,
        delimiter=effective_sep,
        chunksize=chunksize,
        skip_footer=skip_footer,
        converters=converters,
        verbose=verbose,
        encoding=encoding,
    )

    if nrows is not None:
        return parser.get_chunk(nrows)
    if chunksize or iterator:
        return parser
    return parser.get_chunk()
Example #2
0
def read_csv(filepath_or_buffer,
             sep=',',
             header=0,
             index_col=None,
             names=None,
             skiprows=None,
             na_values=None,
             parse_dates=False,
             date_parser=None,
             nrows=None,
             iterator=False,
             chunksize=None,
             skip_footer=0,
             converters=None,
             verbose=False,
             delimiter=None,
             encoding=None):
    """Read a delimited text file into a parsed result.

    ``filepath_or_buffer`` may be a path or an open file-like object.
    ``delimiter``, when given, overrides ``sep``; supplying a
    ``date_parser`` turns ``parse_dates`` on.  With ``iterator`` or
    ``chunksize`` set, the parser object itself is returned for
    incremental reads; otherwise the whole input is parsed at once.
    """
    # File-like objects (anything with .read) are used directly;
    # otherwise treat the argument as a path and open it.
    if hasattr(filepath_or_buffer, 'read'):
        f = filepath_or_buffer
    else:
        try:
            # universal newline mode
            f = com._get_handle(filepath_or_buffer, 'U', encoding=encoding)
        except Exception:  # pragma: no cover
            # 'U' mode can fail on some streams/platforms; fall back
            # to a plain text read.
            f = com._get_handle(filepath_or_buffer, 'r', encoding=encoding)

    # An explicit delimiter argument takes precedence over sep.
    if delimiter is not None:
        sep = delimiter

    # A custom date parser implies dates should be parsed at all.
    if date_parser is not None:
        parse_dates = True

    parser = TextParser(f,
                        header=header,
                        index_col=index_col,
                        names=names,
                        na_values=na_values,
                        parse_dates=parse_dates,
                        date_parser=date_parser,
                        skiprows=skiprows,
                        delimiter=sep,
                        chunksize=chunksize,
                        skip_footer=skip_footer,
                        converters=converters,
                        verbose=verbose,
                        encoding=encoding)

    # nrows: parse only that many rows and return them immediately.
    if nrows is not None:
        return parser.get_chunk(nrows)
    # Iterator/chunked mode: hand the parser back to the caller.
    elif chunksize or iterator:
        return parser

    # Default: parse the entire input in one chunk.
    return parser.get_chunk()
Example #3
0
def _read(cls, filepath_or_buffer, kwds):
    """Generic reader of line files.

    Parameters
    ----------
    cls : parser class to instantiate over the opened handle.
    filepath_or_buffer : str path, URL, or open file-like object.
    kwds : dict of parser options; consumed destructively —
        ``filepath_or_buffer``, ``iterator`` and ``nrows`` are popped,
        ``parse_dates`` may be promoted to True.

    Returns
    -------
    The parser instance when ``iterator``/``chunksize`` was requested,
    otherwise a fully parsed chunk.
    """
    encoding = kwds.get("encoding", None)

    if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
        from urllib2 import urlopen

        filepath_or_buffer = urlopen(filepath_or_buffer)
        if py3compat.PY3:  # pragma: no cover
            # On Python 3 the URL response yields bytes; decode the
            # whole payload into an in-memory text buffer up front.
            # (Dropped an unused `from io import TextIOWrapper`.)
            if encoding:
                errors = "strict"
            else:
                errors = "replace"
                encoding = "utf-8"
            # Renamed from `bytes` so the builtin is not shadowed.
            raw = filepath_or_buffer.read()
            filepath_or_buffer = StringIO(raw.decode(encoding, errors))

    if hasattr(filepath_or_buffer, "read"):
        f = filepath_or_buffer
    else:
        try:
            # universal newline mode
            f = com._get_handle(filepath_or_buffer, "U", encoding=encoding)
        except Exception:  # pragma: no cover
            f = com._get_handle(filepath_or_buffer, "r", encoding=encoding)

    # A user-supplied date_parser implies parse_dates=True, unless the
    # caller already passed a non-boolean parse_dates specification.
    if kwds.get("date_parser", None) is not None:
        if isinstance(kwds["parse_dates"], bool):
            kwds["parse_dates"] = True

    # Extract some of the arguments (pass chunksize on).
    kwds.pop("filepath_or_buffer")
    iterator = kwds.pop("iterator")
    nrows = kwds.pop("nrows")
    chunksize = kwds.get("chunksize", None)

    # Create the parser.
    parser = cls(f, **kwds)

    if nrows is not None:
        return parser.get_chunk(nrows)
    elif chunksize or iterator:
        return parser

    return parser.get_chunk()
Example #4
0
def _read(cls, filepath_or_buffer, kwds):
    "Generic reader of line files."
    # Note: kwds is consumed destructively below (several keys popped).
    encoding = kwds.get('encoding', None)

    # URLs are fetched eagerly.  Under Python 3 the response body is
    # bytes, so it is decoded into an in-memory text buffer first.
    if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
        from urllib2 import urlopen
        filepath_or_buffer = urlopen(filepath_or_buffer)
        if py3compat.PY3:  # pragma: no cover
            from io import TextIOWrapper
            # With an explicit encoding, decode strictly; otherwise
            # assume UTF-8 and replace undecodable characters.
            if encoding:
                errors = 'strict'
            else:
                errors = 'replace'
                encoding = 'utf-8'
            # NOTE(review): this local shadows the builtin `bytes`.
            bytes = filepath_or_buffer.read()
            filepath_or_buffer = StringIO(bytes.decode(encoding, errors))

    # File-like objects (anything with .read) are used directly;
    # otherwise treat the argument as a path and open it.
    if hasattr(filepath_or_buffer, 'read'):
        f = filepath_or_buffer
    else:
        try:
            # universal newline mode
            f = com._get_handle(filepath_or_buffer, 'U', encoding=encoding)
        except Exception: # pragma: no cover
            f = com._get_handle(filepath_or_buffer, 'r', encoding=encoding)

    # A user-supplied date_parser implies parse_dates=True, unless the
    # caller already passed a non-boolean parse_dates specification.
    if kwds.get('date_parser', None) is not None:
        if isinstance(kwds['parse_dates'], bool):
            kwds['parse_dates'] = True

    # Extract some of the arguments (pass chunksize on).
    kwds.pop('filepath_or_buffer')
    iterator = kwds.pop('iterator')
    nrows = kwds.pop('nrows')
    chunksize = kwds.get('chunksize', None)

    # Create the parser.
    parser = cls(f, **kwds)

    # nrows: parse only that many rows; iterator/chunksize: return the
    # parser for incremental reads; otherwise parse everything at once.
    if nrows is not None:
        return parser.get_chunk(nrows)
    elif chunksize or iterator:
        return parser

    return parser.get_chunk()
Example #5
0
def _read(cls, filepath_or_buffer, kwds):
    """Generic entry point for reading a line-oriented text file.

    Resolves ``filepath_or_buffer`` (URL, path, or open file object)
    to a readable handle, normalizes a few keyword arguments, builds a
    ``cls`` parser over the handle, and returns either the parser
    itself (iterator/chunked mode) or a parsed chunk.
    """
    encoding = kwds.get('encoding', None)

    # URLs are fetched eagerly; under Python 3 the payload arrives as
    # bytes and is decoded into an in-memory text buffer.
    if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
        from urllib2 import urlopen
        filepath_or_buffer = urlopen(filepath_or_buffer)
        if py3compat.PY3:  # pragma: no cover
            from io import TextIOWrapper
            if not encoding:
                encoding, errors = 'utf-8', 'replace'
            else:
                errors = 'strict'
            payload = filepath_or_buffer.read()
            filepath_or_buffer = StringIO(payload.decode(encoding, errors))

    # Anything exposing .read is already a stream; otherwise open it.
    if hasattr(filepath_or_buffer, 'read'):
        stream = filepath_or_buffer
    else:
        try:
            # universal newline mode
            stream = com._get_handle(filepath_or_buffer, 'U', encoding=encoding)
        except Exception:  # pragma: no cover
            stream = com._get_handle(filepath_or_buffer, 'r', encoding=encoding)

    # A user-supplied date parser implies date parsing, as long as the
    # caller left parse_dates at a plain boolean.
    if kwds.get('date_parser', None) is not None:
        if isinstance(kwds['parse_dates'], bool):
            kwds['parse_dates'] = True

    # Strip the options the parser must not receive (chunksize stays).
    kwds.pop('filepath_or_buffer')
    iterator = kwds.pop('iterator')
    nrows = kwds.pop('nrows')
    chunksize = kwds.get('chunksize', None)

    parser = cls(stream, **kwds)

    if nrows is not None:
        return parser.get_chunk(nrows)
    if chunksize or iterator:
        return parser
    return parser.get_chunk()
Example #6
0
def my_to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
              cols=None, header=True, index=True, index_label=None,
              mode='w', nanRep=None, encoding=None, quoting=None,
              line_terminator='\n', write_dtypes=None):
    r"""
    Write DataFrame to a comma-separated values (csv) file

    Parameters
    ----------
    path_or_buf : string or file handle / StringIO
        File path
    sep : character, default ","
        Field delimiter for the output file.
    na_rep : string, default ''
        Missing data representation
    float_format : string, default None
        Format string for floating point numbers
    cols : sequence, optional
        Columns to write
    header : boolean or list of string, default True
        Write out column names. If a list of string is given it is
        assumed to be aliases for the column names
    index : boolean, default True
        Write row names (index)
    index_label : string or sequence, or False, default None
        Column label for index column(s) if desired. If None is given, and
        `header` and `index` are True, then the index names are used. A
        sequence should be given if the DataFrame uses MultiIndex.  If
        False do not print fields for index names. Use index_label=False
        for easier importing in R
    nanRep : deprecated, use na_rep
    mode : Python write mode, default 'w'
    encoding : string, optional
        a string representing the encoding to use if the contents are
        non-ascii, for python versions prior to 3
    line_terminator: string, default '\n'
        The newline character or character sequence to use in the output
        file
    quoting : optional constant from csv module
        defaults to csv.QUOTE_MINIMAL
    write_dtypes : optional
        passed through to the row-writing helper (semantics defined
        there)
    """
    if nanRep is not None:  # pragma: no cover
        import warnings
        warnings.warn("nanRep is deprecated, use na_rep",
                      FutureWarning)
        na_rep = nanRep

    # BUG FIX: this is a *writer*, so detect file-like objects by the
    # presence of 'write', not 'read'.  Checking 'read' misclassified
    # write-only handles (e.g. sys.stdout) as filesystem paths and
    # sent them to com._get_handle, which then failed.
    if hasattr(path_or_buf, 'write'):
        f = path_or_buf
        close = False   # caller owns the handle; do not close it
    else:
        f = com._get_handle(path_or_buf, mode, encoding=encoding)
        close = True    # we opened it, so we must close it

    if quoting is None:
        quoting = csv.QUOTE_MINIMAL

    try:
        # UnicodeWriter handles encoding for non-ascii output on
        # pre-Python-3 interpreters; plain csv.writer otherwise.
        if encoding is not None:
            csvout = com.UnicodeWriter(f, lineterminator=line_terminator,
                                       delimiter=sep, encoding=encoding,
                                       quoting=quoting)
        else:
            csvout = csv.writer(f, lineterminator=line_terminator,
                                delimiter=sep, quoting=quoting)
        self._helper_csv(csvout, na_rep=na_rep,
                         float_format=float_format, cols=cols,
                         header=header, index=index,
                         index_label=index_label, write_dtypes=write_dtypes)

    finally:
        # Close only handles we opened ourselves, even on error.
        if close:
            f.close()