Example #1
    def replace(self, to_replace, value):
        """
        Replace values given in *to_replace* with *value*.

        Parameters
        ----------
        to_replace : numeric, str or list-like
            Value(s) to replace.

            * numeric or str:

                - values equal to *to_replace* will be replaced with *value*

            * list of numeric or str:

                - If *value* is also list-like, *to_replace* and *value* must
                  be of same length.
        value : numeric, str, list-like, or dict
            Value(s) to replace `to_replace` with.

        See also
        --------
        Series.fillna

        Returns
        -------
        result : Series
            Series after replacement. The mask and index are preserved.
        """
        if not is_scalar(to_replace):
            if is_scalar(value):
                value = utils.scalar_broadcast_to(
                    value, (len(to_replace),), np.dtype(type(value))
                )
        else:
            if not is_scalar(value):
                raise TypeError(
                    "Incompatible types '{}' and '{}' "
                    "for *to_replace* and *value*.".format(
                        type(to_replace).__name__, type(value).__name__
                    )
                )
            to_replace = [to_replace]
            value = [value]

        if len(to_replace) != len(value):
            raise ValueError(
                "Replacement lists must be of same length. "
                "Expected {}, got {}.".format(len(to_replace), len(value))
            )

        if is_dict_like(to_replace) or is_dict_like(value):
            raise TypeError("Dict-like args not supported in Series.replace()")

        result = self._column.find_and_replace(to_replace, value)

        return self._copy_construct(data=result)
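
A plain-pandas sketch of the list-to-list replace semantics this cuDF-style method validates (standard pandas.Series.replace, not the cuDF internals shown above):

import pandas as pd

s = pd.Series([1, 2, 3, 2])

# scalar -> scalar: every matching value is replaced
print(s.replace(2, 20).tolist())              # [1, 20, 3, 20]

# list -> list: both lists must have the same length, otherwise
# pandas raises a ValueError much like the check above
print(s.replace([1, 3], [10, 30]).tolist())   # [10, 2, 30, 2]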
Example #2
    def __call__(self, arg):
        if is_scalar(arg):
            ret = pd.to_datetime(
                arg,
                errors=self._errors,
                dayfirst=self._dayfirst,
                yearfirst=self._yearfirst,
                utc=self._utc,
                format=self._format,
                exact=self._exact,
                unit=self._unit,
                infer_datetime_format=self._infer_datetime_format,
                origin=self._origin,
                cache=self._cache)
            return astensor(ret)

        dtype = np.datetime64(1, 'ns').dtype
        if isinstance(arg, (pd.Series, SERIES_TYPE)):
            arg = asseries(arg)
            return self.new_series([arg],
                                   shape=arg.shape,
                                   dtype=dtype,
                                   index_value=arg.index_value,
                                   name=arg.name)
        if is_dict_like(arg) or isinstance(arg, DATAFRAME_TYPE):
            arg = asdataframe(arg)
            columns = arg.columns_value.to_pandas().tolist()
            if sorted(columns) != sorted(['year', 'month', 'day']):
                missing = ','.join(c for c in ['day', 'month', 'year']
                                   if c not in columns)
                raise ValueError(
                    'to assemble mappings requires at least '
                    f'that [year, month, day] be specified: [{missing}] is missing'
                )
            return self.new_series([arg],
                                   shape=(arg.shape[0], ),
                                   dtype=dtype,
                                   index_value=arg.index_value)
        elif isinstance(arg, (pd.Index, INDEX_TYPE)):
            arg = asindex(arg)
            return self.new_index([arg],
                                  shape=arg.shape,
                                  dtype=dtype,
                                  index_value=parse_index(
                                      pd.Index([], dtype=dtype), self._params,
                                      arg),
                                  name=arg.name)
        else:
            arg = astensor(arg)
            if arg.ndim != 1:
                raise TypeError('arg must be a string, datetime, '
                                'list, tuple, 1-d tensor, or Series')
            return self.new_index([arg],
                                  shape=arg.shape,
                                  dtype=dtype,
                                  index_value=parse_index(
                                      pd.Index([], dtype=dtype), self._params,
                                      arg))
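
For reference, the DataFrame branch above mirrors plain pd.to_datetime, which can assemble datetimes from a frame only when at least the year, month, and day columns are present (an illustrative pandas-only sketch, not Mars code):

import pandas as pd

df = pd.DataFrame({'year': [2015, 2016], 'month': [2, 3], 'day': [4, 5]})
print(pd.to_datetime(df))          # datetime64[ns]: 2015-02-04, 2016-03-05

# Dropping a required column raises the same "[day] is missing" error
# that the code above reproduces for its lazy DataFrame inputs.
try:
    pd.to_datetime(df[['year', 'month']])
except ValueError as exc:
    print(exc)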
Example #3
    def _write_one_pair(key, value):
        if is_scalar(value):
            if type(value).__module__ == 'numpy':
                value = value.item()
            scalar_dict[key] = value
        elif isinstance(value, np.ndarray):
            self.write_array(sub_group, key, value)
        elif isinstance(value, pd.DataFrame):
            self.write_dataframe(sub_group, key, value)
        elif is_dict_like(value):
            self.write_mapping(sub_group, key, value)
        elif issparse(value):
            assert isinstance(value, csr_matrix)
            self.write_csr(sub_group, key, value)
        else:
            # assume value is either a list or a tuple; convert it to np.ndarray
            self.write_array(
                sub_group, key,
                value.astype(str) if is_categorical_dtype(value) else np.array(value))
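
A standalone sketch of the classification this writer dispatches on, using only pandas.api.types and scipy.sparse; the write_* methods themselves are not reproduced, and the check order simply follows the code above:

import numpy as np
import pandas as pd
from pandas.api.types import is_dict_like, is_scalar
from scipy.sparse import csr_matrix, issparse

def classify(value):
    # Same order as above: scalar, array, DataFrame, mapping, sparse, fallback
    if is_scalar(value):
        return 'scalar'
    if isinstance(value, np.ndarray):
        return 'array'
    if isinstance(value, pd.DataFrame):
        return 'dataframe'
    if is_dict_like(value):
        return 'mapping'
    if issparse(value):
        return 'sparse'
    return 'other'

for value in (np.int64(3), np.arange(4), pd.DataFrame({'x': [1]}),
              {'a': 1}, csr_matrix((2, 2)), [1, 2, 3]):
    print(type(value).__name__, '->', classify(value))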
Example #4
def convert_agg_func(agg_func):
    if isinstance(agg_func, str):
        if agg_func not in _SUPPORTED_AGGS:
            raise err._unsupported_error(
                f"Unsupported aggregation method: {agg_func}")
        return (agg_func, _NUMERIC_ONLY[agg_func])
    elif is_dict_like(agg_func):
        converted = {}
        for col, func in agg_func.items():
            funcs = util.to_list_if_scalar(convert_agg_func(func))
            converted[col] = funcs
        return converted
    elif is_list_like(agg_func):
        return [convert_agg_func(func) for func in agg_func]
    else:
        raise err._unsupported_error(
            f"Unsupported aggregation descriptor: {agg_func}")
Example #5
    def format_index(self, formatter):
        """
        Format the text display value of index.

        .. versionadded:: 0.18.0

        Parameters
        ----------
        formatter : str, callable, or dict

        Returns
        -------
        self : Styler

        Notes
        -----

        ``formatter`` is either an ``a`` or a dict ``{index name: a}`` where
        ``a`` is one of

        - str: this will be wrapped in: ``a.format(x)``
        - callable: called with the value of an individual cell

        The default display value for index is "str(index)".

        Examples
        --------

        >>> df = pd.DataFrame(
        ...     {'a': range(3), 'b': range(3)}, index=['c', 'd', 'e']
        ... )
        >>> styler = df.style.format_index({'d': lambda x: f'Index {x}'})
        >>> styler.render()
        """
        if is_dict_like(formatter):
            for index, index_formatter in formatter.items():
                index_formatter = _maybe_wrap_formatter(index_formatter)
                index_num = self.data.index.get_loc(index)
                self._display_index_funcs[index_num] = index_formatter
        else:
            for index_num in range(len(self.data)):
                index_formatter = _maybe_wrap_formatter(formatter)
                self._display_index_funcs[index_num] = index_formatter
        return self
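
A sketch of the string-or-callable wrapping that _maybe_wrap_formatter is assumed to perform (the helper itself is not shown above, so this is a guess at its contract, not the actual implementation):

def maybe_wrap_formatter(formatter):
    # str -> wrap in str.format; callable -> use as-is
    if isinstance(formatter, str):
        return lambda x: formatter.format(x)
    if callable(formatter):
        return formatter
    raise TypeError(f"expected str or callable, got {type(formatter).__name__}")

print(maybe_wrap_formatter('{:.2f}')(3.14159))   # '3.14'
print(maybe_wrap_formatter(str.upper)('abc'))    # 'ABC'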
Example #6
    def __init__(
        self,
        data=None,
        index=None,
        columns=None,
        dtype=None,
        copy=False,
        frame=None,
    ):
        # TODO: We would want to hide the frame argument from users,
        #       as it is intended only for internal use
        if frame is not None:
            assert index is None
            assert dtype is None
            assert columns is not None
            assert len(columns) == len(frame._columns)
            self._frame = frame
            self._set_columns(columns)

        elif isinstance(data, type(self)):
            self._construct_from_dataframe(data, index, columns, dtype, copy)

        elif isinstance(data, Frame):
            self._construct_from_series(data, index, columns, dtype, copy)

        elif (not _is_pandas_container(data) and is_dict_like(data)
              and len(data) > 0):
            if all(isinstance(val, Frame) for val in data.values()):
                self._construct_from_frames(data, index, columns, dtype, copy)
            elif all(
                    hasattr(val, "__legate_data_interface__")
                    for val in data.values()):
                self._construct_from_legate_containers(data, index, columns,
                                                       dtype, copy)
            else:
                self._construct_fallback(data, index, columns, dtype, copy)

        else:
            self._construct_fallback(data, index, columns, dtype, copy)

        assert self._columns is not None
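
For comparison, the dict-of-containers branch corresponds to the ordinary pandas constructor behaviour sketched below (plain pandas Series, not the Frame/legate-data containers used above):

import pandas as pd

data = {'a': pd.Series([1, 2, 3]), 'b': pd.Series([4.0, 5.0, 6.0])}
df = pd.DataFrame(data)        # each dict value becomes one column
print(df.dtypes.to_dict())     # {'a': dtype('int64'), 'b': dtype('float64')}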
Example #7
    def format(self, formatter, subset=None):
        """
        Format the text display value of cells.

        .. versionadded:: 0.18.0

        Parameters
        ----------
        formatter : str, callable, or dict
        subset : IndexSlice
            An argument to ``DataFrame.loc`` that restricts which elements
            ``formatter`` is applied to.

        Returns
        -------
        self : Styler

        Notes
        -----

        ``formatter`` is either an ``a`` or a dict ``{column name: a}`` where
        ``a`` is one of

        - str: this will be wrapped in: ``a.format(x)``
        - callable: called with the value of an individual cell

        The default display value for numeric values is the "general" (``g``)
        format with ``pd.options.display.precision`` precision.

        Examples
        --------

        >>> df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b'])
        >>> df.style.format("{:.2%}")
        >>> df['c'] = ['a', 'b', 'c', 'd']
        >>> df.style.format({'c': str.upper})
        """
        if subset is None:
            row_locs = range(len(self.data))
            col_locs = range(len(self.data.columns))
        else:
            subset = _non_reducing_slice(subset)
            if len(subset) == 1:
                subset = subset, self.data.columns

            sub_df = self.data.loc[subset]
            row_locs = self.data.index.get_indexer_for(sub_df.index)
            col_locs = self.data.columns.get_indexer_for(sub_df.columns)

        if is_dict_like(formatter):
            for col, col_formatter in formatter.items():
                # formatter must be callable, so '{}' are converted to lambdas
                col_formatter = _maybe_wrap_formatter(col_formatter)
                col_num = self.data.columns.get_indexer_for([col])[0]

                for row_num in row_locs:
                    self._display_funcs[(row_num, col_num)] = col_formatter
        else:
            # single scalar to format all cells with
            locs = product(*(row_locs, col_locs))
            for i, j in locs:
                formatter = _maybe_wrap_formatter(formatter)
                self._display_funcs[(i, j)] = formatter
        return self
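
A short usage sketch of the public API this method implements, written against a recent pandas release (newer versions render with to_html(); the era shown above used render()):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b'])
df['c'] = ['w', 'x', 'y', 'z']

# restrict a numeric format to columns 'a' and 'b' via subset,
# then attach a per-column formatter for 'c' with a dict
styler = df.style.format('{:.2%}', subset=pd.IndexSlice[:, ['a', 'b']])
styler = styler.format({'c': str.upper}, subset=pd.IndexSlice[:, ['c']])
html = styler.to_html()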
Example #9
    def rename_categories(
        self, new_categories: Union[list, dict, Callable], inplace: bool = False
    ) -> Optional["ps.Series"]:
        """
        Rename categories.

        Parameters
        ----------
        new_categories : list-like, dict-like or callable

            New categories which will replace old categories.

            * list-like: all items must be unique and the number of items in
              the new categories must match the existing number of categories.

            * dict-like: specifies a mapping from
              old categories to new. Categories not contained in the mapping
              are passed through and extra categories in the mapping are
              ignored.

            * callable : a callable that is called on all items in the old
              categories and whose return values comprise the new categories.

        inplace : bool, default False
            Whether or not to rename the categories inplace or return a copy of
            this categorical with renamed categories.

            .. deprecated:: 3.2.0

        Returns
        -------
        cat : Series or None
            Categorical with renamed categories or None if ``inplace=True``.

        Raises
        ------
        ValueError
            If new categories are list-like and do not have the same number of
            items as the current categories, or do not validate as categories.

        See Also
        --------
        reorder_categories : Reorder categories.
        add_categories : Add new categories.
        remove_categories : Remove the specified categories.
        remove_unused_categories : Remove categories which are not used.
        set_categories : Set the categories to the specified ones.

        Examples
        --------
        >>> s = ps.Series(["a", "a", "b"], dtype="category")
        >>> s.cat.rename_categories([0, 1])  # doctest: +SKIP
        0    0
        1    0
        2    1
        dtype: category
        Categories (2, int64): [0, 1]

        For dict-like ``new_categories``, extra keys are ignored and
        categories not in the dictionary are passed through

        >>> s.cat.rename_categories({'a': 'A', 'c': 'C'})  # doctest: +SKIP
        0    A
        1    A
        2    b
        dtype: category
        Categories (2, object): ['A', 'b']

        You may also provide a callable to create the new categories

        >>> s.cat.rename_categories(lambda x: x.upper())  # doctest: +SKIP
        0    A
        1    A
        2    B
        dtype: category
        Categories (2, object): ['A', 'B']
        """
        from pyspark.pandas.frame import DataFrame

        if inplace:
            warnings.warn(
                "The `inplace` parameter in rename_categories is deprecated "
                "and will be removed in a future version.",
                FutureWarning,
            )

        if is_dict_like(new_categories):
            categories = [cast(dict, new_categories).get(item, item) for item in self.categories]
        elif callable(new_categories):
            categories = [new_categories(item) for item in self.categories]
        elif is_list_like(new_categories):
            if len(self.categories) != len(new_categories):
                raise ValueError(
                    "new categories need to have the same number of items as the old categories!"
                )
            categories = cast(list, new_categories)
        else:
            raise TypeError("new_categories must be list-like, dict-like or callable.")

        internal = self._data._psdf._internal.with_new_spark_column(
            self._data._column_label,
            self._data.spark.column,
            field=self._data._internal.data_fields[0].copy(
                dtype=CategoricalDtype(categories=categories, ordered=self.ordered)
            ),
        )

        if inplace:
            self._data._psdf._update_internal_frame(internal)
            return None
        else:
            return DataFrame(internal)._psser_for(self._data._column_label).copy()
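
The same three new_categories shapes on plain pandas, for a quick comparison with the pandas-on-Spark implementation above:

import pandas as pd

s = pd.Series(['a', 'a', 'b'], dtype='category')

print(s.cat.rename_categories([0, 1]).cat.categories.tolist())                 # [0, 1]
print(s.cat.rename_categories({'a': 'A', 'c': 'C'}).cat.categories.tolist())   # ['A', 'b']
print(s.cat.rename_categories(lambda x: x.upper()).cat.categories.tolist())    # ['A', 'B']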
Example #10
    def _groupby_reduce(self, ops=None):
        columns = self._df._get_columns()
        dtypes = self._df._frame.dtypes

        valid_ops = {}
        valid_columns = []

        size_reduction = len(ops) == 1 and ops[0][0] == "size"
        # If the ops are given as a list, apply them across all the columns
        # with compatible data types
        if isinstance(ops, list):
            key_indices = set(self._keys)
            for col_idx, col in enumerate(columns):
                if col_idx in key_indices:
                    continue

                if reduction.incompatible_ops(ops, dtypes[col_idx]):
                    continue

                valid_ops[col_idx] = [desc[0] for desc in ops]
                valid_columns.append(col)
                # Special case with the size reduction, which produces a single
                # output regardless of the number of input columns
                if size_reduction:
                    break

        # If the ops are passed in a dictionary, it also specifies the input
        # columns on which the aggregation are performed
        else:
            assert is_dict_like(ops)
            for col, descs in ops.items():
                col_idx = columns.get_indexer_for([col])
                if len(col_idx) > 1:
                    raise KeyError(f"ambiguous column name {col}")
                if col_idx[0] == -1:
                    raise KeyError(col)

                if reduction.incompatible_ops(descs, dtypes[col_idx[0]]):
                    continue

                valid_ops[col_idx[0]] = [desc[0] for desc in descs]
                valid_columns.append(col)

        frame = self._df._frame.groupby_reduce(self._keys, valid_ops,
                                               self._method, self._sort)

        # If more than one aggregation is requested for a column,
        # the output column names should use MultiIndex
        multi_aggs = any(len(set(ops)) > 1 for ops in valid_ops.values())

        def _generate_columns(columns, all_ops):
            if multi_aggs:
                from pandas import MultiIndex

                pairs = []
                for idx, ops in all_ops.items():
                    pairs.extend([(columns[idx], op) for op in ops])
                index = MultiIndex.from_tuples(pairs)

                if self._is_series_groupby:
                    index = index.droplevel(0)

                return index

            else:
                from pandas import Index

                return Index([columns[idx] for idx in all_ops.keys()])

        from .dataframe import DataFrame

        if self._as_index:
            # Groupby keys are rearranged to come first in the frame,
            # no matter where they were in the input frame, so the
            # indexer should be picking the first N keys in the frame,
            # where N is the number of keys
            indexer = list(range(len(self._keys)))
            index_columns = frame.select_columns(indexer)

            # However, the index names are derived from the input
            # dataframe, which is not rearranged, so we use the original
            # indexer to select the names
            index_names = columns[self._keys]

            value_names = _generate_columns(columns, valid_ops)

            # Once we find the index columns, we drop them from the frame
            frame = frame.drop_columns(indexer)
            frame = frame.set_index(index_columns, index_names)

            if size_reduction or (self._is_series_groupby and not multi_aggs):
                # Size reduction always produces a series
                from .series import Series

                return Series(frame=frame, name=value_names[0])
            else:
                return DataFrame(frame=frame, columns=value_names)

        else:
            # Index levels don't survive in the output when as_index is False
            levels = set(self._levels)
            keys = [key for key in self._keys if key not in levels]

            key_names = columns[keys]
            value_names = _generate_columns(columns, valid_ops)

            # If the column names are stored in a MultiIndex,
            # we should extend the key names to match the shape
            if multi_aggs:
                from pandas import MultiIndex

                key_names = MultiIndex.from_arrays(
                    [key_names, [""] * len(key_names)])

            value_names = key_names.append(value_names)
            frame = frame.drop_columns(self._levels)
            return DataFrame(frame=frame, columns=value_names)
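
The multi_aggs handling matches how plain pandas switches to MultiIndex output columns once any column receives more than one aggregation; a small pandas-only illustration:

import pandas as pd

df = pd.DataFrame({'k': ['a', 'a', 'b'], 'x': [1, 2, 3], 'y': [1.0, 2.0, 3.0]})

single = df.groupby('k').agg({'x': 'sum'})
multi = df.groupby('k').agg({'x': ['sum', 'max'], 'y': 'mean'})
print(single.columns.tolist())   # ['x']  (flat Index)
print(multi.columns.tolist())    # [('x', 'sum'), ('x', 'max'), ('y', 'mean')]  (MultiIndex)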
Example #11
    def fillna(
        self,
        value=None,
        method=None,
        axis=None,
        inplace=False,
        limit=None,
        downcast=None,
    ):
        axis = self._get_axis_number(axis, 0)
        inplace = validate_bool_kwarg(inplace, "inplace")

        if axis not in (0, ):
            raise err._unsupported_error("axis", axis)

        if value is None and method is None:
            raise ValueError("must specify a fill method or value")

        if value is not None and method is not None:
            raise ValueError("cannot specify both a fill method and value")

        # Checks on method

        if method is not None:
            raise err._unsupported_error("method", method)

        if method is not None and method not in [
                "backfill",
                "bfill",
                "pad",
                "ffill",
        ]:
            expecting = "pad (ffill) or backfill (bfill)"
            msg = "Invalid fill method. Expecting {expecting}. Got {method}"
            msg = msg.format(expecting=expecting, method=method)
            raise ValueError(msg)

        # Checks on limit

        if limit is not None:
            raise err._unsupported_error("limit", limit)

        if limit is not None:
            if not isinstance(limit, int):
                raise ValueError("Limit must be an integer")
            elif limit <= 0:
                raise ValueError("Limit must be greater than 0")

        # Checks on value

        if isinstance(value, (list, tuple)):
            raise TypeError("'value' parameter must be a scalar or dict, but "
                            f"you passed a {type(value).__name__}")

        if is_scalar(value):
            values = {}
            for idx in range(len(self._get_columns())):
                values[idx] = util.sanitize_scalar(value)

        elif is_dict_like(value):
            if self._is_series:
                raise err._unsupported_error(
                    "'value' cannot be a dict for series")

            values = {}
            for col, val in value.items():
                if not is_scalar(val):
                    raise err._unsupported_error(
                        "'value' must be a dict of scalars for now")
                idxr = self.columns.get_indexer_for([col])
                if idxr[0] != -1:
                    values[idxr[0]] = util.sanitize_scalar(val)

        new_frame = self._frame.fillna(values)
        return self._create_or_update_frame(new_frame, inplace)
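
For reference, the scalar and dict forms accepted here follow regular pandas fillna; a pandas-only sketch:

import numpy as np
import pandas as pd

df = pd.DataFrame({'a': [1.0, np.nan], 'b': [np.nan, 2.0]})

print(df.fillna(0))                   # one scalar applied to every column
print(df.fillna({'a': -1, 'b': -2}))  # per-column scalars; unknown keys are ignored
print(df['a'].fillna(0))              # a Series accepts a scalar as well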
Example #12
def read_csv(
    filepath_or_buffer,
    sep=",",
    delimiter=None,
    header="infer",
    names=None,
    index_col=None,
    usecols=None,
    prefix=None,
    mangle_dupe_cols=True,
    dtype=None,
    true_values=None,
    false_values=None,
    skiprows=None,
    skipfooter=0,
    nrows=None,
    na_values=None,
    skip_blank_lines=True,
    parse_dates=False,
    compression="infer",
    quotechar='"',
    quoting=0,
    doublequote=True,
    verify_header=False,
    **kwargs,
    # TODO: Put back these options once we figure out how to support them
    #       with the Arrows CSV reader.
    # skipinitialspace=False,  # GPU only
    # keep_default_na=True,  # GPU only
    # na_filter=True,  # GPU only
    # dayfirst=False, # GPU only
    # thousands=None,  # GPU only
    # decimal=".",  # GPU only
    # lineterminator=None, # GPU only
    # comment=None,  # GPU only
    # delim_whitespace=False,  # GPU only
):

    # Checks on filepath_or_buffer
    paths = util.to_list_if_scalar(filepath_or_buffer)

    if any(not isinstance(path, str) for path in paths):
        raise err._unsupported_error(
            "'filepath_or_buffer' must be a string or a list of strings")
    if len(paths) == 0:
        raise ValueError("'filepath_or_buffer' must be a non-empty list")

    for path in paths:
        if not os.path.exists(path):
            raise ValueError(f"{path} does not exist")

    if not isinstance(compression, str):
        raise err._unsupported_error("compression", compression)
    compressions = [
        _parse_compression(infer_compression(path, compression))
        for path in paths
    ]

    # Checks on sep and delimiter
    if sep is None and delimiter is None:
        raise ValueError("at least one of 'sep' or 'delimiter' must be given")
    sep = delimiter if delimiter is not None else sep
    if len(sep) > 1:
        raise ValueError("'sep' must be a 1-character string")

    # Checks on header and names
    if header == "infer":
        header = 0 if names is None else None

    if header not in (
            0,
            None,
    ):
        raise err._unsupported_error("header", header)

    # Checks on skiprows, skipfooter, and nrows
    skiprows = 0 if skiprows is None else skiprows
    if not is_integer(skiprows):
        raise ValueError("'skiprows' must be an integer")
    if not is_integer(skipfooter):
        raise ValueError("'skipfooter' must be an integer")
    if not (nrows is None or is_integer(nrows)):
        raise ValueError("'nrows' must be None or an integer")

    # If either column names or dtype is missing, infer them by parsing
    # the first few lines using pandas
    # FIXME: We should use cuDF for this
    if names is None or dtype is None:
        engine = ("python" if skipfooter > 0 else "c", )
        column_names, dtypes = _extract_header_using_pandas(
            paths[0],
            sep,
            header,
            names,
            dtype,
            true_values,
            false_values,
            skiprows,
            na_values,
            skip_blank_lines,
            parse_dates,
            compression,
            quotechar,
            quoting,
            doublequote,
            engine,
            peek_rows=3,
        )
        if verify_header:
            for path in paths[1:]:
                result = _extract_header_using_pandas(
                    path,
                    sep,
                    header,
                    names,
                    dtype,
                    true_values,
                    false_values,
                    skiprows,
                    na_values,
                    skip_blank_lines,
                    parse_dates,
                    compression,
                    quotechar,
                    quoting,
                    doublequote,
                    engine,
                    peek_rows=3,
                )
                if not column_names.equals(result[0]):
                    raise ValueError(
                        f"{paths[0]} and {path} have different headers")

    else:
        column_names = pandas.Index(names)

        if is_dict_like(dtype):
            dtypes = []
            for name in names:
                if name not in dtype:
                    raise ValueError(f"'dtype' has no entry for '{name}'")
                dtypes.append(_ensure_dtype(dtype[name]))
        elif is_list_like(dtype):
            raise err._unsupported_error(
                "'dtype' must be a string, a dtype, or a dictionary")
        else:
            dtype = _ensure_dtype(dtype)
            dtypes = [dtype] * len(names)

    if column_names.has_duplicates:
        raise ValueError("Header must not have any duplicates")

    # Checks on unsupported options
    if prefix is not None:
        raise err._unsupported_error("prefix", prefix)
    if mangle_dupe_cols not in (True, ):
        raise err._unsupported_error("mangle_dupe_cols", mangle_dupe_cols)

    # If there was a header in the file, we should skip that line as well
    if header == 0:
        skiprows += 1

    # Checks on parse_dates
    _ERR_MSG_PARSE_DATES = (
        "'parse_dates' must be a list of integers or strings for now")

    if is_dict_like(parse_dates):
        raise err._unsupported_error(_ERR_MSG_PARSE_DATES)

    parse_dates = parse_dates if parse_dates is not False else []
    if not is_list_like(parse_dates):
        raise err._unsupported_error(_ERR_MSG_PARSE_DATES)

    date_cols = _get_indexer(column_names, parse_dates, "parse_dates")

    # Override dtypes for the datetime columns
    for idx in date_cols:
        dtypes[idx] = ty.ts_ns

    # If a column is given a datetime dtype but not added to parse_dates,
    # we should record it as a date column as well
    for idx, dtype in enumerate(dtypes):
        if dtype == ty.ts_ns and idx not in date_cols:
            date_cols.append(idx)

    # Checks on quoting
    if quoting != 0:
        raise err._unsupported_error("quoting", quoting)
    if len(quotechar) > 1:
        raise ValueError("'quotechar' must be a 1-character string")

    # Checks on index_col
    index_col = None if index_col is False else index_col
    if index_col is not None:
        if is_integer(index_col) or isinstance(index_col, str):
            index_col = [index_col]
        if not is_list_like(index_col):
            raise err._unsupported_error("index_col", index_col)
        index_col = _get_indexer(column_names, index_col, "index_col")

    # Checks on true_values, false_values, and na_values
    _check_string_list(true_values, "true_values")
    _check_string_list(false_values, "false_values")
    _check_string_list(na_values, "na_values")

    # Checks on nrows
    if skipfooter != 0 and nrows is not None:
        raise ValueError("'skipfooter' not supported with 'nrows'")

    df = DataFrame(
        frame=io.read_csv(
            paths,
            sep=sep,
            usecols=usecols,
            dtypes=dtypes,
            true_values=true_values,
            false_values=false_values,
            skiprows=skiprows,
            skipfooter=skipfooter,
            nrows=nrows,
            na_values=na_values,
            skip_blank_lines=skip_blank_lines,
            date_cols=date_cols,
            compressions=compressions,
            quotechar=quotechar,
            quoting=quoting,
            doublequote=doublequote,
        ),
        columns=column_names,
    )

    if index_col is not None:
        df = df.set_index(column_names[index_col])
        # Make sure we reset the names for unnamed indices
        names = df._raw_index.names
        names = [
            None if name.startswith("Unnamed") else name for name in names
        ]
        df._raw_index.names = names

    return df
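
A minimal pandas-only usage sketch of the options this reader validates (per-column dtype dict, parse_dates as a list, index_col by name). The inline CSV stands in for a real file path; note that plain pandas allows a partial dtype dict, whereas the reader above requires an entry per column name:

import io
import pandas as pd

csv_text = 'id,price,timestamp\n1,9.99,2021-01-01\n2,19.50,2021-01-02\n'

df = pd.read_csv(
    io.StringIO(csv_text),          # a file path would be used in practice
    sep=',',
    dtype={'id': 'int64', 'price': 'float64'},
    parse_dates=['timestamp'],      # parsed into datetime64[ns]
    index_col='id',
)
print(df.dtypes.to_dict())          # price -> float64, timestamp -> datetime64[ns]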