Exemple #1
0
 def _verify_for_rename(
         self, name: List[Name]) -> List[Label]:  # type: ignore[override]
     """
     Validate a list of new index names and return them as labels.

     Parameters
     ----------
     name : list-like
         One candidate name per index level.

     Returns
     -------
     list
         The names in their original order; an entry for which
         ``is_name_like_tuple`` is true is kept as is, any other entry is
         wrapped in a one-element tuple.

     Raises
     ------
     TypeError
         If ``name`` is not list-like, or if any entry is not hashable.
     ValueError
         If the number of entries differs from
         ``self._internal.index_level``.
     """
     # Reject scalars and other non list-like input up front.
     if not is_list_like(name):
         raise TypeError("Must pass list-like as `names`.")

     # Exactly one name is required per index level.
     if len(name) != self._internal.index_level:
         message = "Length of new names must be {}, got {}".format(
             self._internal.index_level, len(name))
         raise ValueError(message)

     if not all(is_hashable(candidate) for candidate in name):
         raise TypeError("MultiIndex.name must be a hashable type")

     labels = []
     for candidate in name:
         if is_name_like_tuple(candidate):
             labels.append(candidate)
         else:
             labels.append((candidate, ))
     return labels
Exemple #2
0
def cast_to_category_pd(df: pd.DataFrame, deep: bool = True) -> pd.DataFrame:
    """
    Cast the columns of a pandas DataFrame that are worth storing as ``category`` dtype.

    A column is cast when it has ``object`` dtype, its first value is hashable and
    fewer than 50% of its values are unique.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with the columns to cast.
    deep : bool, default True
        Whether or not to perform a deep copy of the original DataFrame.

    Returns
    -------
    pd.DataFrame
        Optimized copy of the input DataFrame. A DataFrame without rows is
        returned as a plain copy, with no column cast.

    Notes
    -----
    Only the first value of each ``object`` column is tested for hashability.

    Examples
    --------
    >>> import pandas as pd
    >>> columns = ['name', 'age', 'country']
    >>> df = pd.DataFrame([['John', 24, 'China'],
    ...                    ['Mary', 20, 'China'],
    ...                    ['Jane', 25, 'Switzerland'],
    ...                    ['Greg', 23, 'China'],
    ...                    ['James', 28, 'China']],
    ...                   columns=columns)
    >>> df
        name  age      country
    0   John   24        China
    1   Mary   20        China
    2   Jane   25  Switzerland
    3   Greg   23        China
    4  James   28        China
    >>> df.dtypes
    name       object
    age         int64
    country    object
    dtype: object
    >>> df_optimized = cast_to_category_pd(df)
    >>> df_optimized.dtypes
    name         object
    age           int64
    country    category
    dtype: object
    """
    n_rows = df.shape[0]
    if n_rows == 0:
        # With no rows there is no first value to inspect and the uniqueness
        # ratio is undefined (0 / 0), so hand back an untouched copy.
        return df.copy(deep=deep)

    return df.copy(deep=deep).astype({
        col: 'category'
        for col in df.columns
        if (df[col].dtype == 'object' and is_hashable(df[col].iloc[0])
            and df[col].nunique() / n_rows < 0.5)
    })
Exemple #3
0
    def __new__(cls, data=None, categories=None, ordered=None, dtype=None, copy=False, name=None):
        if not is_hashable(name):
            raise TypeError("Index.name must be a hashable type")

        if isinstance(data, (Series, Index)):
            if dtype is None:
                dtype = "category"
            return Index(data, dtype=dtype, copy=copy, name=name)

        return pp.from_pandas(
            pd.CategoricalIndex(
                data=data, categories=categories, ordered=ordered, dtype=dtype, name=name
            )
        )
Exemple #4
0
    def __new__(
        cls,
        data: Optional[Any] = None,
        dtype: Optional[Union[str, Dtype]] = None,
        copy: bool = False,
        name: Optional[Union[Any, Tuple]] = None,
    ) -> "Int64Index":
        """
        Build an ``Int64Index`` from ``data``.

        When ``data`` is a ``Series`` or ``Index``, the call is delegated to
        ``Index`` with ``dtype`` defaulting to ``"int64"``. For any other
        input a ``pd.Int64Index`` is created and converted with
        ``ps.from_pandas``.

        Raises
        ------
        TypeError
            If ``name`` is not hashable.
        """
        if not is_hashable(name):
            raise TypeError("Index.name must be a hashable type")

        if not isinstance(data, (Series, Index)):
            # Plain input: let pandas build the index, then convert it.
            pandas_index = pd.Int64Index(data=data, dtype=dtype, copy=copy, name=name)
            return cast(Int64Index, ps.from_pandas(pandas_index))

        target_dtype = "int64" if dtype is None else dtype
        return cast(Int64Index, Index(data, dtype=target_dtype, copy=copy, name=name))
Exemple #5
0
    def __new__(
        cls,
        data=None,
        freq=_NoValue,
        normalize=False,
        closed=None,
        ambiguous="raise",
        dayfirst=False,
        yearfirst=False,
        dtype=None,
        copy=False,
        name=None,
    ) -> "DatetimeIndex":
        """
        Build a ``DatetimeIndex`` from ``data``.

        When ``data`` is a ``Series`` or ``Index``, the call is delegated to
        ``Index`` with ``dtype`` defaulting to ``"datetime64[ns]"``; the
        remaining datetime-specific arguments are not used on that path.
        For any other input the arguments are forwarded to
        ``pd.DatetimeIndex`` and the result is converted with
        ``ps.from_pandas``.

        Raises
        ------
        TypeError
            If ``name`` is not hashable.
        """
        if not is_hashable(name):
            raise TypeError("Index.name must be a hashable type")

        if not isinstance(data, (Series, Index)):
            pandas_kwargs = {
                "data": data,
                "normalize": normalize,
                "closed": closed,
                "ambiguous": ambiguous,
                "dayfirst": dayfirst,
                "yearfirst": yearfirst,
                "dtype": dtype,
                "copy": copy,
                "name": name,
            }
            # Forward ``freq`` only when the caller supplied it explicitly.
            if freq is not _NoValue:
                pandas_kwargs["freq"] = freq
            pandas_index = pd.DatetimeIndex(**pandas_kwargs)
            return cast(DatetimeIndex, ps.from_pandas(pandas_index))

        target_dtype = "datetime64[ns]" if dtype is None else dtype
        return cast(DatetimeIndex,
                    Index(data, dtype=target_dtype, copy=copy, name=name))
Exemple #6
0
 def _set_name(self, name):
     if not is_hashable(name):
         raise TypeError(
             f"{type(self).__name__}.name must be a hashable type")
     self._name = name