def _verify_for_rename(self, name: List[Name]) -> List[Label]:  # type: ignore[override]
    """
    Validate new level names and normalise each one to a tuple label.

    Parameters
    ----------
    name : list-like
        One entry per index level.

    Returns
    -------
    list
        The entries of ``name``; an entry for which ``is_name_like_tuple``
        is false is wrapped into a 1-tuple, the others are kept as they are.

    Raises
    ------
    TypeError
        If ``name`` is not list-like, or if any entry is not hashable.
    ValueError
        If the number of entries differs from ``self._internal.index_level``.
    """
    # Guard clauses first: reject anything that is not list-like.
    if not is_list_like(name):
        raise TypeError("Must pass list-like as `names`.")

    expected_levels = self._internal.index_level
    if expected_levels != len(name):
        raise ValueError(
            "Length of new names must be {}, got {}".format(expected_levels, len(name))
        )

    if not all(is_hashable(candidate) for candidate in name):
        raise TypeError("MultiIndex.name must be a hashable type")

    labels = []
    for candidate in name:
        labels.append(candidate if is_name_like_tuple(candidate) else (candidate,))
    return labels
def _is_category_candidate(column: pd.Series) -> bool:
    """Return whether ``column`` is worth storing as ``category`` dtype."""
    # Only object columns are considered; numerical columns are left as they are.
    if column.dtype != 'object':
        return False
    # An empty column has no first value to probe and no ratio to compute.
    if column.empty:
        return False
    # Cheap early exit before the more expensive uniqueness count.
    if not is_hashable(column.iloc[0]):
        return False
    try:
        unique_ratio = column.nunique() / column.shape[0]
    except TypeError:
        # An unhashable value further down the column (e.g. a list) makes the
        # uniqueness count fail; such a column cannot be cast to category.
        return False
    return unique_ratio < 0.5


def cast_to_category_pd(df: pd.DataFrame, deep: bool = True) -> pd.DataFrame:
    """
    Automatically converts columns of pandas DataFrame that are worth storing
    as ``category`` dtype.

    To be casted a column must not be numerical, must be hashable and must
    have less than 50% of unique values. Empty columns and columns containing
    unhashable values are left untouched.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with the columns to cast.
    deep : bool, default True
        Whether or not to perform a deep copy of the original DataFrame.

    Returns
    -------
    pd.DataFrame
        Optimized copy of the input DataFrame.

    Examples
    --------
    >>> import pandas as pd
    >>> columns = ['name', 'age', 'country']
    >>> df = pd.DataFrame([['John', 24, 'China'],
    ...                    ['Mary', 20, 'China'],
    ...                    ['Jane', 25, 'Switzerland'],
    ...                    ['Greg', 23, 'China'],
    ...                    ['James', 28, 'China']],
    ...                   columns=columns)
    >>> df
        name  age      country
    0   John   24        China
    1   Mary   20        China
    2   Jane   25  Switzerland
    3   Greg   23        China
    4  James   28        China
    >>> df.dtypes
    name       object
    age         int64
    country    object
    dtype: object
    >>> df_optimized = cast_to_category_pd(df)
    >>> df_optimized.dtypes
    name         object
    age           int64
    country    category
    dtype: object
    """
    category_columns = {
        col: 'category' for col in df.columns if _is_category_candidate(df[col])
    }
    return df.copy(deep=deep).astype(category_columns)
def __new__(cls, data=None, categories=None, ordered=None, dtype=None, copy=False, name=None):
    """
    Build a categorical index from ``data``.

    The name is validated first. When ``data`` is already a ``Series`` or
    ``Index`` the call is delegated to ``Index`` with ``dtype`` defaulting to
    ``"category"``; ``categories`` and ``ordered`` are not used on that path.
    Any other input is turned into a ``pd.CategoricalIndex`` and converted
    with ``pp.from_pandas``; ``copy`` is not forwarded on that path.

    Raises
    ------
    TypeError
        If ``name`` is not hashable.
    """
    if not is_hashable(name):
        raise TypeError("Index.name must be a hashable type")

    if not isinstance(data, (Series, Index)):
        pandas_index = pd.CategoricalIndex(
            data=data, categories=categories, ordered=ordered, dtype=dtype, name=name
        )
        return pp.from_pandas(pandas_index)

    effective_dtype = "category" if dtype is None else dtype
    return Index(data, dtype=effective_dtype, copy=copy, name=name)
def __new__(
    cls,
    data: Optional[Any] = None,
    dtype: Optional[Union[str, Dtype]] = None,
    copy: bool = False,
    name: Optional[Union[Any, Tuple]] = None,
) -> "Int64Index":
    """
    Build an int64 index from ``data``.

    The name is validated first. When ``data`` is already a ``Series`` or
    ``Index`` the call is delegated to ``Index`` with ``dtype`` defaulting to
    ``"int64"``. Any other input is turned into a ``pd.Int64Index`` and
    converted with ``ps.from_pandas``.

    Raises
    ------
    TypeError
        If ``name`` is not hashable.
    """
    if not is_hashable(name):
        raise TypeError("Index.name must be a hashable type")

    if not isinstance(data, (Series, Index)):
        pandas_index = pd.Int64Index(data=data, dtype=dtype, copy=copy, name=name)
        return cast(Int64Index, ps.from_pandas(pandas_index))

    effective_dtype = "int64" if dtype is None else dtype
    return cast(Int64Index, Index(data, dtype=effective_dtype, copy=copy, name=name))
def __new__(
    cls,
    data=None,
    freq=_NoValue,
    normalize=False,
    closed=None,
    ambiguous="raise",
    dayfirst=False,
    yearfirst=False,
    dtype=None,
    copy=False,
    name=None,
) -> "DatetimeIndex":
    """
    Build a datetime index from ``data``.

    The name is validated first. When ``data`` is already a ``Series`` or
    ``Index`` the call is delegated to ``Index`` with ``dtype`` defaulting to
    ``"datetime64[ns]"``; the remaining options are not used on that path.
    Any other input is turned into a ``pd.DatetimeIndex`` and converted with
    ``ps.from_pandas``.

    Raises
    ------
    TypeError
        If ``name`` is not hashable.
    """
    if not is_hashable(name):
        raise TypeError("Index.name must be a hashable type")

    if not isinstance(data, (Series, Index)):
        pandas_kwargs = {
            "data": data,
            "normalize": normalize,
            "closed": closed,
            "ambiguous": ambiguous,
            "dayfirst": dayfirst,
            "yearfirst": yearfirst,
            "dtype": dtype,
            "copy": copy,
            "name": name,
        }
        # ``freq`` is forwarded only when the caller supplied it explicitly.
        if freq is not _NoValue:
            pandas_kwargs["freq"] = freq
        pandas_index = pd.DatetimeIndex(**pandas_kwargs)
        return cast(DatetimeIndex, ps.from_pandas(pandas_index))

    effective_dtype = "datetime64[ns]" if dtype is None else dtype
    return cast(DatetimeIndex, Index(data, dtype=effective_dtype, copy=copy, name=name))
def _set_name(self, name): if not is_hashable(name): raise TypeError( f"{type(self).__name__}.name must be a hashable type") self._name = name