def test_is_categorical():
    """Check ``com.is_categorical`` on categorical containers and a plain list."""
    cat = pd.Categorical([1, 2, 3])

    # A raw Categorical, a Series wrapping it, and a CategoricalIndex are all
    # expected to be reported as categorical.
    assert com.is_categorical(cat)

    wrapped = pd.Series(cat)
    assert com.is_categorical(wrapped)

    cat_index = pd.CategoricalIndex([1, 2, 3])
    assert com.is_categorical(cat_index)

    # A plain Python list is the negative case.
    plain = [1, 2, 3]
    assert not com.is_categorical(plain)
def test_is_categorical():
    """
    Check ``com.is_categorical`` results while requiring a ``FutureWarning``.

    Every call to ``com.is_categorical`` happens inside the
    ``tm.assert_produces_warning(FutureWarning)`` context.
    """
    cat = pd.Categorical([1, 2, 3])

    expect_future_warning = tm.assert_produces_warning(FutureWarning)
    with expect_future_warning:
        # Positive cases: the Categorical itself and two containers built
        # around categorical data.
        assert com.is_categorical(cat)

        wrapped = pd.Series(cat)
        assert com.is_categorical(wrapped)

        cat_index = pd.CategoricalIndex([1, 2, 3])
        assert com.is_categorical(cat_index)

        # Negative case: a plain Python list.
        plain = [1, 2, 3]
        assert not com.is_categorical(plain)
def test_basic(self):
    """Exercise ``is_categorical_dtype`` and ``is_categorical`` on basic inputs."""
    assert is_categorical_dtype(self.dtype)

    labels = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
    ser = Series(Categorical(labels), name='A')

    # dtypes: both the Series' dtype and the Series itself must be accepted,
    # while a float64 dtype must be rejected.
    for candidate in (ser.dtype, ser):
        assert is_categorical_dtype(candidate)
    assert not is_categorical_dtype(np.dtype('float64'))

    # Same inputs through is_categorical, plus a bare float scalar.
    for candidate in (ser.dtype, ser):
        assert is_categorical(candidate)
    for rejected in (np.dtype('float64'), 1.0):
        assert not is_categorical(rejected)
def get_indexer(
    self,
    target: AnyArrayLike,
    method: Optional[str] = None,
    limit: Optional[int] = None,
    tolerance: Optional[Any] = None,
) -> np.ndarray:
    """
    Compute, for each element of ``target``, its position in this index.

    Parameters
    ----------
    target : array-like
        Values to look up; converted with ``ensure_index`` before matching.
    method : str, optional
        Validated via ``self._check_method``.
    limit : int, optional
        Not referenced by this implementation.
    tolerance : optional
        Not referenced by this implementation.

    Returns
    -------
    np.ndarray
        Positions into this index. In the branches that build the result
        here, -1 marks a target element with no match.

    Raises
    ------
    InvalidIndexError
        If ``self.is_overlapping`` is true.
    TypeError
        If ``self.get_loc`` raises ``InvalidIndexError`` for an element of
        an object-dtype target (i.e. a non-scalar key).
    """
    self._check_method(method)

    if self.is_overlapping:
        raise InvalidIndexError("cannot handle overlapping indices; "
                                "use IntervalIndex.get_indexer_non_unique")

    other = ensure_index(target)

    if isinstance(other, IntervalIndex):
        # Identical indexes match one-to-one by position.
        if self.equals(other):
            return np.arange(len(self), dtype="intp")

        # A different ``closed`` side or an incompatible subtype means
        # nothing can match.
        subtype = find_common_type([self.dtype.subtype, other.dtype.subtype])
        incompatible = self.closed != other.closed or is_object_dtype(subtype)
        if incompatible:
            return np.repeat(np.intp(-1), len(other))

        # This index is non-overlapping, so each target interval matches at
        # most once. An exact match requires both endpoints to resolve to
        # the same position, hence the elementwise comparison.
        from_left = self.left.get_indexer(other.left)
        from_right = self.right.get_indexer(other.right)
        positions = np.where(from_left == from_right, from_left, -1)
    elif is_categorical(other):
        # Resolve the unique categories once, then spread the result over
        # the codes with take_1d.
        per_category = self.get_indexer(other.categories)
        positions = take_1d(per_category, other.codes, fill_value=-1)
    elif is_object_dtype(other):
        # Heterogeneous scalar index: defer elementwise to get_loc
        # (non-overlapping, so get_loc yields a scalar or raises KeyError).
        positions = []
        for key in other:
            try:
                found = self.get_loc(key)
            except KeyError:
                found = -1
            except InvalidIndexError as err:
                # i.e. non-scalar key
                raise TypeError(key) from err
            positions.append(found)
    else:
        # Homogeneous scalar index: use the IntervalTree engine.
        other = self._maybe_convert_i8(other)
        positions = self._engine.get_indexer(other.values)

    return ensure_platform_int(positions)
def test_basic(self):
    """Exercise ``is_categorical_dtype`` and ``is_categorical`` on basic inputs."""
    assert is_categorical_dtype(self.dtype)

    factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])
    ser = Series(factor, name="A")
    float_dtype = np.dtype("float64")

    # dtypes: the Series' dtype and the Series itself are categorical,
    # a float64 dtype is not.
    accepted = (ser.dtype, ser)
    for candidate in accepted:
        assert is_categorical_dtype(candidate)
    assert not is_categorical_dtype(float_dtype)

    # is_categorical must agree on those inputs and also reject a float scalar.
    for candidate in accepted:
        assert is_categorical(candidate)
    assert not is_categorical(float_dtype)
    assert not is_categorical(1.0)
def test_basic(self):
    """Exercise ``is_categorical_dtype`` and ``is_categorical`` on basic inputs."""
    self.assertTrue(is_categorical_dtype(self.dtype))

    labels = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
    ser = Series(Categorical(labels), name='A')

    # dtypes: the Series' dtype and the Series itself are categorical,
    # a float64 dtype is not.
    for candidate in (ser.dtype, ser):
        self.assertTrue(is_categorical_dtype(candidate))
    self.assertFalse(is_categorical_dtype(np.dtype('float64')))

    # Same inputs through is_categorical, plus a bare float scalar.
    for candidate in (ser.dtype, ser):
        self.assertTrue(is_categorical(candidate))
    for rejected in (np.dtype('float64'), 1.0):
        self.assertFalse(is_categorical(rejected))
def test_basic(self, dtype):
    """
    Exercise ``is_categorical_dtype`` and the deprecated ``is_categorical``.

    The ``is_categorical`` calls run inside
    ``tm.assert_produces_warning(FutureWarning)``.
    """
    assert is_categorical_dtype(dtype)

    labels = ["a", "b", "b", "a", "a", "c", "c", "c"]
    ser = Series(Categorical(labels), name="A")

    # dtypes: the Series' dtype and the Series itself are categorical,
    # a float64 dtype is not.
    for candidate in (ser.dtype, ser):
        assert is_categorical_dtype(candidate)
    assert not is_categorical_dtype(np.dtype("float64"))

    expect_future_warning = tm.assert_produces_warning(FutureWarning)
    with expect_future_warning:
        # GH#33385 deprecated
        for candidate in (ser.dtype, ser):
            assert is_categorical(candidate)
        for rejected in (np.dtype("float64"), 1.0):
            assert not is_categorical(rejected)
def _from_values_or_dtype(cls, values=None, categories=None, ordered=None, dtype=None):
    """
    Resolve the dtype implied by the arguments given to :class:`Categorical`.

    No factorization of ``values`` is performed here, so the result may be
    ``CategoricalDtype(categories=None, ordered=None)``; callers that need
    concrete categories have to derive them afterwards.

    The inputs are consulted in this order:

    1. ``dtype`` is the string ``'category'``: build a CategoricalDtype
       from ``categories`` and ``ordered``.
    2. ``dtype`` is any other non-None, non-string object: return it
       unchanged (``categories`` and ``ordered`` must both be None).
    3. ``dtype`` is None and ``values`` is categorical: start from
       ``values.dtype`` and let ``categories`` / ``ordered`` override it.
    4. Otherwise: build a CategoricalDtype from ``categories`` and
       ``ordered``, even when both are None.

    Parameters
    ----------
    values : list-like, optional
        The list-like must be 1-dimensional.
    categories : list-like, optional
        Categories for the CategoricalDtype.
    ordered : bool, optional
        Whether the categories are ordered.
    dtype : CategoricalDtype or the string "category", optional
        If a ``CategoricalDtype``, it cannot be combined with `categories`
        or `ordered`.

    Returns
    -------
    CategoricalDtype

    Raises
    ------
    ValueError
        If ``dtype`` is a string other than ``'category'``, or if a
        non-string ``dtype`` is given together with ``categories`` or
        ``ordered``.

    Examples
    --------
    >>> CategoricalDtype._from_values_or_dtype()
    CategoricalDtype(categories=None, ordered=None)
    >>> CategoricalDtype._from_values_or_dtype(categories=['a', 'b'],
    ...                                        ordered=True)
    CategoricalDtype(categories=['a', 'b'], ordered=True)
    >>> dtype1 = CategoricalDtype(['a', 'b'], ordered=True)
    >>> dtype2 = CategoricalDtype(['x', 'y'], ordered=False)
    >>> c = Categorical([0, 1], dtype=dtype1, fastpath=True)
    >>> CategoricalDtype._from_values_or_dtype(c, ['x', 'y'], ordered=True,
    ...                                        dtype=dtype2)
    Traceback (most recent call last):
        ...
    ValueError: Cannot specify `categories` or `ordered` together with `dtype`.

    An explicit dtype wins over the dtype carried by ``values``:

    >>> CategoricalDtype._from_values_or_dtype(c, dtype=dtype2)
    CategoricalDtype(categories=['x', 'y'], ordered=False)
    """
    from pandas.core.dtypes.common import is_categorical

    if dtype is None:
        if is_categorical(values):
            # No explicit dtype: start from the one on ``values`` and honor
            # any ``categories`` / ``ordered`` overrides.
            return values.dtype._from_categorical_dtype(values.dtype,
                                                        categories, ordered)
        # Neither a dtype nor categorical values: build from scratch.
        # Note: this may carry categories=None and ordered=None.
        return CategoricalDtype(categories, ordered)

    # From here on an explicit dtype was supplied; it takes precedence over
    # values.dtype (if any).
    if isinstance(dtype, str):
        if dtype != 'category':
            msg = "Unknown dtype {dtype!r}"
            raise ValueError(msg.format(dtype=dtype))
        return CategoricalDtype(categories, ordered)

    if categories is not None or ordered is not None:
        raise ValueError("Cannot specify `categories` or `ordered` "
                         "together with `dtype`.")

    return dtype
def _from_values_or_dtype(cls, values=None, categories=None, ordered=None, dtype=None):
    """
    Resolve the dtype implied by the arguments given to :class:`Categorical`.

    No factorization of ``values`` is performed here, so the result may be
    ``CategoricalDtype(categories=None, ordered=None)``; callers that need
    concrete categories have to derive them afterwards.

    The inputs are consulted in this order:

    1. ``dtype`` is the string ``'category'``: build a CategoricalDtype
       from ``categories`` and ``ordered``.
    2. ``dtype`` is any other non-None, non-string object: return it
       unchanged (``categories`` and ``ordered`` must both be None).
    3. ``dtype`` is None and ``values`` is categorical: start from
       ``values.dtype`` and let ``categories`` / ``ordered`` override it.
    4. Otherwise: build a CategoricalDtype from ``categories`` and
       ``ordered``, even when both are None.

    Parameters
    ----------
    values : list-like, optional
        The list-like must be 1-dimensional.
    categories : list-like, optional
        Categories for the CategoricalDtype.
    ordered : bool, optional
        Whether the categories are ordered.
    dtype : CategoricalDtype or the string "category", optional
        If a ``CategoricalDtype``, it cannot be combined with `categories`
        or `ordered`.

    Returns
    -------
    CategoricalDtype

    Raises
    ------
    ValueError
        If ``dtype`` is a string other than ``'category'``, or if a
        non-string ``dtype`` is given together with ``categories`` or
        ``ordered``.

    Examples
    --------
    >>> CategoricalDtype._from_values_or_dtype()
    CategoricalDtype(categories=None, ordered=None)
    >>> CategoricalDtype._from_values_or_dtype(categories=['a', 'b'],
    ...                                        ordered=True)
    CategoricalDtype(categories=['a', 'b'], ordered=True)
    >>> dtype1 = CategoricalDtype(['a', 'b'], ordered=True)
    >>> dtype2 = CategoricalDtype(['x', 'y'], ordered=False)
    >>> c = Categorical([0, 1], dtype=dtype1, fastpath=True)
    >>> CategoricalDtype._from_values_or_dtype(c, ['x', 'y'], ordered=True,
    ...                                        dtype=dtype2)
    Traceback (most recent call last):
        ...
    ValueError: Cannot specify `categories` or `ordered` together with `dtype`.

    An explicit dtype wins over the dtype carried by ``values``:

    >>> CategoricalDtype._from_values_or_dtype(c, dtype=dtype2)
    CategoricalDtype(categories=['x', 'y'], ordered=False)
    """
    from pandas.core.dtypes.common import is_categorical

    if dtype is None:
        if is_categorical(values):
            # No explicit dtype: start from the one on ``values`` and honor
            # any ``categories`` / ``ordered`` overrides.
            return values.dtype._from_categorical_dtype(
                values.dtype, categories, ordered)
        # Neither a dtype nor categorical values: build from scratch.
        # Note: this may carry categories=None and ordered=None.
        return CategoricalDtype(categories, ordered)

    # From here on an explicit dtype was supplied; it takes precedence over
    # values.dtype (if any).
    if isinstance(dtype, compat.string_types):
        if dtype != 'category':
            msg = "Unknown dtype {dtype!r}"
            raise ValueError(msg.format(dtype=dtype))
        return CategoricalDtype(categories, ordered)

    if categories is not None or ordered is not None:
        raise ValueError("Cannot specify `categories` or `ordered` "
                         "together with `dtype`.")

    return dtype
def test_is_categorical_deprecation():
    """Calling ``com.is_categorical`` is expected to emit a ``FutureWarning``."""
    # GH#33385
    expect_future_warning = tm.assert_produces_warning(FutureWarning)
    with expect_future_warning:
        com.is_categorical([1, 2, 3])