Esempio n. 1
0
def test_is_categorical():
    """Check ``com.is_categorical`` on categorical containers and a plain list."""
    categorical = pd.Categorical([1, 2, 3])

    # A Categorical, a Series wrapping it, and a CategoricalIndex must all
    # be reported as categorical.
    accepted = (
        categorical,
        pd.Series(categorical),
        pd.CategoricalIndex([1, 2, 3]),
    )
    for candidate in accepted:
        assert com.is_categorical(candidate)

    # A plain Python list must not be.
    assert not com.is_categorical([1, 2, 3])
Esempio n. 2
0
def test_is_categorical():
    """Verify which inputs ``com.is_categorical`` accepts and rejects."""
    raw = [1, 2, 3]
    as_categorical = pd.Categorical(raw)

    # Positive cases: the Categorical itself, a Series built from it, and a
    # CategoricalIndex built from the same values.
    positives = [
        as_categorical,
        pd.Series(as_categorical),
        pd.CategoricalIndex(raw),
    ]
    assert all(com.is_categorical(obj) for obj in positives)

    # Negative case: the untouched list of integers.
    assert not com.is_categorical(raw)
Esempio n. 3
0
def test_is_categorical():
    """
    Check ``com.is_categorical`` results while expecting a FutureWarning.

    Every ``is_categorical`` call is made inside
    ``tm.assert_produces_warning(FutureWarning)``.
    """
    categorical = pd.Categorical([1, 2, 3])

    with tm.assert_produces_warning(FutureWarning):
        # Inputs that must be reported as categorical.
        accepted = (
            categorical,
            pd.Series(categorical),
            pd.CategoricalIndex([1, 2, 3]),
        )
        for candidate in accepted:
            assert com.is_categorical(candidate)

        # A plain list must be rejected.
        assert not com.is_categorical([1, 2, 3])
Esempio n. 4
0
    def test_basic(self):
        """Exercise ``is_categorical_dtype`` and ``is_categorical`` on basic inputs."""
        assert is_categorical_dtype(self.dtype)

        labels = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
        series = Series(Categorical(labels), name='A')
        float_dtype = np.dtype('float64')

        # is_categorical_dtype: true for the Series and its dtype,
        # false for a float64 dtype
        assert is_categorical_dtype(series.dtype)
        assert is_categorical_dtype(series)
        assert not is_categorical_dtype(float_dtype)

        # is_categorical: same expectations, plus a bare float scalar
        for accepted in (series.dtype, series):
            assert is_categorical(accepted)
        for rejected in (float_dtype, 1.0):
            assert not is_categorical(rejected)
Esempio n. 5
0
    def get_indexer(
        self,
        target: AnyArrayLike,
        method: Optional[str] = None,
        limit: Optional[int] = None,
        tolerance: Optional[Any] = None,
    ) -> np.ndarray:
        """
        Compute the position in this index of each element of ``target``.

        Parameters
        ----------
        target : array-like
            Converted with ``ensure_index`` before matching.
        method : str, optional
            Only passed to ``self._check_method``.
        limit : int, optional
            Not referenced in this method body.
        tolerance : optional
            Not referenced in this method body.

        Returns
        -------
        np.ndarray
            One entry per element of ``target``; ``-1`` marks no match.

        Raises
        ------
        InvalidIndexError
            If ``self.is_overlapping`` is true.
        TypeError
            If ``get_loc`` raises ``InvalidIndexError`` for a key of an
            object-dtype target.
        """
        self._check_method(method)

        if self.is_overlapping:
            raise InvalidIndexError(
                "cannot handle overlapping indices; "
                "use IntervalIndex.get_indexer_non_unique"
            )

        other = ensure_index(target)

        if isinstance(other, IntervalIndex):
            # identical indexes match position for position
            if self.equals(other):
                return np.arange(len(self), dtype="intp")

            # a different `closed` side or an incompatible subtype means
            # nothing can match
            shared_subtype = find_common_type(
                [self.dtype.subtype, other.dtype.subtype]
            )
            if self.closed != other.closed or is_object_dtype(shared_subtype):
                return np.repeat(np.intp(-1), len(other))

            # non-overlapping, so each target interval matches at most once;
            # an exact match needs both endpoints to resolve to the same
            # position, so look up each side and compare elementwise
            left_hits = self.left.get_indexer(other.left)
            right_hits = self.right.get_indexer(other.right)
            matches = np.where(left_hits == right_hits, left_hits, -1)
            return ensure_platform_int(matches)

        if is_categorical(other):
            # resolve the unique categories once, then spread the result
            # over the codes with take_1d
            per_category = self.get_indexer(other.categories)
            spread = take_1d(per_category, other.codes, fill_value=-1)
            return ensure_platform_int(spread)

        if not is_object_dtype(other):
            # homogeneous scalar index: use IntervalTree
            other = self._maybe_convert_i8(other)
            return ensure_platform_int(self._engine.get_indexer(other.values))

        # heterogeneous scalar index: fall back to get_loc per element
        # (non-overlapping, so get_loc yields a scalar or raises KeyError)
        positions = []
        for key in other:
            try:
                position = self.get_loc(key)
            except KeyError:
                position = -1
            except InvalidIndexError as err:
                # i.e. non-scalar key
                raise TypeError(key) from err
            positions.append(position)

        return ensure_platform_int(positions)
Esempio n. 6
0
    def test_basic(self):
        """Check both categorical predicates against a Series, dtypes and a scalar."""
        assert is_categorical_dtype(self.dtype)

        cat_series = Series(
            Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']),
            name='A',
        )
        cat_dtype = cat_series.dtype
        f64 = np.dtype('float64')

        # dtype-level predicate
        assert is_categorical_dtype(cat_dtype)
        assert is_categorical_dtype(cat_series)
        assert not is_categorical_dtype(f64)

        # object-level predicate, which also has to reject a plain float
        assert is_categorical(cat_dtype)
        assert is_categorical(cat_series)
        assert not is_categorical(f64)
        assert not is_categorical(1.0)
Esempio n. 7
0
    def test_basic(self):
        """Run the basic ``is_categorical_dtype`` / ``is_categorical`` checks."""
        assert is_categorical_dtype(self.dtype)

        labels = ["a", "b", "b", "a", "a", "c", "c", "c"]
        series = Series(Categorical(labels), name="A")

        # Expected outcomes shared by both predicates, in evaluation order.
        expectations = (
            (series.dtype, True),
            (series, True),
            (np.dtype("float64"), False),
        )

        for obj, expected in expectations:
            assert bool(is_categorical_dtype(obj)) is expected

        for obj, expected in expectations:
            assert bool(is_categorical(obj)) is expected

        # is_categorical must additionally reject a float scalar.
        assert not is_categorical(1.0)
Esempio n. 8
0
    def test_basic(self):
        """Check ``is_categorical_dtype`` and ``is_categorical`` on basic inputs."""
        self.assertTrue(is_categorical_dtype(self.dtype))

        labels = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
        series = Series(Categorical(labels), name='A')
        float_dtype = np.dtype('float64')

        # Both predicates accept the Series and its dtype and reject float64;
        # is_categorical_dtype is checked first, then is_categorical.
        for predicate in (is_categorical_dtype, is_categorical):
            self.assertTrue(predicate(series.dtype))
            self.assertTrue(predicate(series))
            self.assertFalse(predicate(float_dtype))

        # is_categorical must also reject a bare float scalar.
        self.assertFalse(is_categorical(1.0))
Esempio n. 9
0
    def test_basic(self):
        """Assert the categorical predicates on a Series, dtypes and a scalar."""
        self.assertTrue(is_categorical_dtype(self.dtype))

        cat_series = Series(
            Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']),
            name='A',
        )
        cat_dtype = cat_series.dtype

        # dtype-level predicate
        for accepted in (cat_dtype, cat_series):
            self.assertTrue(is_categorical_dtype(accepted))
        self.assertFalse(is_categorical_dtype(np.dtype('float64')))

        # object-level predicate, which also rejects a plain float
        for accepted in (cat_dtype, cat_series):
            self.assertTrue(is_categorical(accepted))
        for rejected in (np.dtype('float64'), 1.0):
            self.assertFalse(is_categorical(rejected))
Esempio n. 10
0
    def test_basic(self, dtype):
        """
        Check ``is_categorical_dtype`` directly and ``is_categorical`` under
        an expected FutureWarning.
        """
        assert is_categorical_dtype(dtype)

        labels = ["a", "b", "b", "a", "a", "c", "c", "c"]
        series = Series(Categorical(labels), name="A")

        # is_categorical_dtype: true for the Series and its dtype,
        # false for float64
        for accepted in (series.dtype, series):
            assert is_categorical_dtype(accepted)
        assert not is_categorical_dtype(np.dtype("float64"))

        with tm.assert_produces_warning(FutureWarning):
            # GH#33385 deprecated
            for accepted in (series.dtype, series):
                assert is_categorical(accepted)
            for rejected in (np.dtype("float64"), 1.0):
                assert not is_categorical(rejected)
Esempio n. 11
0
    def _from_values_or_dtype(cls, values=None, categories=None, ordered=None,
                              dtype=None):
        """
        Build a dtype from the arguments accepted by :class:`Categorical`.

        No factorization of ``values`` is performed here, so the result may
        be ``CategoricalDtype(categories=None, ordered=None)`` and callers
        may need further steps to obtain the final dtype.

        The result is chosen in this order of priority:

        1. ``dtype`` is given and is not a string: return it unchanged.
        2. ``dtype`` is the string ``'category'``: build a CategoricalDtype
           from ``categories`` and ``ordered``.
        3. ``dtype`` is None and ``values`` is categorical: derive the dtype
           from ``values.dtype`` via ``_from_categorical_dtype``, passing
           ``categories`` and ``ordered`` along.
        4. Otherwise: build a CategoricalDtype from ``categories`` and
           ``ordered``, even if both are None.

        Parameters
        ----------
        values : list-like, optional
            The list-like must be 1-dimensional.
        categories : list-like, optional
            Categories for the CategoricalDtype.
        ordered : bool, optional
            Designating if the categories are ordered.
        dtype : CategoricalDtype or the string "category", optional
            If ``CategoricalDtype``, cannot be used together with
            `categories` or `ordered`.

        Returns
        -------
        CategoricalDtype

        Raises
        ------
        ValueError
            If ``dtype`` is a string other than ``'category'``, or if a
            non-string ``dtype`` is combined with ``categories`` or
            ``ordered``.

        Examples
        --------
        >>> CategoricalDtype._from_values_or_dtype()
        CategoricalDtype(categories=None, ordered=None)
        >>> CategoricalDtype._from_values_or_dtype(categories=['a', 'b'],
        ...                                        ordered=True)
        CategoricalDtype(categories=['a', 'b'], ordered=True)
        >>> dtype1 = CategoricalDtype(['a', 'b'], ordered=True)
        >>> dtype2 = CategoricalDtype(['x', 'y'], ordered=False)
        >>> c = Categorical([0, 1], dtype=dtype1, fastpath=True)
        >>> CategoricalDtype._from_values_or_dtype(c, ['x', 'y'], ordered=True,
        ...                                        dtype=dtype2)
        Traceback (most recent call last):
            ...
        ValueError: Cannot specify `categories` or `ordered` together with
        `dtype`.

        The supplied dtype takes precedence over values' dtype:

        >>> CategoricalDtype._from_values_or_dtype(c, dtype=dtype2)
        CategoricalDtype(categories=['x', 'y'], ordered=False)
        """
        # NOTE(review): imported locally, presumably to avoid a circular
        # import -- confirm before moving it to module level.
        from pandas.core.dtypes.common import is_categorical

        if dtype is None:
            if is_categorical(values):
                # Start from the dtype carried by "values", passing the
                # "categories" and "ordered" arguments along.
                return values.dtype._from_categorical_dtype(
                    values.dtype, categories, ordered)
            # Nothing to inherit from: build a new dtype, which may end up
            # with categories=None and ordered=None.
            return CategoricalDtype(categories, ordered)

        # From here on an explicit dtype wins over values.dtype (if any).
        if isinstance(dtype, str):
            if dtype != 'category':
                raise ValueError("Unknown dtype {dtype!r}".format(dtype=dtype))
            return CategoricalDtype(categories, ordered)

        if not (categories is None and ordered is None):
            raise ValueError("Cannot specify `categories` or `ordered` "
                             "together with `dtype`.")

        return dtype
Esempio n. 12
0
    def _from_values_or_dtype(cls,
                              values=None,
                              categories=None,
                              ordered=None,
                              dtype=None):
        """
        Resolve the dtype implied by the arguments of :class:`Categorical`.

        The factorization step that may be needed to discover the categories
        is deliberately skipped, so this can return
        ``CategoricalDtype(categories=None, ordered=None)``; further steps
        may then be required to create the final dtype.

        Resolution order:

        1. a non-string ``dtype`` is returned as given;
        2. the string ``'category'`` yields a CategoricalDtype built from
           ``categories`` and ``ordered``;
        3. with no ``dtype`` and categorical ``values``, the dtype comes
           from ``values.dtype._from_categorical_dtype`` called with
           ``categories`` and ``ordered``;
        4. otherwise a CategoricalDtype is built from ``categories`` and
           ``ordered``, even if both are None.

        Parameters
        ----------
        values : list-like, optional
            The list-like must be 1-dimensional.
        categories : list-like, optional
            Categories for the CategoricalDtype.
        ordered : bool, optional
            Designating if the categories are ordered.
        dtype : CategoricalDtype or the string "category", optional
            If ``CategoricalDtype``, cannot be used together with
            `categories` or `ordered`.

        Returns
        -------
        CategoricalDtype

        Raises
        ------
        ValueError
            If ``dtype`` is a string other than ``'category'``, or if a
            non-string ``dtype`` is given together with ``categories`` or
            ``ordered``.

        Examples
        --------
        >>> CategoricalDtype._from_values_or_dtype()
        CategoricalDtype(categories=None, ordered=None)
        >>> CategoricalDtype._from_values_or_dtype(categories=['a', 'b'],
        ...                                        ordered=True)
        CategoricalDtype(categories=['a', 'b'], ordered=True)
        >>> dtype1 = CategoricalDtype(['a', 'b'], ordered=True)
        >>> dtype2 = CategoricalDtype(['x', 'y'], ordered=False)
        >>> c = Categorical([0, 1], dtype=dtype1, fastpath=True)
        >>> CategoricalDtype._from_values_or_dtype(c, ['x', 'y'], ordered=True,
        ...                                        dtype=dtype2)
        Traceback (most recent call last):
            ...
        ValueError: Cannot specify `categories` or `ordered` together with
        `dtype`.

        The supplied dtype takes precedence over values' dtype:

        >>> CategoricalDtype._from_values_or_dtype(c, dtype=dtype2)
        CategoricalDtype(categories=['x', 'y'], ordered=False)
        """
        # NOTE(review): function-level import, presumably to break an
        # import cycle -- confirm before hoisting it.
        from pandas.core.dtypes.common import is_categorical

        has_overrides = categories is not None or ordered is not None

        if dtype is None:
            if not is_categorical(values):
                # No dtype and no categorical values: create a new dtype,
                # possibly with categories=None and ordered=None.
                return CategoricalDtype(categories, ordered)
            # No dtype passed: start from the one on "values" and pass the
            # "categories" and "ordered" arguments along.
            return values.dtype._from_categorical_dtype(
                values.dtype, categories, ordered)

        # An explicit dtype takes precedence over values.dtype (if any).
        if isinstance(dtype, compat.string_types):
            if dtype == 'category':
                return CategoricalDtype(categories, ordered)
            raise ValueError("Unknown dtype {dtype!r}".format(dtype=dtype))

        if has_overrides:
            raise ValueError("Cannot specify `categories` or `ordered` "
                             "together with `dtype`.")

        return dtype
Esempio n. 13
0
def test_is_categorical_deprecation():
    """Expect a FutureWarning when ``com.is_categorical`` is called."""
    # GH#33385
    plain_list = [1, 2, 3]
    with tm.assert_produces_warning(FutureWarning):
        com.is_categorical(plain_list)