Ejemplo n.º 1
0
    def test_method_delegation(self):

        ci = CategoricalIndex(list('aabbca'), categories=list('cabdef'))
        result = ci.set_categories(list('cab'))
        tm.assert_index_equal(
            result, CategoricalIndex(list('aabbca'), categories=list('cab')))

        ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
        result = ci.rename_categories(list('efg'))
        tm.assert_index_equal(
            result, CategoricalIndex(list('ffggef'), categories=list('efg')))

        ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
        result = ci.add_categories(['d'])
        tm.assert_index_equal(
            result, CategoricalIndex(list('aabbca'), categories=list('cabd')))

        ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
        result = ci.remove_categories(['c'])
        tm.assert_index_equal(
            result,
            CategoricalIndex(list('aabb') + [np.nan] + ['a'],
                             categories=list('ab')))

        ci = CategoricalIndex(list('aabbca'), categories=list('cabdef'))
        result = ci.as_unordered()
        tm.assert_index_equal(result, ci)

        ci = CategoricalIndex(list('aabbca'), categories=list('cabdef'))
        result = ci.as_ordered()
        tm.assert_index_equal(
            result,
            CategoricalIndex(list('aabbca'),
                             categories=list('cabdef'),
                             ordered=True))

        # invalid
        pytest.raises(ValueError,
                      lambda: ci.set_categories(list('cab'), inplace=True))
Ejemplo n.º 2
0
 def test_format_different_scalar_lengths(self):
     # GH35439
     idx = CategoricalIndex(["aaaaaaaaa", "b"])
     expected = ["aaaaaaaaa", "b"]
     assert idx.format() == expected
Ejemplo n.º 3
0
    def test_method_delegation(self):

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.set_categories(list("cab"))
        tm.assert_index_equal(
            result, CategoricalIndex(list("aabbca"), categories=list("cab")))

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.rename_categories(list("efg"))
        tm.assert_index_equal(
            result, CategoricalIndex(list("ffggef"), categories=list("efg")))

        # GH18862 (let rename_categories take callables)
        result = ci.rename_categories(lambda x: x.upper())
        tm.assert_index_equal(
            result, CategoricalIndex(list("AABBCA"), categories=list("CAB")))

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.add_categories(["d"])
        tm.assert_index_equal(
            result, CategoricalIndex(list("aabbca"), categories=list("cabd")))

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.remove_categories(["c"])
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("aabb") + [np.nan] + ["a"],
                             categories=list("ab")),
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.as_unordered()
        tm.assert_index_equal(result, ci)

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.as_ordered()
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("aabbca"),
                             categories=list("cabdef"),
                             ordered=True),
        )

        # invalid
        msg = "cannot use inplace with CategoricalIndex"
        with pytest.raises(ValueError, match=msg):
            ci.set_categories(list("cab"), inplace=True)
Ejemplo n.º 4
0
 def test_insert_na_mismatched_dtype(self):
     ci = CategoricalIndex([0, 1, 1])
     msg = "'fill_value=NaT' is not present in this Categorical's categories"
     with pytest.raises(TypeError, match=msg):
         ci.insert(0, pd.NaT)
Ejemplo n.º 5
0
 def test_frame_repr(self):
     df = pd.DataFrame({"A": [1, 2, 3]},
                       index=CategoricalIndex(["a", "b", "c"]))
     result = repr(df)
     expected = "   A\na  1\nb  2\nc  3"
     assert result == expected
Ejemplo n.º 6
0
    def __init__(
        self,
        index: Index,
        grouper=None,
        obj: Optional[FrameOrSeries] = None,
        name=None,
        level=None,
        sort: bool = True,
        observed: bool = False,
        in_axis: bool = False,
    ):
        self.name = name
        self.level = level
        self.grouper = _convert_grouper(index, grouper)
        self.all_grouper = None
        self.index = index
        self.sort = sort
        self.obj = obj
        self.observed = observed
        self.in_axis = in_axis

        # right place for this?
        if isinstance(grouper, (Series, Index)) and name is None:
            self.name = grouper.name

        if isinstance(grouper, MultiIndex):
            self.grouper = grouper.values

        # we have a single grouper which may be a myriad of things,
        # some of which are dependent on the passing in level

        if level is not None:
            if not isinstance(level, int):
                if level not in index.names:
                    raise AssertionError(f"Level {level} not in index")
                level = index.names.index(level)

            if self.name is None:
                self.name = index.names[level]

            (
                self.grouper,
                self._codes,
                self._group_index,
            ) = index._get_grouper_for_level(self.grouper, level)

        # a passed Grouper like, directly get the grouper in the same way
        # as single grouper groupby, use the group_info to get codes
        elif isinstance(self.grouper, Grouper):
            # get the new grouper; we already have disambiguated
            # what key/level refer to exactly, don't need to
            # check again as we have by this point converted these
            # to an actual value (rather than a pd.Grouper)
            _, grouper, _ = self.grouper._get_grouper(self.obj, validate=False)
            if self.name is None:
                self.name = grouper.result_index.name
            self.obj = self.grouper.obj
            self.grouper = grouper._get_grouper()

        else:
            if self.grouper is None and self.name is not None and self.obj is not None:
                self.grouper = self.obj[self.name]

            elif isinstance(self.grouper, (list, tuple)):
                self.grouper = com.asarray_tuplesafe(self.grouper)

            # a passed Categorical
            elif is_categorical_dtype(self.grouper):

                self.grouper, self.all_grouper = recode_for_groupby(
                    self.grouper, self.sort, observed)
                categories = self.grouper.categories

                # we make a CategoricalIndex out of the cat grouper
                # preserving the categories / ordered attributes
                self._codes = self.grouper.codes
                if observed:
                    codes = algorithms.unique1d(self.grouper.codes)
                    codes = codes[codes != -1]
                    if sort or self.grouper.ordered:
                        codes = np.sort(codes)
                else:
                    codes = np.arange(len(categories))

                self._group_index = CategoricalIndex(
                    Categorical.from_codes(codes=codes,
                                           categories=categories,
                                           ordered=self.grouper.ordered),
                    name=self.name,
                )

            # we are done
            if isinstance(self.grouper, Grouping):
                self.grouper = self.grouper.grouper

            # no level passed
            elif not isinstance(self.grouper,
                                (Series, Index, ExtensionArray, np.ndarray)):
                if getattr(self.grouper, "ndim", 1) != 1:
                    t = self.name or str(type(self.grouper))
                    raise ValueError(f"Grouper for '{t}' not 1-dimensional")
                self.grouper = self.index.map(self.grouper)
                if not (hasattr(self.grouper, "__len__")
                        and len(self.grouper) == len(self.index)):
                    grper = pprint_thing(self.grouper)
                    errmsg = ("Grouper result violates len(labels) == "
                              f"len(data)\nresult: {grper}")
                    self.grouper = None  # Try for sanity
                    raise AssertionError(errmsg)

        # if we have a date/time-like grouper, make sure that we have
        # Timestamps like
        if getattr(self.grouper, "dtype", None) is not None:
            if is_datetime64_dtype(self.grouper):
                self.grouper = self.grouper.astype("datetime64[ns]")
            elif is_timedelta64_dtype(self.grouper):

                self.grouper = self.grouper.astype("timedelta64[ns]")
Ejemplo n.º 7
0
    def test_method_delegation(self):

        ci = CategoricalIndex(list('aabbca'), categories=list('cabdef'))
        result = ci.set_categories(list('cab'))
        tm.assert_index_equal(
            result, CategoricalIndex(list('aabbca'), categories=list('cab')))

        ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
        result = ci.rename_categories(list('efg'))
        tm.assert_index_equal(
            result, CategoricalIndex(list('ffggef'), categories=list('efg')))

        # GH18862 (let rename_categories take callables)
        result = ci.rename_categories(lambda x: x.upper())
        tm.assert_index_equal(
            result, CategoricalIndex(list('AABBCA'), categories=list('CAB')))

        ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
        result = ci.add_categories(['d'])
        tm.assert_index_equal(
            result, CategoricalIndex(list('aabbca'), categories=list('cabd')))

        ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
        result = ci.remove_categories(['c'])
        tm.assert_index_equal(
            result,
            CategoricalIndex(list('aabb') + [np.nan] + ['a'],
                             categories=list('ab')))

        ci = CategoricalIndex(list('aabbca'), categories=list('cabdef'))
        result = ci.as_unordered()
        tm.assert_index_equal(result, ci)

        ci = CategoricalIndex(list('aabbca'), categories=list('cabdef'))
        result = ci.as_ordered()
        tm.assert_index_equal(
            result,
            CategoricalIndex(list('aabbca'),
                             categories=list('cabdef'),
                             ordered=True))

        # invalid
        msg = "cannot use inplace with CategoricalIndex"
        with pytest.raises(ValueError, match=msg):
            ci.set_categories(list('cab'), inplace=True)
Ejemplo n.º 8
0
 def test_reindex_empty_index(self):
     # See GH16770
     c = CategoricalIndex([])
     res, indexer = c.reindex(["a", "b"])
     tm.assert_index_equal(res, Index(["a", "b"]), exact=True)
     tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))
Ejemplo n.º 9
0
    def test_has_duplicates(self):

        idx = CategoricalIndex([0, 0, 0], name="foo")
        assert idx.is_unique is False
        assert idx.has_duplicates is True
Ejemplo n.º 10
0
 def create_index(self, *, categories=None, ordered=False):
     if categories is None:
         categories = list("cab")
     return CategoricalIndex(list("aabbca"),
                             categories=categories,
                             ordered=ordered)
Ejemplo n.º 11
0
 def test_insert_na_mismatched_dtype(self):
     ci = CategoricalIndex([0, 1, 1])
     result = ci.insert(0, pd.NaT)
     expected = Index([pd.NaT, 0, 1, 1], dtype=object)
     tm.assert_index_equal(result, expected)
Ejemplo n.º 12
0
    def test_unique(self, data, categories, expected_data, ordered):
        dtype = CategoricalDtype(categories, ordered=ordered)

        idx = CategoricalIndex(data, dtype=dtype)
        expected = CategoricalIndex(expected_data, dtype=dtype)
        tm.assert_index_equal(idx.unique(), expected)
Ejemplo n.º 13
0
    def test_has_duplicates(self):

        idx = CategoricalIndex([0, 0, 0], name='foo')
        assert not idx.is_unique
        assert idx.has_duplicates
Ejemplo n.º 14
0
 def test_is_unique(self, values, expected):
     ci = CategoricalIndex(values)
     assert ci.is_unique is expected
Ejemplo n.º 15
0
 def create_index(self, categories=None, ordered=False):
     if categories is None:
         categories = list('cab')
     return CategoricalIndex(list('aabbca'),
                             categories=categories,
                             ordered=ordered)
Ejemplo n.º 16
0
    def test_drop_duplicates(self):

        idx = CategoricalIndex([0, 0, 0], name="foo")
        expected = CategoricalIndex([0], name="foo")
        tm.assert_index_equal(idx.drop_duplicates(), expected)
        tm.assert_index_equal(idx.unique(), expected)
Ejemplo n.º 17
0
    def test_construction(self):

        ci = self.create_index(categories=list('abcd'))
        categories = ci.categories

        result = Index(ci)
        tm.assert_index_equal(result, ci, exact=True)
        assert not result.ordered

        result = Index(ci.values)
        tm.assert_index_equal(result, ci, exact=True)
        assert not result.ordered

        # empty
        result = CategoricalIndex(categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(result.codes, np.array([], dtype='int8'))
        assert not result.ordered

        # passing categories
        result = CategoricalIndex(list('aabbca'), categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(result.codes,
                                    np.array([0, 0, 1, 1, 2, 0], dtype='int8'))

        c = pd.Categorical(list('aabbca'))
        result = CategoricalIndex(c)
        tm.assert_index_equal(result.categories, Index(list('abc')))
        tm.assert_numpy_array_equal(result.codes,
                                    np.array([0, 0, 1, 1, 2, 0], dtype='int8'))
        assert not result.ordered

        result = CategoricalIndex(c, categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(result.codes,
                                    np.array([0, 0, 1, 1, 2, 0], dtype='int8'))
        assert not result.ordered

        ci = CategoricalIndex(c, categories=list('abcd'))
        result = CategoricalIndex(ci)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(result.codes,
                                    np.array([0, 0, 1, 1, 2, 0], dtype='int8'))
        assert not result.ordered

        result = CategoricalIndex(ci, categories=list('ab'))
        tm.assert_index_equal(result.categories, Index(list('ab')))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, -1, 0], dtype='int8'))
        assert not result.ordered

        result = CategoricalIndex(ci, categories=list('ab'), ordered=True)
        tm.assert_index_equal(result.categories, Index(list('ab')))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, -1, 0], dtype='int8'))
        assert result.ordered

        # turn me to an Index
        result = Index(np.array(ci))
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)
Ejemplo n.º 18
0
    def test_equals_categorical(self):
        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        ci2 = CategoricalIndex(["a", "b"],
                               categories=["a", "b", "c"],
                               ordered=True)

        assert ci1.equals(ci1)
        assert not ci1.equals(ci2)
        assert ci1.equals(ci1.astype(object))
        assert ci1.astype(object).equals(ci1)

        assert (ci1 == ci1).all()
        assert not (ci1 != ci1).all()
        assert not (ci1 > ci1).all()
        assert not (ci1 < ci1).all()
        assert (ci1 <= ci1).all()
        assert (ci1 >= ci1).all()

        assert not (ci1 == 1).all()
        assert (ci1 == Index(["a", "b"])).all()
        assert (ci1 == ci1.values).all()

        # invalid comparisons
        with pytest.raises(ValueError, match="Lengths must match"):
            ci1 == Index(["a", "b", "c"])

        msg = (
            "categorical index comparisons must have the same categories"
            " and ordered attributes"
            "|"
            "Categoricals can only be compared if 'categories' are the same. "
            "Categories are different lengths"
            "|"
            "Categoricals can only be compared if 'ordered' is the same")
        with pytest.raises(TypeError, match=msg):
            ci1 == ci2
        with pytest.raises(TypeError, match=msg):
            ci1 == Categorical(ci1.values, ordered=False)
        with pytest.raises(TypeError, match=msg):
            ci1 == Categorical(ci1.values, categories=list("abc"))

        # tests
        # make sure that we are testing for category inclusion properly
        ci = CategoricalIndex(list("aabca"), categories=["c", "a", "b"])
        assert not ci.equals(list("aabca"))
        # Same categories, but different order
        # Unordered
        assert ci.equals(CategoricalIndex(list("aabca")))
        # Ordered
        assert not ci.equals(CategoricalIndex(list("aabca"), ordered=True))
        assert ci.equals(ci.copy())

        ci = CategoricalIndex(list("aabca") + [np.nan],
                              categories=["c", "a", "b"])
        assert not ci.equals(list("aabca"))
        assert not ci.equals(CategoricalIndex(list("aabca")))
        assert ci.equals(ci.copy())

        ci = CategoricalIndex(list("aabca") + [np.nan],
                              categories=["c", "a", "b"])
        assert not ci.equals(list("aabca") + [np.nan])
        assert ci.equals(CategoricalIndex(list("aabca") + [np.nan]))
        assert not ci.equals(
            CategoricalIndex(list("aabca") + [np.nan], ordered=True))
        assert ci.equals(ci.copy())
Ejemplo n.º 19
0
 def test_contains_interval(self, item, expected):
     # GH 23705
     ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
     result = item in ci
     assert result is expected
Ejemplo n.º 20
0
    def test_unique(self, data, categories, expected_data, expected_categories):

        idx = CategoricalIndex(data, categories=categories)
        expected = CategoricalIndex(expected_data, categories=expected_categories)
        tm.assert_index_equal(idx.unique(), expected)