예제 #1
0
    def test_get_indexer_requires_unique(self):
        """get_indexer is expected to raise when the index has duplicates.

        Each lookup below is made against a CategoricalIndex whose values
        repeat, and each is expected to raise ``InvalidIndexError``.
        """
        np.random.seed(123456789)

        ci = CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=False
        )
        oidx = Index(np.array(ci))
        n_items = len(ci)

        msg = "Reindexing only valid with uniquely valued Index objects"

        # Randomly drawn targets of several lengths, taken from the plain
        # Index built from the same values.
        for n in (1, 2, 5, n_items):
            positions = np.random.randint(0, n_items, size=n)
            finder = oidx[positions]

            with pytest.raises(InvalidIndexError, match=msg):
                ci.get_indexer(finder)

        # see gh-17323
        #
        # Even when indexer is equal to the
        # members in the index, we should
        # respect duplicates instead of taking
        # the fast-track path.
        for letters in ("aabbca", "aababca"):
            finder = list(letters)

            with pytest.raises(InvalidIndexError, match=msg):
                ci.get_indexer(finder)
예제 #2
0
    def test_get_indexer_non_unique(self):
        np.random.seed(123456789)

        ci = CategoricalIndex(list("aabbca"),
                              categories=list("cab"),
                              ordered=False)
        oidx = Index(np.array(ci))

        for n in [1, 2, 5, len(ci)]:
            finder = oidx[np.random.randint(0, len(ci), size=n)]
            expected = oidx.get_indexer_non_unique(finder)[0]

            actual = ci.get_indexer(finder)
            tm.assert_numpy_array_equal(expected, actual)

        # see gh-17323
        #
        # Even when indexer is equal to the
        # members in the index, we should
        # respect duplicates instead of taking
        # the fast-track path.
        for finder in [list("aabbca"), list("aababca")]:
            expected = oidx.get_indexer_non_unique(finder)[0]

            actual = ci.get_indexer(finder)
            tm.assert_numpy_array_equal(expected, actual)
예제 #3
0
    def test_get_indexer_base(self):
        """Self-lookup gives 0..n-1; an unknown fill method is rejected."""
        # Determined by cat ordering.
        labels = list("cab")
        idx = CategoricalIndex(labels, categories=list("cab"))

        actual = idx.get_indexer(idx)
        expected = np.arange(len(idx), dtype=np.intp)
        tm.assert_numpy_array_equal(expected, actual)

        # An unrecognised ``method`` is expected to fail with ValueError.
        with pytest.raises(ValueError, match="Invalid fill method"):
            idx.get_indexer(idx, method="invalid")
예제 #4
0
 def test_get_indexer_nans_in_index_and_target(self):
     # GH 45361
     ci = CategoricalIndex([1, 2, np.nan, 3])
     other1 = [2, 3, 4, np.nan]
     res1 = ci.get_indexer(other1)
     expected1 = np.array([1, 3, -1, 2], dtype=np.intp)
     tm.assert_numpy_array_equal(res1, expected1)
     other2 = [1, 4, 2, 3]
     res2 = ci.get_indexer(other2)
     expected2 = np.array([0, -1, 1, 3], dtype=np.intp)
     tm.assert_numpy_array_equal(res2, expected2)
예제 #5
0
    def test_get_indexer_non_unique(self):
        """get_indexer should raise here; get_indexer_non_unique should not.

        The same checks are repeated for a CategoricalIndex, a list and a
        plain Index target.
        """
        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        idx2 = CategoricalIndex(list("abf"))

        # Loop invariants: the error text and the expected positions.
        msg = "Reindexing only valid with uniquely valued Index objects"
        expected = np.array([0, 1, 2, -1], dtype=np.intp)

        targets = (idx2, list("abf"), Index(list("abf")))
        for indexer in targets:
            with pytest.raises(InvalidIndexError, match=msg):
                idx1.get_indexer(indexer)

            r1 = idx1.get_indexer_non_unique(indexer)[0]
            tm.assert_almost_equal(r1, expected)
예제 #6
0
    def test_get_indexer_same_categories_same_order(self):
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])

        result = ci.get_indexer(
            CategoricalIndex(["b", "b"], categories=["a", "b"]))
        expected = np.array([1, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)
예제 #7
0
    def test_get_indexer_same_categories_different_order(self):
        # https://github.com/pandas-dev/pandas/issues/19551
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])

        result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"]))
        expected = np.array([1, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)
예제 #8
0
    def test_get_indexer_same_categories_same_order(self):
        ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'])

        result = ci.get_indexer(
            CategoricalIndex(['b', 'b'], categories=['a', 'b']))
        expected = np.array([1, 1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)
예제 #9
0
    def test_get_indexer_same_categories_same_order(self):
        ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'])

        result = ci.get_indexer(CategoricalIndex(['b', 'b'],
                                                 categories=['a', 'b']))
        expected = np.array([1, 1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)
예제 #10
0
    def test_get_indexer_same_categories_different_order(self):
        # https://github.com/pandas-dev/pandas/issues/19551
        ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'])

        result = ci.get_indexer(CategoricalIndex(['b', 'b'],
                                                 categories=['b', 'a']))
        expected = np.array([1, 1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)
예제 #11
0
class CategoricalIndexIndexing:
    """Timing cases for positional and label-based access on CategoricalIndex.

    ``setup`` receives one value of ``params`` as ``index`` and stores the
    matching index in ``self.data``; every ``time_*`` method then performs a
    single operation on the prepared data.
    NOTE(review): the params/param_names/setup/time_* layout looks like an
    asv benchmark class -- confirm against the benchmark runner in use.
    """

    params = ["monotonic_incr", "monotonic_decr", "non_monotonic"]
    param_names = ["index"]

    def setup(self, index):
        """Prepare the index selected by ``index`` and the lookup operands."""
        N = 10**5
        letters = list("a" * N + "b" * N + "c" * N)
        by_kind = {
            "monotonic_incr": CategoricalIndex(letters),
            "monotonic_decr": CategoricalIndex(reversed(letters)),
            "non_monotonic": CategoricalIndex(list("abc" * N)),
        }
        self.data = by_kind[index]

        # Labels are the 3-character permutations of string.printable.
        triples = itertools.permutations(string.printable, 3)
        self.data_unique = CategoricalIndex(list(map("".join, triples)))

        # Positional operands.
        self.int_scalar = 10000
        self.int_list = list(range(10000))

        # Label operands.
        self.cat_scalar = "b"
        self.cat_list = ["a", "c"]

    def time_getitem_scalar(self, index):
        """Index ``self.data`` with a single integer position."""
        self.data[self.int_scalar]

    def time_getitem_slice(self, index):
        """Slice ``self.data`` from the start up to ``int_scalar``."""
        self.data[:self.int_scalar]

    def time_getitem_list_like(self, index):
        """Index ``self.data`` with a one-element list of positions."""
        self.data[[self.int_scalar]]

    def time_getitem_list(self, index):
        """Index ``self.data`` with the full list of integer positions."""
        self.data[self.int_list]

    def time_getitem_bool_array(self, index):
        """Index ``self.data`` with the result of comparing it to a label."""
        self.data[self.data == self.cat_scalar]

    def time_get_loc_scalar(self, index):
        """Call ``get_loc`` on ``self.data`` for a single label."""
        self.data.get_loc(self.cat_scalar)

    def time_get_indexer_list(self, index):
        """Call ``get_indexer`` on ``self.data_unique`` for a label list."""
        self.data_unique.get_indexer(self.cat_list)
예제 #12
0
 def test_get_indexer_array(self):
     arr = np.array(
         [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")],
         dtype=object,
     )
     cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")]
     ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category")
     result = ci.get_indexer(arr)
     expected = np.array([0, 1], dtype="intp")
     tm.assert_numpy_array_equal(result, expected)
예제 #13
0
 def test_get_indexer_array(self):
     arr = np.array([Timestamp('1999-12-31 00:00:00'),
                     Timestamp('2000-12-31 00:00:00')], dtype=object)
     cats = [Timestamp('1999-12-31 00:00:00'),
             Timestamp('2000-12-31 00:00:00')]
     ci = CategoricalIndex(cats,
                           categories=cats,
                           ordered=False, dtype='category')
     result = ci.get_indexer(arr)
     expected = np.array([0, 1], dtype='intp')
     tm.assert_numpy_array_equal(result, expected)
예제 #14
0
    def test_get_indexer_method(self):
        """Each fill method is expected to raise NotImplementedError."""
        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        idx2 = CategoricalIndex(list("abf"))

        # (fill method, expected error text), checked in this order.
        cases = (
            ("pad", "method pad not yet implemented for CategoricalIndex"),
            (
                "backfill",
                "method backfill not yet implemented for CategoricalIndex",
            ),
            (
                "nearest",
                "method nearest not yet implemented for CategoricalIndex",
            ),
        )
        for method, msg in cases:
            with pytest.raises(NotImplementedError, match=msg):
                idx2.get_indexer(idx1, method=method)
예제 #15
0
    def test_get_indexer(self):
        """Check get_indexer for several target types and for fill methods.

        The first part looks up the same labels supplied as a
        CategoricalIndex, a list and a plain Index.  The second part expects
        ``NotImplementedError`` for the pad, backfill and nearest methods.
        """
        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        idx2 = CategoricalIndex(list("abf"))

        # The same positions are expected whatever the target's type.
        expected = np.array([0, 1, 2, -1], dtype=np.intp)
        for indexer in [idx2, list("abf"), Index(list("abf"))]:
            # Pass the loop variable (not idx2) so that every target type
            # in the list is actually exercised.
            r1 = idx1.get_indexer(indexer)
            tm.assert_almost_equal(r1, expected)

        msg = "method pad not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="pad")
        msg = "method backfill not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="backfill")

        msg = "method nearest not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="nearest")