def test_get_indexer_requires_unique(self):
    """``get_indexer`` must raise when the CategoricalIndex has duplicates.

    The index under test repeats both "a" and "b". Every lookup, whether the
    target is sampled at random from the index's own values or spelled out
    literally, is expected to raise ``InvalidIndexError``.
    """
    np.random.seed(123456789)

    cat_idx = CategoricalIndex(
        list("aabbca"), categories=list("cab"), ordered=False
    )
    obj_idx = Index(np.array(cat_idx))
    msg = "Reindexing only valid with uniquely valued Index objects"

    def _assert_rejected(target):
        # Every lookup against the duplicated index has to fail the same way.
        with pytest.raises(InvalidIndexError, match=msg):
            cat_idx.get_indexer(target)

    length = len(cat_idx)
    for size in (1, 2, 5, length):
        positions = np.random.randint(0, length, size=size)
        _assert_rejected(obj_idx[positions])

    # see gh-17323
    #
    # Even when indexer is equal to the
    # members in the index, we should
    # respect duplicates instead of taking
    # the fast-track path.
    for letters in ("aabbca", "aababca"):
        _assert_rejected(list(letters))
def test_get_indexer_non_unique(self): np.random.seed(123456789) ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) oidx = Index(np.array(ci)) for n in [1, 2, 5, len(ci)]: finder = oidx[np.random.randint(0, len(ci), size=n)] expected = oidx.get_indexer_non_unique(finder)[0] actual = ci.get_indexer(finder) tm.assert_numpy_array_equal(expected, actual) # see gh-17323 # # Even when indexer is equal to the # members in the index, we should # respect duplicates instead of taking # the fast-track path. for finder in [list("aabbca"), list("aababca")]: expected = oidx.get_indexer_non_unique(finder)[0] actual = ci.get_indexer(finder) tm.assert_numpy_array_equal(expected, actual)
def test_get_indexer_base(self):
    """Check self-lookup positions and rejection of an unknown fill method.

    Looking the index up in itself is expected to give ``0..len-1``, and an
    unrecognised ``method`` is expected to raise ``ValueError``.
    """
    # Determined by cat ordering.
    letters = "cab"
    cat_idx = CategoricalIndex(list(letters), categories=list(letters))

    identity = np.arange(len(cat_idx), dtype=np.intp)
    positions = cat_idx.get_indexer(cat_idx)
    tm.assert_numpy_array_equal(identity, positions)

    with pytest.raises(ValueError, match="Invalid fill method"):
        cat_idx.get_indexer(cat_idx, method="invalid")
def test_get_indexer_nans_in_index_and_target(self): # GH 45361 ci = CategoricalIndex([1, 2, np.nan, 3]) other1 = [2, 3, 4, np.nan] res1 = ci.get_indexer(other1) expected1 = np.array([1, 3, -1, 2], dtype=np.intp) tm.assert_numpy_array_equal(res1, expected1) other2 = [1, 4, 2, 3] res2 = ci.get_indexer(other2) expected2 = np.array([0, -1, 1, 3], dtype=np.intp) tm.assert_numpy_array_equal(res2, expected2)
def test_get_indexer_non_unique(self):
    """Check both lookup methods on a CategoricalIndex with a repeated value.

    ``idx1`` contains "a" twice. For each target flavour (CategoricalIndex,
    list, plain Index), ``get_indexer`` is expected to raise
    ``InvalidIndexError`` while ``get_indexer_non_unique`` is expected to
    return both positions of "a", the position of "b", and ``-1`` for "f".
    """
    idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
    idx2 = CategoricalIndex(list("abf"))

    targets = (idx2, list("abf"), Index(list("abf")))
    for target in targets:
        msg = "Reindexing only valid with uniquely valued Index objects"
        with pytest.raises(InvalidIndexError, match=msg):
            idx1.get_indexer(target)

        positions, _missing = idx1.get_indexer_non_unique(target)
        tm.assert_almost_equal(
            positions, np.array([0, 1, 2, -1], dtype=np.intp)
        )
def test_get_indexer_same_categories_same_order(self): ci = CategoricalIndex(["a", "b"], categories=["a", "b"]) result = ci.get_indexer( CategoricalIndex(["b", "b"], categories=["a", "b"])) expected = np.array([1, 1], dtype="intp") tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_same_categories_different_order(self): # https://github.com/pandas-dev/pandas/issues/19551 ci = CategoricalIndex(["a", "b"], categories=["a", "b"]) result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"])) expected = np.array([1, 1], dtype="intp") tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_same_categories_same_order(self): ci = CategoricalIndex(['a', 'b'], categories=['a', 'b']) result = ci.get_indexer( CategoricalIndex(['b', 'b'], categories=['a', 'b'])) expected = np.array([1, 1], dtype='intp') tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_same_categories_same_order(self): ci = CategoricalIndex(['a', 'b'], categories=['a', 'b']) result = ci.get_indexer(CategoricalIndex(['b', 'b'], categories=['a', 'b'])) expected = np.array([1, 1], dtype='intp') tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_same_categories_different_order(self): # https://github.com/pandas-dev/pandas/issues/19551 ci = CategoricalIndex(['a', 'b'], categories=['a', 'b']) result = ci.get_indexer(CategoricalIndex(['b', 'b'], categories=['b', 'a'])) expected = np.array([1, 1], dtype='intp') tm.assert_numpy_array_equal(result, expected)
class CategoricalIndexIndexing:
    """Benchmarks for indexing into a ``CategoricalIndex``.

    Parametrised over the ordering of the benchmarked index:
    ``"monotonic_incr"``, ``"monotonic_decr"`` or ``"non_monotonic"``.
    """

    params = ["monotonic_incr", "monotonic_decr", "non_monotonic"]
    param_names = ["index"]

    def setup(self, index):
        """Build the fixtures used by the ``time_*`` methods.

        Parameters
        ----------
        index : str
            One of the entries in ``params``; selects how ``self.data`` is
            ordered.

        Raises
        ------
        KeyError
            If ``index`` is not one of the known orderings.
        """
        N = 10**5
        values = list("a" * N + "b" * N + "c" * N)

        # Factories rather than ready-made indices: only the ordering asked
        # for is constructed, instead of building all three large indices and
        # throwing two of them away on every setup call.
        builders = {
            "monotonic_incr": lambda: CategoricalIndex(values),
            "monotonic_decr": lambda: CategoricalIndex(reversed(values)),
            "non_monotonic": lambda: CategoricalIndex(list("abc" * N)),
        }
        self.data = builders[index]()

        # A unique index made of every 3-character permutation of
        # ``string.printable``.
        self.data_unique = CategoricalIndex(
            ["".join(perm) for perm in itertools.permutations(string.printable, 3)]
        )

        self.int_scalar = 10000
        self.int_list = list(range(10000))

        self.cat_scalar = "b"
        self.cat_list = ["a", "c"]

    def time_getitem_scalar(self, index):
        """Positional lookup with a single integer."""
        self.data[self.int_scalar]

    def time_getitem_slice(self, index):
        """Positional slice from the start up to ``int_scalar``."""
        self.data[: self.int_scalar]

    def time_getitem_list_like(self, index):
        """Positional lookup with a one-element list."""
        self.data[[self.int_scalar]]

    def time_getitem_list(self, index):
        """Positional lookup with a list of integers."""
        self.data[self.int_list]

    def time_getitem_bool_array(self, index):
        """Boolean-mask lookup; building the mask is part of the timed work."""
        self.data[self.data == self.cat_scalar]

    def time_get_loc_scalar(self, index):
        """Label lookup of a single category value."""
        self.data.get_loc(self.cat_scalar)

    def time_get_indexer_list(self, index):
        """``get_indexer`` on the unique index with a short list of labels."""
        self.data_unique.get_indexer(self.cat_list)
def test_get_indexer_array(self): arr = np.array( [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")], dtype=object, ) cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")] ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category") result = ci.get_indexer(arr) expected = np.array([0, 1], dtype="intp") tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_array(self): arr = np.array([Timestamp('1999-12-31 00:00:00'), Timestamp('2000-12-31 00:00:00')], dtype=object) cats = [Timestamp('1999-12-31 00:00:00'), Timestamp('2000-12-31 00:00:00')] ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype='category') result = ci.get_indexer(arr) expected = np.array([0, 1], dtype='intp') tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_method(self):
    """Fill methods are expected to be rejected by ``get_indexer``.

    Each of "pad", "backfill" and "nearest" should raise
    ``NotImplementedError`` with a message naming the method.
    """
    idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
    idx2 = CategoricalIndex(list("abf"))

    # (fill method, expected error message), checked in this order.
    unsupported = (
        ("pad", "method pad not yet implemented for CategoricalIndex"),
        ("backfill", "method backfill not yet implemented for CategoricalIndex"),
        ("nearest", "method nearest not yet implemented for CategoricalIndex"),
    )
    for fill_method, msg in unsupported:
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method=fill_method)
def test_get_indexer(self):
    """Check ``get_indexer`` results and the rejection of fill methods.

    The first part looks up the labels "a", "b", "f" in ``idx1`` using three
    target flavours (CategoricalIndex, list, plain Index) and expects the
    same positions for each. The second part expects "pad", "backfill" and
    "nearest" to raise ``NotImplementedError``.
    """
    idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
    idx2 = CategoricalIndex(list("abf"))

    for indexer in [idx2, list("abf"), Index(list("abf"))]:
        # Use the loop variable so that every target flavour is exercised;
        # previously ``idx2`` was looked up on each iteration, leaving the
        # list and plain-Index targets untested.
        r1 = idx1.get_indexer(indexer)
        tm.assert_almost_equal(r1, np.array([0, 1, 2, -1], dtype=np.intp))

    msg = "method pad not yet implemented for CategoricalIndex"
    with pytest.raises(NotImplementedError, match=msg):
        idx2.get_indexer(idx1, method="pad")

    msg = "method backfill not yet implemented for CategoricalIndex"
    with pytest.raises(NotImplementedError, match=msg):
        idx2.get_indexer(idx1, method="backfill")

    msg = "method nearest not yet implemented for CategoricalIndex"
    with pytest.raises(NotImplementedError, match=msg):
        idx2.get_indexer(idx1, method="nearest")