def test_astype(self):
    """Check ``astype`` on a CategoricalIndex for category, object and interval."""
    index = self.create_index()

    # casting to 'category' is compared against the original index
    as_category = index.astype('category')
    tm.assert_index_equal(as_category, index, exact=True)

    as_object = index.astype(object)
    tm.assert_index_equal(as_object, Index(np.array(index)))

    # this IS equal, but not the same class
    assert as_object.equals(index)
    assert isinstance(as_object, Index)
    assert not isinstance(as_object, CategoricalIndex)

    # interval
    intervals = IntervalIndex.from_arrays(left=[-0.001, 2.0],
                                          right=[2, 4],
                                          closed='right')
    interval_cat = Categorical.from_codes(
        [0, 1, -1], categories=intervals, ordered=True)
    index = CategoricalIndex(interval_cat)

    as_interval = index.astype('interval')
    expected = intervals.take([0, 1, -1])
    tm.assert_index_equal(as_interval, expected)

    # rebuilding from the interval values must give the same index
    rebuilt = IntervalIndex.from_intervals(as_interval.values)
    tm.assert_index_equal(rebuilt, expected)
def test_reindex_empty_index(self): # See GH16770 c = CategoricalIndex([]) res, indexer = c.reindex(['a', 'b']) tm.assert_index_equal(res, Index(['a', 'b']), exact=True) tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))
def test_identical(self): ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], ordered=True) assert ci1.identical(ci1) assert ci1.identical(ci1.copy()) assert not ci1.identical(ci2)
def test_identical(self): ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], ordered=True) self.assertTrue(ci1.identical(ci1)) self.assertTrue(ci1.identical(ci1.copy())) self.assertFalse(ci1.identical(ci2))
def test_duplicates(self): idx = CategoricalIndex([0, 0, 0], name='foo') assert not idx.is_unique assert idx.has_duplicates expected = CategoricalIndex([0], name='foo') tm.assert_index_equal(idx.drop_duplicates(), expected) tm.assert_index_equal(idx.unique(), expected)
def test_duplicates(self): idx = CategoricalIndex([0, 0, 0], name='foo') self.assertFalse(idx.is_unique) self.assertTrue(idx.has_duplicates) expected = CategoricalIndex([0], name='foo') tm.assert_index_equal(idx.drop_duplicates(), expected) tm.assert_index_equal(idx.unique(), expected)
def test_reindex_base(self):
    """``get_indexer`` on itself gives 0..n-1; an unknown method is rejected."""
    # Determined by cat ordering.
    index = CategoricalIndex(list("cab"), categories=list("cab"))

    positions = index.get_indexer(index)
    identity = np.arange(len(index), dtype=np.intp)
    tm.assert_numpy_array_equal(identity, positions)

    with pytest.raises(ValueError, match="Invalid fill method"):
        index.get_indexer(index, method="invalid")
def test_reindex_duplicate_target(self):
    """Reindexing against a target with repeated labels raises ValueError."""
    # See GH23963
    all_categories = ['a', 'b', 'c', 'd']
    source = CategoricalIndex(['a', 'b', 'c', 'a'], categories=all_categories)

    duplicated_targets = (
        ['a', 'a', 'c'],
        CategoricalIndex(['a', 'a', 'c'], categories=['a', 'b', 'c', 'd']),
    )
    for target in duplicated_targets:
        with pytest.raises(ValueError, match='non-unique indexer'):
            source.reindex(target)
def test_fillna_categorical(self):
    """``fillna`` accepts a value from the categories and rejects any other."""
    # GH 11343
    with_gap = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name='x')

    # fill by value in categories
    filled = with_gap.fillna(1.0)
    expected = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name='x')
    tm.assert_index_equal(filled, expected)

    # fill by value not in categories raises ValueError
    pattern = 'fill value must be in categories'
    with tm.assert_raises_regex(ValueError, pattern):
        with_gap.fillna(2.0)
def test_fillna_categorical(self):
    """``fillna`` accepts a value from the categories and rejects any other."""
    # GH 11343
    with_gap = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name='x')

    # fill by value in categories
    expected = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name='x')
    tm.assert_index_equal(with_gap.fillna(1.0), expected)

    # fill by value not in categories raises ValueError
    with pytest.raises(ValueError, match='fill value must be in categories'):
        with_gap.fillna(2.0)
def test_reindex_dtype(self):
    """Check the result type of ``reindex`` for list and Categorical targets."""
    labels = ['a', 'b', 'c', 'a']
    wide_categories = ['a', 'b', 'c', 'd']

    # (source index, reindex target, expected resulting index)
    cases = [
        (CategoricalIndex(labels),
         ['a', 'c'],
         Index(['a', 'a', 'c'])),
        (CategoricalIndex(labels),
         Categorical(['a', 'c']),
         CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])),
        (CategoricalIndex(labels, categories=wide_categories),
         ['a', 'c'],
         Index(['a', 'a', 'c'], dtype='object')),
        (CategoricalIndex(labels, categories=wide_categories),
         Categorical(['a', 'c']),
         CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])),
    ]

    for source, target, expected in cases:
        reindexed, positions = source.reindex(target)
        tm.assert_index_equal(reindexed, expected, exact=True)
        # every case is expected to report the same source positions
        tm.assert_numpy_array_equal(positions,
                                    np.array([0, 3, 2], dtype=np.intp))
def test_create_categorical(self):
    """Call ``_create_categorical`` directly with a CategoricalIndex as data."""
    # https://github.com/pandas-dev/pandas/pull/17513
    # The public CI constructor doesn't hit this code path with
    # instances of CategoricalIndex, but we still want to test the code
    labels = ['a', 'b', 'c']
    index = CategoricalIndex(labels)

    # First argument plays the role of self, the second one is the data.
    created = CategoricalIndex._create_categorical(index, index)
    tm.assert_categorical_equal(created, Categorical(['a', 'b', 'c']))
def test_get_indexer(self):
    """Check ``get_indexer`` results and the unsupported fill methods.

    The same positions are expected whether the target is a
    CategoricalIndex, a plain list or an ``Index`` with the same labels.
    ``pad``, ``backfill`` and ``nearest`` must raise NotImplementedError.
    """
    idx1 = CategoricalIndex(list('aabcde'), categories=list('edabc'))
    idx2 = CategoricalIndex(list('abf'))

    expected = np.array([0, 1, 2, -1], dtype=np.intp)
    for indexer in [idx2, list('abf'), Index(list('abf'))]:
        # pass the loop variable so that each target type is exercised,
        # not just idx2 on every iteration
        r1 = idx1.get_indexer(indexer)
        assert_almost_equal(r1, expected)

    msg = ("method='pad' and method='backfill' not implemented yet for"
           " CategoricalIndex")
    for method in ('pad', 'backfill'):
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method=method)

    msg = "method='nearest' not implemented yet for CategoricalIndex"
    with pytest.raises(NotImplementedError, match=msg):
        idx2.get_indexer(idx1, method='nearest')
def test_get_loc(self):
    """``CategoricalIndex.get_loc`` is compared with ``Index.get_loc`` for
    unique, non-unique and sliceable label layouts."""
    # GH 12531
    cat_unique = CategoricalIndex(list('abcde'), categories=list('edabc'))
    obj_unique = Index(list('abcde'))
    for label in ('a', 'e'):
        assert cat_unique.get_loc(label) == obj_unique.get_loc(label)

    for index in (cat_unique, obj_unique):
        with pytest.raises(KeyError):
            index.get_loc('NOT-EXIST')

    # non-unique
    cat_dupes = CategoricalIndex(list('aacded'), categories=list('edabc'))
    obj_dupes = Index(list('aacded'))

    # results in bool array
    mask = cat_dupes.get_loc('d')
    tm.assert_numpy_array_equal(mask, obj_dupes.get_loc('d'))
    tm.assert_numpy_array_equal(
        mask, np.array([False, False, False, True, False, True]))

    # unique element results in scalar
    position = cat_dupes.get_loc('e')
    assert position == obj_dupes.get_loc('e')
    assert position == 4

    for index in (cat_dupes, obj_dupes):
        with pytest.raises(KeyError):
            index.get_loc('NOT-EXIST')

    # non-unique, slicable
    cat_runs = CategoricalIndex(list('aabbb'), categories=list('abc'))
    obj_runs = Index(list('aabbb'))

    # results in slice
    expected_slices = (('a', slice(0, 2, None)), ('b', slice(2, 5, None)))
    for label, expected in expected_slices:
        located = cat_runs.get_loc(label)
        assert located == obj_runs.get_loc(label)
        assert located == expected

    for index in (cat_runs, obj_runs):
        with pytest.raises(KeyError):
            index.get_loc('c')
def test_isin(self): ci = CategoricalIndex( list('aabca') + [np.nan], categories=['c', 'a', 'b']) tm.assert_numpy_array_equal( ci.isin(['c']), np.array([False, False, False, True, False, False])) tm.assert_numpy_array_equal( ci.isin(['c', 'a', 'b']), np.array([True] * 5 + [False])) tm.assert_numpy_array_equal( ci.isin(['c', 'a', 'b', np.nan]), np.array([True] * 6)) # mismatched categorical -> coerced to ndarray so doesn't matter tm.assert_numpy_array_equal( ci.isin(ci.set_categories(list('abcdefghi'))), np.array([True] * 6)) tm.assert_numpy_array_equal( ci.isin(ci.set_categories(list('defghi'))), np.array([False] * 5 + [True]))
def test_get_indexer(self):
    """Check ``get_indexer`` results and the unsupported fill methods.

    The same positions are expected whether the target is a
    CategoricalIndex, a plain list or an ``Index`` with the same labels.
    ``pad``, ``backfill`` and ``nearest`` must raise NotImplementedError.
    """
    idx1 = CategoricalIndex(list('aabcde'), categories=list('edabc'))
    idx2 = CategoricalIndex(list('abf'))

    expected = np.array([0, 1, 2, -1], dtype=np.intp)
    for indexer in [idx2, list('abf'), Index(list('abf'))]:
        # pass the loop variable so that each target type is exercised,
        # not just idx2 on every iteration
        r1 = idx1.get_indexer(indexer)
        assert_almost_equal(r1, expected)

    for method in ('pad', 'backfill', 'nearest'):
        with pytest.raises(NotImplementedError):
            idx2.get_indexer(idx1, method=method)
def test_equals_categorical(self):
    """Exercise ``equals`` and elementwise comparisons on CategoricalIndex."""
    left = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
    wider = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'],
                             ordered=True)

    assert left.equals(left)
    assert not left.equals(wider)
    assert left.equals(left.astype(object))
    assert left.astype(object).equals(left)

    assert (left == left).all()
    for never_all_true in (left != left, left > left, left < left):
        assert not never_all_true.all()
    for always_true in (left <= left, left >= left):
        assert always_true.all()
    assert not (left == 1).all()
    assert (left == Index(['a', 'b'])).all()
    assert (left == left.values).all()

    # invalid comparisons
    with tm.assert_raises_regex(ValueError, "Lengths must match"):
        left == Index(['a', 'b', 'c'])
    with pytest.raises(TypeError):
        left == wider
    with pytest.raises(TypeError):
        left == Categorical(left.values, ordered=False)
    with pytest.raises(TypeError):
        left == Categorical(left.values, categories=list('abc'))

    # tests
    # make sure that we are testing for category inclusion properly
    index = CategoricalIndex(list('aabca'), categories=['c', 'a', 'b'])
    assert not index.equals(list('aabca'))
    # Same categories, but different order
    # Unordered
    assert index.equals(CategoricalIndex(list('aabca')))
    # Ordered
    assert not index.equals(CategoricalIndex(list('aabca'), ordered=True))
    assert index.equals(index.copy())

    index = CategoricalIndex(list('aabca') + [np.nan],
                             categories=['c', 'a', 'b'])
    assert not index.equals(list('aabca'))
    assert not index.equals(CategoricalIndex(list('aabca')))
    assert index.equals(index.copy())

    index = CategoricalIndex(list('aabca') + [np.nan],
                             categories=['c', 'a', 'b'])
    with_nan = list('aabca') + [np.nan]
    assert not index.equals(with_nan)
    assert index.equals(CategoricalIndex(list('aabca') + [np.nan]))
    assert not index.equals(
        CategoricalIndex(list('aabca') + [np.nan], ordered=True))
    assert index.equals(index.copy())
def test_get_loc(self):
    """``CategoricalIndex.get_loc`` is compared with ``Index.get_loc`` for
    unique, non-unique and sliceable label layouts."""
    # GH 12531
    unique_pair = (
        CategoricalIndex(list('abcde'), categories=list('edabc')),
        Index(list('abcde')),
    )
    categorical, plain = unique_pair
    assert categorical.get_loc('a') == plain.get_loc('a')
    assert categorical.get_loc('e') == plain.get_loc('e')

    for candidate in unique_pair:
        with pytest.raises(KeyError):
            candidate.get_loc('NOT-EXIST')

    # non-unique
    dupes_pair = (
        CategoricalIndex(list('aacded'), categories=list('edabc')),
        Index(list('aacded')),
    )
    categorical, plain = dupes_pair

    # results in bool array
    found = categorical.get_loc('d')
    tm.assert_numpy_array_equal(found, plain.get_loc('d'))
    expected_mask = np.array([False, False, False, True, False, True])
    tm.assert_numpy_array_equal(found, expected_mask)

    # unique element results in scalar
    found = categorical.get_loc('e')
    assert found == plain.get_loc('e')
    assert found == 4

    for candidate in dupes_pair:
        with pytest.raises(KeyError):
            candidate.get_loc('NOT-EXIST')

    # non-unique, slicable
    runs_pair = (
        CategoricalIndex(list('aabbb'), categories=list('abc')),
        Index(list('aabbb')),
    )
    categorical, plain = runs_pair

    # results in slice
    found = categorical.get_loc('a')
    assert found == plain.get_loc('a')
    assert found == slice(0, 2, None)

    found = categorical.get_loc('b')
    assert found == plain.get_loc('b')
    assert found == slice(2, 5, None)

    for candidate in runs_pair:
        with pytest.raises(KeyError):
            candidate.get_loc('c')
def test_equals_categorical(self):
    """Exercise ``equals`` and elementwise comparisons on CategoricalIndex."""
    left = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
    wider = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'],
                             ordered=True)

    assert left.equals(left)
    assert not left.equals(wider)
    assert left.equals(left.astype(object))
    assert left.astype(object).equals(left)

    assert (left == left).all()
    for never_all_true in (left != left, left > left, left < left):
        assert not never_all_true.all()
    for always_true in (left <= left, left >= left):
        assert always_true.all()
    assert not (left == 1).all()
    assert (left == Index(['a', 'b'])).all()
    assert (left == left.values).all()

    # invalid comparisons
    with pytest.raises(ValueError, match="Lengths must match"):
        left == Index(['a', 'b', 'c'])

    msg = ("categorical index comparisons must have the same categories"
           " and ordered attributes")
    mismatched = (
        wider,
        Categorical(left.values, ordered=False),
        Categorical(left.values, categories=list('abc')),
    )
    for other in mismatched:
        with pytest.raises(TypeError, match=msg):
            left == other

    # tests
    # make sure that we are testing for category inclusion properly
    index = CategoricalIndex(list('aabca'), categories=['c', 'a', 'b'])
    assert not index.equals(list('aabca'))
    # Same categories, but different order
    # Unordered
    assert index.equals(CategoricalIndex(list('aabca')))
    # Ordered
    assert not index.equals(CategoricalIndex(list('aabca'), ordered=True))
    assert index.equals(index.copy())

    index = CategoricalIndex(list('aabca') + [np.nan],
                             categories=['c', 'a', 'b'])
    assert not index.equals(list('aabca'))
    assert not index.equals(CategoricalIndex(list('aabca')))
    assert index.equals(index.copy())

    index = CategoricalIndex(list('aabca') + [np.nan],
                             categories=['c', 'a', 'b'])
    assert not index.equals(list('aabca') + [np.nan])
    assert index.equals(CategoricalIndex(list('aabca') + [np.nan]))
    assert not index.equals(
        CategoricalIndex(list('aabca') + [np.nan], ordered=True))
    assert index.equals(index.copy())
def test_contains_interval(self, item, expected): # GH 23705 ci = CategoricalIndex(IntervalIndex.from_breaks(range(3))) result = item in ci assert result is expected
def create_index(self, *, categories=None, ordered=False):
    """Return a CategoricalIndex over ``"aabbca"``.

    ``categories`` falls back to ``list("cab")`` when it is None;
    ``ordered`` is passed through unchanged.
    """
    chosen = list("cab") if categories is None else categories
    return CategoricalIndex(list("aabbca"), categories=chosen, ordered=ordered)
def test_has_duplicates(self): idx = CategoricalIndex([0, 0, 0], name="foo") assert idx.is_unique is False assert idx.has_duplicates is True
def test_equals_categorical(self):
    """Exercise ``equals`` and elementwise comparisons on CategoricalIndex."""
    left = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
    wider = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)

    assert left.equals(left)
    assert not left.equals(wider)
    assert left.equals(left.astype(object))
    assert left.astype(object).equals(left)

    assert (left == left).all()
    for never_all_true in (left != left, left > left, left < left):
        assert not never_all_true.all()
    for always_true in (left <= left, left >= left):
        assert always_true.all()
    assert not (left == 1).all()
    assert (left == Index(["a", "b"])).all()
    assert (left == left.values).all()

    # invalid comparisons
    with pytest.raises(ValueError, match="Lengths must match"):
        left == Index(["a", "b", "c"])

    # any one of these alternatives is accepted as the error text
    msg = "|".join(
        [
            "categorical index comparisons must have the same categories "
            "and ordered attributes",
            "Categoricals can only be compared if 'categories' are the same. "
            "Categories are different lengths",
            "Categoricals can only be compared if 'ordered' is the same",
        ]
    )
    mismatched = (
        wider,
        Categorical(left.values, ordered=False),
        Categorical(left.values, categories=list("abc")),
    )
    for other in mismatched:
        with pytest.raises(TypeError, match=msg):
            left == other

    # tests
    # make sure that we are testing for category inclusion properly
    index = CategoricalIndex(list("aabca"), categories=["c", "a", "b"])
    assert not index.equals(list("aabca"))
    # Same categories, but different order
    # Unordered
    assert index.equals(CategoricalIndex(list("aabca")))
    # Ordered
    assert not index.equals(CategoricalIndex(list("aabca"), ordered=True))
    assert index.equals(index.copy())

    index = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
    assert not index.equals(list("aabca"))
    assert not index.equals(CategoricalIndex(list("aabca")))
    assert index.equals(index.copy())

    index = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
    assert not index.equals(list("aabca") + [np.nan])
    assert index.equals(CategoricalIndex(list("aabca") + [np.nan]))
    ordered_twin = CategoricalIndex(list("aabca") + [np.nan], ordered=True)
    assert not index.equals(ordered_twin)
    assert index.equals(index.copy())
def test_unique(self, data, categories, expected_data, expected_categories): idx = CategoricalIndex(data, categories=categories) expected = CategoricalIndex(expected_data, categories=expected_categories) tm.assert_index_equal(idx.unique(), expected)
def test_frame_repr(self): df = pd.DataFrame({"A": [1, 2, 3]}, index=CategoricalIndex(["a", "b", "c"])) result = repr(df) expected = " A\na 1\nb 2\nc 3" assert result == expected
def test_insert_na_mismatched_dtype(self): ci = CategoricalIndex([0, 1, 1]) result = ci.insert(0, pd.NaT) expected = Index([pd.NaT, 0, 1, 1], dtype=object) tm.assert_index_equal(result, expected)
def test_get_loc(self):
    """``CategoricalIndex.get_loc`` is compared with ``Index.get_loc`` for
    unique, non-unique and sliceable label layouts."""
    # GH 12531
    cat_unique = CategoricalIndex(list("abcde"), categories=list("edabc"))
    obj_unique = Index(list("abcde"))
    for label in ("a", "e"):
        assert cat_unique.get_loc(label) == obj_unique.get_loc(label)

    for index in (cat_unique, obj_unique):
        with pytest.raises(KeyError, match="'NOT-EXIST'"):
            index.get_loc("NOT-EXIST")

    # non-unique
    cat_dupes = CategoricalIndex(list("aacded"), categories=list("edabc"))
    obj_dupes = Index(list("aacded"))

    # results in bool array
    mask = cat_dupes.get_loc("d")
    tm.assert_numpy_array_equal(mask, obj_dupes.get_loc("d"))
    expected_mask = np.array([False, False, False, True, False, True])
    tm.assert_numpy_array_equal(mask, expected_mask)

    # unique element results in scalar
    position = cat_dupes.get_loc("e")
    assert position == obj_dupes.get_loc("e")
    assert position == 4

    for index in (cat_dupes, obj_dupes):
        with pytest.raises(KeyError, match="'NOT-EXIST'"):
            index.get_loc("NOT-EXIST")

    # non-unique, sliceable
    cat_runs = CategoricalIndex(list("aabbb"), categories=list("abc"))
    obj_runs = Index(list("aabbb"))

    # results in slice
    expected_slices = (("a", slice(0, 2, None)), ("b", slice(2, 5, None)))
    for label, expected in expected_slices:
        located = cat_runs.get_loc(label)
        assert located == obj_runs.get_loc(label)
        assert located == expected

    for index in (cat_runs, obj_runs):
        with pytest.raises(KeyError, match="'c'"):
            index.get_loc("c")
def test_get_indexer(self):
    """Check ``get_indexer`` results and the unsupported fill methods.

    The same positions are expected whether the target is a
    CategoricalIndex, a plain list or an ``Index`` with the same labels.
    ``pad``, ``backfill`` and ``nearest`` must raise NotImplementedError.
    """
    idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
    idx2 = CategoricalIndex(list("abf"))

    expected = np.array([0, 1, 2, -1], dtype=np.intp)
    for indexer in [idx2, list("abf"), Index(list("abf"))]:
        # pass the loop variable so that each target type is exercised,
        # not just idx2 on every iteration
        r1 = idx1.get_indexer(indexer)
        tm.assert_almost_equal(r1, expected)

    msg = (
        "method='pad' and method='backfill' not implemented yet for"
        " CategoricalIndex"
    )
    for method in ("pad", "backfill"):
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method=method)

    msg = "method='nearest' not implemented yet for CategoricalIndex"
    with pytest.raises(NotImplementedError, match=msg):
        idx2.get_indexer(idx1, method="nearest")
def test_drop_duplicates(self): idx = CategoricalIndex([0, 0, 0], name="foo") expected = CategoricalIndex([0], name="foo") tm.assert_index_equal(idx.drop_duplicates(), expected) tm.assert_index_equal(idx.unique(), expected)
def test_method_delegation(self):
    """Category-manipulation methods called on a CategoricalIndex are compared
    against explicitly constructed CategoricalIndex results."""
    data = list('aabbca')

    source = CategoricalIndex(data, categories=list('cabdef'))
    expected = CategoricalIndex(data, categories=list('cab'))
    tm.assert_index_equal(source.set_categories(list('cab')), expected)

    source = CategoricalIndex(data, categories=list('cab'))
    expected = CategoricalIndex(list('ffggef'), categories=list('efg'))
    tm.assert_index_equal(source.rename_categories(list('efg')), expected)

    # GH18862 (let rename_categories take callables)
    expected = CategoricalIndex(list('AABBCA'), categories=list('CAB'))
    tm.assert_index_equal(
        source.rename_categories(lambda x: x.upper()), expected)

    source = CategoricalIndex(data, categories=list('cab'))
    expected = CategoricalIndex(data, categories=list('cabd'))
    tm.assert_index_equal(source.add_categories(['d']), expected)

    source = CategoricalIndex(data, categories=list('cab'))
    expected = CategoricalIndex(list('aabb') + [np.nan] + ['a'],
                                categories=list('ab'))
    tm.assert_index_equal(source.remove_categories(['c']), expected)

    source = CategoricalIndex(data, categories=list('cabdef'))
    tm.assert_index_equal(source.as_unordered(), source)

    source = CategoricalIndex(data, categories=list('cabdef'))
    expected = CategoricalIndex(data, categories=list('cabdef'), ordered=True)
    tm.assert_index_equal(source.as_ordered(), expected)

    # invalid
    with pytest.raises(ValueError):
        source.set_categories(list('cab'), inplace=True)
def test_construction(self):
    """Build CategoricalIndex objects from several kinds of input and check
    the categories, codes and ordered flag of each result."""
    ci = self.create_index(categories=list('abcd'))
    categories = ci.categories

    # expected codes when 'c' is a category, and when it is not
    codes_all = np.array([0, 0, 1, 1, 2, 0], dtype='int8')
    codes_without_c = np.array([0, 0, 1, 1, -1, 0], dtype='int8')

    built = Index(ci)
    tm.assert_index_equal(built, ci, exact=True)
    self.assertFalse(built.ordered)

    built = Index(ci.values)
    tm.assert_index_equal(built, ci, exact=True)
    self.assertFalse(built.ordered)

    # empty
    built = CategoricalIndex(categories=categories)
    self.assert_index_equal(built.categories, Index(categories))
    tm.assert_numpy_array_equal(built.codes, np.array([], dtype='int8'))
    self.assertFalse(built.ordered)

    # passing categories
    built = CategoricalIndex(list('aabbca'), categories=categories)
    self.assert_index_equal(built.categories, Index(categories))
    tm.assert_numpy_array_equal(built.codes, codes_all)

    c = pd.Categorical(list('aabbca'))
    built = CategoricalIndex(c)
    self.assert_index_equal(built.categories, Index(list('abc')))
    tm.assert_numpy_array_equal(built.codes, codes_all)
    self.assertFalse(built.ordered)

    built = CategoricalIndex(c, categories=categories)
    self.assert_index_equal(built.categories, Index(categories))
    tm.assert_numpy_array_equal(built.codes, codes_all)
    self.assertFalse(built.ordered)

    ci = CategoricalIndex(c, categories=list('abcd'))
    built = CategoricalIndex(ci)
    self.assert_index_equal(built.categories, Index(categories))
    tm.assert_numpy_array_equal(built.codes, codes_all)
    self.assertFalse(built.ordered)

    built = CategoricalIndex(ci, categories=list('ab'))
    self.assert_index_equal(built.categories, Index(list('ab')))
    tm.assert_numpy_array_equal(built.codes, codes_without_c)
    self.assertFalse(built.ordered)

    built = CategoricalIndex(ci, categories=list('ab'), ordered=True)
    self.assert_index_equal(built.categories, Index(list('ab')))
    tm.assert_numpy_array_equal(built.codes, codes_without_c)
    self.assertTrue(built.ordered)

    # turn me to an Index
    built = Index(np.array(ci))
    self.assertIsInstance(built, Index)
    self.assertNotIsInstance(built, CategoricalIndex)
def test_format_different_scalar_lengths(self): # GH35439 idx = CategoricalIndex(["aaaaaaaaa", "b"]) expected = ["aaaaaaaaa", "b"] assert idx.format() == expected
def test_method_delegation(self):
    """Category-manipulation methods called on a CategoricalIndex are compared
    against explicitly constructed CategoricalIndex results."""
    data = list('aabbca')

    source = CategoricalIndex(data, categories=list('cabdef'))
    expected = CategoricalIndex(data, categories=list('cab'))
    tm.assert_index_equal(source.set_categories(list('cab')), expected)

    source = CategoricalIndex(data, categories=list('cab'))
    expected = CategoricalIndex(list('ffggef'), categories=list('efg'))
    tm.assert_index_equal(source.rename_categories(list('efg')), expected)

    source = CategoricalIndex(data, categories=list('cab'))
    expected = CategoricalIndex(data, categories=list('cabd'))
    tm.assert_index_equal(source.add_categories(['d']), expected)

    source = CategoricalIndex(data, categories=list('cab'))
    expected = CategoricalIndex(list('aabb') + [np.nan] + ['a'],
                                categories=list('ab'))
    tm.assert_index_equal(source.remove_categories(['c']), expected)

    source = CategoricalIndex(data, categories=list('cabdef'))
    tm.assert_index_equal(source.as_unordered(), source)

    source = CategoricalIndex(data, categories=list('cabdef'))
    expected = CategoricalIndex(data, categories=list('cabdef'), ordered=True)
    tm.assert_index_equal(source.as_ordered(), expected)

    # invalid
    def set_inplace():
        return source.set_categories(list('cab'), inplace=True)

    self.assertRaises(ValueError, set_inplace)
def test_equals_categorical(self):
    """Exercise ``equals`` and elementwise comparisons on CategoricalIndex."""
    left = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
    wider = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'],
                             ordered=True)

    self.assertTrue(left.equals(left))
    self.assertFalse(left.equals(wider))
    self.assertTrue(left.equals(left.astype(object)))
    self.assertTrue(left.astype(object).equals(left))

    self.assertTrue((left == left).all())
    for never_all_true in (left != left, left > left, left < left):
        self.assertFalse(never_all_true.all())
    for always_true in (left <= left, left >= left):
        self.assertTrue(always_true.all())
    self.assertFalse((left == 1).all())
    self.assertTrue((left == Index(['a', 'b'])).all())
    self.assertTrue((left == left.values).all())

    # invalid comparisons
    with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
        left == Index(['a', 'b', 'c'])
    self.assertRaises(TypeError, lambda: left == wider)
    self.assertRaises(
        TypeError,
        lambda: left == Categorical(left.values, ordered=False))
    self.assertRaises(
        TypeError,
        lambda: left == Categorical(left.values, categories=list('abc')))

    # tests
    # make sure that we are testing for category inclusion properly
    index = CategoricalIndex(list('aabca'), categories=['c', 'a', 'b'])
    self.assertFalse(index.equals(list('aabca')))
    self.assertFalse(index.equals(CategoricalIndex(list('aabca'))))
    self.assertTrue(index.equals(index.copy()))

    index = CategoricalIndex(list('aabca') + [np.nan],
                             categories=['c', 'a', 'b'])
    self.assertFalse(index.equals(list('aabca')))
    self.assertFalse(index.equals(CategoricalIndex(list('aabca'))))
    self.assertTrue(index.equals(index.copy()))

    index = CategoricalIndex(list('aabca') + [np.nan],
                             categories=['c', 'a', 'b'])
    self.assertFalse(index.equals(list('aabca') + [np.nan]))
    default_categories = CategoricalIndex(list('aabca') + [np.nan])
    self.assertFalse(index.equals(default_categories))
    self.assertTrue(index.equals(index.copy()))
def test_reindex_dtype(self):
    """Check the result type of ``reindex`` for list and Categorical targets."""

    def check(source, target, expected):
        # every case is expected to report the same source positions
        reindexed, positions = source.reindex(target)
        tm.assert_index_equal(reindexed, expected, exact=True)
        tm.assert_numpy_array_equal(positions, np.array([0, 3, 2], dtype=np.intp))

    check(
        CategoricalIndex(["a", "b", "c", "a"]),
        ["a", "c"],
        Index(["a", "a", "c"]),
    )
    check(
        CategoricalIndex(["a", "b", "c", "a"]),
        Categorical(["a", "c"]),
        CategoricalIndex(["a", "a", "c"], categories=["a", "c"]),
    )
    check(
        CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]),
        ["a", "c"],
        Index(["a", "a", "c"], dtype="object"),
    )
    check(
        CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]),
        Categorical(["a", "c"]),
        CategoricalIndex(["a", "a", "c"], categories=["a", "c"]),
    )
def test_method_delegation(self):
    """Category-manipulation methods called on a CategoricalIndex are compared
    against explicitly constructed CategoricalIndex results."""
    data = list('aabbca')

    def make(categories, **kwargs):
        # all source indexes share the same labels and differ in categories
        return CategoricalIndex(data, categories=list(categories), **kwargs)

    tm.assert_index_equal(make('cabdef').set_categories(list('cab')),
                          make('cab'))

    renamed = make('cab').rename_categories(list('efg'))
    tm.assert_index_equal(
        renamed, CategoricalIndex(list('ffggef'), categories=list('efg')))

    tm.assert_index_equal(make('cab').add_categories(['d']), make('cabd'))

    removed = make('cab').remove_categories(['c'])
    tm.assert_index_equal(
        removed,
        CategoricalIndex(list('aabb') + [np.nan] + ['a'],
                         categories=list('ab')))

    source = make('cabdef')
    tm.assert_index_equal(source.as_unordered(), source)

    source = make('cabdef')
    tm.assert_index_equal(source.as_ordered(),
                          make('cabdef', ordered=True))

    # invalid
    with pytest.raises(ValueError):
        source.set_categories(list('cab'), inplace=True)
def test_equals_categorical(self):
    """Exercise ``equals`` and elementwise comparisons on CategoricalIndex."""
    left = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
    wider = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'],
                             ordered=True)

    assert left.equals(left)
    assert not left.equals(wider)
    assert left.equals(left.astype(object))
    assert left.astype(object).equals(left)

    assert (left == left).all()
    for never_all_true in (left != left, left > left, left < left):
        assert not never_all_true.all()
    for always_true in (left <= left, left >= left):
        assert always_true.all()
    assert not (left == 1).all()
    assert (left == Index(['a', 'b'])).all()
    assert (left == left.values).all()

    # invalid comparisons
    with tm.assert_raises_regex(ValueError, "Lengths must match"):
        left == Index(['a', 'b', 'c'])
    with pytest.raises(TypeError):
        left == wider
    with pytest.raises(TypeError):
        left == Categorical(left.values, ordered=False)
    with pytest.raises(TypeError):
        left == Categorical(left.values, categories=list('abc'))

    # tests
    # make sure that we are testing for category inclusion properly
    index = CategoricalIndex(list('aabca'), categories=['c', 'a', 'b'])
    assert not index.equals(list('aabca'))
    assert not index.equals(CategoricalIndex(list('aabca')))
    assert index.equals(index.copy())

    index = CategoricalIndex(list('aabca') + [np.nan],
                             categories=['c', 'a', 'b'])
    assert not index.equals(list('aabca'))
    assert not index.equals(CategoricalIndex(list('aabca')))
    assert index.equals(index.copy())

    index = CategoricalIndex(list('aabca') + [np.nan],
                             categories=['c', 'a', 'b'])
    assert not index.equals(list('aabca') + [np.nan])
    default_categories = CategoricalIndex(list('aabca') + [np.nan])
    assert not index.equals(default_categories)
    assert index.equals(index.copy())
def test_is_unique(self, values, expected): ci = CategoricalIndex(values) assert ci.is_unique is expected
def test_has_duplicates(self): idx = CategoricalIndex([0, 0, 0], name='foo') assert not idx.is_unique assert idx.has_duplicates
def __init__(
    self,
    index: Index,
    grouper=None,
    obj: Optional[FrameOrSeries] = None,
    name=None,
    level=None,
    sort: bool = True,
    observed: bool = False,
    in_axis: bool = False,
):
    """
    Store the grouping arguments and resolve ``grouper`` to its final form.

    The constructor arguments are kept on the instance, ``grouper`` is
    passed through ``_convert_grouper`` and is then resolved along one of
    three paths: a ``level`` was given, the converted grouper is a
    ``Grouper`` instance, or anything else (None with a name and obj, a
    list/tuple, a categorical, a ``Grouping``, or a value that is handed to
    ``index.map``).

    Raises
    ------
    AssertionError
        If a non-integer ``level`` is not in ``index.names``, or if the
        mapped grouper has no ``__len__`` or a length different from
        ``index``.
    ValueError
        If the grouper is not a Series, Index, ExtensionArray or ndarray
        and reports an ``ndim`` other than 1.
    """
    self.name = name
    self.level = level
    self.grouper = _convert_grouper(index, grouper)
    self.all_grouper = None
    self.index = index
    self.sort = sort
    self.obj = obj
    self.observed = observed
    self.in_axis = in_axis

    # right place for this?
    # (a name passed explicitly takes precedence over the grouper's own name)
    if isinstance(grouper, (Series, Index)) and name is None:
        self.name = grouper.name

    if isinstance(grouper, MultiIndex):
        self.grouper = grouper.values

    # we have a single grouper which may be a myriad of things,
    # some of which are dependent on the passing in level

    if level is not None:
        # a level given by name is translated to its position in index.names
        if not isinstance(level, int):
            if level not in index.names:
                raise AssertionError(f"Level {level} not in index")
            level = index.names.index(level)

        if self.name is None:
            self.name = index.names[level]

        (
            self.grouper,
            self._codes,
            self._group_index,
        ) = index._get_grouper_for_level(self.grouper, level)

    # a passed Grouper like, directly get the grouper in the same way
    # as single grouper groupby, use the group_info to get codes
    elif isinstance(self.grouper, Grouper):
        # get the new grouper; we already have disambiguated
        # what key/level refer to exactly, don't need to
        # check again as we have by this point converted these
        # to an actual value (rather than a pd.Grouper)
        _, grouper, _ = self.grouper._get_grouper(self.obj, validate=False)
        if self.name is None:
            self.name = grouper.result_index.name
        self.obj = self.grouper.obj
        self.grouper = grouper._get_grouper()

    else:
        # no explicit grouper: look the values up on obj by name
        if self.grouper is None and self.name is not None and self.obj is not None:
            self.grouper = self.obj[self.name]

        elif isinstance(self.grouper, (list, tuple)):
            self.grouper = com.asarray_tuplesafe(self.grouper)

        # a passed Categorical
        elif is_categorical_dtype(self.grouper):

            self.grouper, self.all_grouper = recode_for_groupby(
                self.grouper, self.sort, observed)
            categories = self.grouper.categories

            # we make a CategoricalIndex out of the cat grouper
            # preserving the categories / ordered attributes
            self._codes = self.grouper.codes
            if observed:
                # keep only the codes that occur, dropping the -1 entries
                codes = algorithms.unique1d(self.grouper.codes)
                codes = codes[codes != -1]
                if sort or self.grouper.ordered:
                    codes = np.sort(codes)
            else:
                # one code per category, whether or not it occurs
                codes = np.arange(len(categories))

            self._group_index = CategoricalIndex(
                Categorical.from_codes(codes=codes, categories=categories, ordered=self.grouper.ordered),
                name=self.name,
            )

        # we are done
        if isinstance(self.grouper, Grouping):
            self.grouper = self.grouper.grouper

        # no level passed
        elif not isinstance(self.grouper, (Series, Index, ExtensionArray, np.ndarray)):
            # objects without an ndim attribute are treated as 1-dimensional
            if getattr(self.grouper, "ndim", 1) != 1:
                t = self.name or str(type(self.grouper))
                raise ValueError(f"Grouper for '{t}' not 1-dimensional")
            self.grouper = self.index.map(self.grouper)
            # the mapped result must be sized and as long as the index
            if not (hasattr(self.grouper, "__len__") and len(self.grouper) == len(self.index)):
                grper = pprint_thing(self.grouper)
                errmsg = ("Grouper result violates len(labels) == "
                          f"len(data)\nresult: {grper}")
                self.grouper = None  # Try for sanity
                raise AssertionError(errmsg)

    # if we have a date/time-like grouper, make sure that we have
    # Timestamps like
    if getattr(self.grouper, "dtype", None) is not None:
        if is_datetime64_dtype(self.grouper):
            self.grouper = self.grouper.astype("datetime64[ns]")
        elif is_timedelta64_dtype(self.grouper):
            self.grouper = self.grouper.astype("timedelta64[ns]")
def test_unique(self, data, categories, expected_data, ordered): dtype = CategoricalDtype(categories, ordered=ordered) idx = CategoricalIndex(data, dtype=dtype) expected = CategoricalIndex(expected_data, dtype=dtype) tm.assert_index_equal(idx.unique(), expected)
def create_index(self, categories=None, ordered=False):
    """
    Return a CategoricalIndex over the values ``'aabbca'``.

    Parameters
    ----------
    categories : optional
        Categories to use; ``['c', 'a', 'b']`` when None.
    ordered : bool, default False
        Forwarded to the CategoricalIndex constructor.
    """
    chosen_categories = list('cab') if categories is None else categories
    return CategoricalIndex(
        list('aabbca'),
        categories=chosen_categories,
        ordered=ordered,
    )
def test_construction(self):
    """
    Check the categories, codes and ``ordered`` flag produced by the
    different ways of constructing a CategoricalIndex.
    """
    all_present = np.array([0, 0, 1, 1, 2, 0], dtype='int8')
    c_missing = np.array([0, 0, 1, 1, -1, 0], dtype='int8')

    def check(index, expected_categories, expected_codes, ordered=None):
        # compare categories and codes; ``ordered`` is only checked when given
        tm.assert_index_equal(index.categories, Index(expected_categories))
        tm.assert_numpy_array_equal(index.codes, expected_codes)
        if ordered is not None:
            assert bool(index.ordered) is ordered

    base = self.create_index(categories=list('abcd'))
    categories = base.categories

    # Index(...) of the index itself or of its values compares equal
    # to the original with exact=True
    for source in (base, base.values):
        wrapped = Index(source)
        tm.assert_index_equal(wrapped, base, exact=True)
        assert not wrapped.ordered

    # empty
    check(
        CategoricalIndex(categories=categories),
        categories,
        np.array([], dtype='int8'),
        ordered=False,
    )

    # passing categories
    check(
        CategoricalIndex(list('aabbca'), categories=categories),
        categories,
        all_present,
    )

    # from a Categorical, with and without explicit categories
    cat = pd.Categorical(list('aabbca'))
    check(CategoricalIndex(cat), list('abc'), all_present, ordered=False)
    check(
        CategoricalIndex(cat, categories=categories),
        categories,
        all_present,
        ordered=False,
    )

    # from another CategoricalIndex; narrowing the categories turns 'c' into -1
    from_cat = CategoricalIndex(cat, categories=list('abcd'))
    check(CategoricalIndex(from_cat), categories, all_present, ordered=False)
    check(
        CategoricalIndex(from_cat, categories=list('ab')),
        list('ab'),
        c_missing,
        ordered=False,
    )
    check(
        CategoricalIndex(from_cat, categories=list('ab'), ordered=True),
        list('ab'),
        c_missing,
        ordered=True,
    )

    # an explicit dtype='category' gives the same result as omitting it
    without_dtype = pd.CategoricalIndex(
        from_cat, categories=list('ab'), ordered=True)
    with_dtype = pd.CategoricalIndex(
        from_cat, categories=list('ab'), ordered=True, dtype='category')
    tm.assert_index_equal(without_dtype, with_dtype, exact=True)

    # turn me to an Index
    plain = Index(np.array(from_cat))
    assert isinstance(plain, Index)
    assert not isinstance(plain, CategoricalIndex)
def test_method_delegation(self):
    """
    Category-manipulation methods called on a CategoricalIndex return the
    expected CategoricalIndex; ``inplace=True`` is rejected.
    """

    def make(categories, values='aabbca', **kwargs):
        # small factory: both arguments are given as strings of letters
        return CategoricalIndex(
            list(values), categories=list(categories), **kwargs)

    tm.assert_index_equal(
        make('cabdef').set_categories(list('cab')), make('cab'))

    # the same index is used for both rename_categories checks
    narrow = make('cab')
    tm.assert_index_equal(
        narrow.rename_categories(list('efg')), make('efg', values='ffggef'))

    # GH18862 (let rename_categories take callables)
    tm.assert_index_equal(
        narrow.rename_categories(lambda x: x.upper()),
        make('CAB', values='AABBCA'))

    tm.assert_index_equal(make('cab').add_categories(['d']), make('cabd'))

    # removing 'c' leaves NaN where 'c' used to be
    without_c = CategoricalIndex(
        list('aabb') + [np.nan] + ['a'], categories=list('ab'))
    tm.assert_index_equal(make('cab').remove_categories(['c']), without_c)

    wide = make('cabdef')
    tm.assert_index_equal(wide.as_unordered(), wide)

    wide = make('cabdef')
    tm.assert_index_equal(wide.as_ordered(), make('cabdef', ordered=True))

    # invalid
    msg = "cannot use inplace with CategoricalIndex"
    with pytest.raises(ValueError, match=msg):
        wide.set_categories(list('cab'), inplace=True)
def test_method_delegation(self):
    """
    Each category-manipulation method on a CategoricalIndex is checked
    against an explicitly constructed expected index; ``inplace=True`` must
    raise ValueError.
    """
    data = list("aabbca")

    # set_categories
    source = CategoricalIndex(data, categories=list("cabdef"))
    actual = source.set_categories(list("cab"))
    wanted = CategoricalIndex(data, categories=list("cab"))
    tm.assert_index_equal(actual, wanted)

    # rename_categories with a list
    source = CategoricalIndex(data, categories=list("cab"))
    actual = source.rename_categories(list("efg"))
    wanted = CategoricalIndex(list("ffggef"), categories=list("efg"))
    tm.assert_index_equal(actual, wanted)

    # GH18862 (let rename_categories take callables)
    actual = source.rename_categories(lambda x: x.upper())
    wanted = CategoricalIndex(list("AABBCA"), categories=list("CAB"))
    tm.assert_index_equal(actual, wanted)

    # add_categories
    source = CategoricalIndex(data, categories=list("cab"))
    actual = source.add_categories(["d"])
    wanted = CategoricalIndex(data, categories=list("cabd"))
    tm.assert_index_equal(actual, wanted)

    # remove_categories: the removed value is expected to show up as NaN
    source = CategoricalIndex(data, categories=list("cab"))
    actual = source.remove_categories(["c"])
    wanted = CategoricalIndex(
        list("aabb") + [np.nan] + ["a"], categories=list("ab")
    )
    tm.assert_index_equal(actual, wanted)

    # as_unordered on an index built without ``ordered`` compares equal to it
    source = CategoricalIndex(data, categories=list("cabdef"))
    tm.assert_index_equal(source.as_unordered(), source)

    # as_ordered
    source = CategoricalIndex(data, categories=list("cabdef"))
    actual = source.as_ordered()
    wanted = CategoricalIndex(data, categories=list("cabdef"), ordered=True)
    tm.assert_index_equal(actual, wanted)

    # invalid
    msg = "cannot use inplace with CategoricalIndex"
    with pytest.raises(ValueError, match=msg):
        source.set_categories(list("cab"), inplace=True)
def test_insert_na_mismatched_dtype(self):
    """
    Inserting ``pd.NaT`` into a CategoricalIndex of integers must raise
    ValueError with the "not present in this Categorical's categories"
    message.
    """
    expected_msg = (
        "'fill_value=NaT' is not present in this Categorical's categories"
    )
    int_index = CategoricalIndex([0, 1, 1])

    with pytest.raises(ValueError, match=expected_msg):
        int_index.insert(0, pd.NaT)