def test_remove_categories(self):
    """Check ``Categorical.remove_categories`` with and without ``inplace``.

    The non-inplace form must leave the original untouched and return the
    reduced categorical; the inplace form must mutate and return ``None``;
    removing a category that is not present must raise ``ValueError``.
    """
    cat = Categorical(["a", "b", "c", "a"], ordered=True)
    untouched = cat.copy()
    without_c = Categorical(["a", "b", np.nan, "a"],
                            categories=["a", "b"], ordered=True)

    # first inplace == False: scalar and list-like removals behave alike
    for removal in ("c", ["c"]):
        res = cat.remove_categories(removal)
        tm.assert_categorical_equal(cat, untouched)
        tm.assert_categorical_equal(res, without_c)

    # inplace == True
    res = cat.remove_categories("c", inplace=True)
    tm.assert_categorical_equal(cat, without_c)
    assert res is None

    # removal is not in categories
    with pytest.raises(ValueError):
        cat.remove_categories(["c"])
def test_set_ordered(self):
    """Exercise ``as_ordered``/``as_unordered``/``set_ordered`` and check
    that the ``ordered`` attribute itself cannot be assigned."""
    cat = Categorical(["a", "b", "c", "a"], ordered=True)

    # the returning forms hand back an object carrying the requested flag
    unordered = cat.as_unordered()
    assert not unordered.ordered
    cat2 = cat.as_ordered()
    assert cat2.ordered

    # the inplace forms flip the flag on the object itself
    cat2.as_unordered(inplace=True)
    assert not cat2.ordered
    cat2.as_ordered(inplace=True)
    assert cat2.ordered

    # set_ordered, returning form
    assert cat2.set_ordered(True).ordered
    assert not cat2.set_ordered(False).ordered

    # set_ordered, inplace form
    cat2.set_ordered(True, inplace=True)
    assert cat2.ordered
    cat2.set_ordered(False, inplace=True)
    assert not cat2.ordered

    # removed in 0.19.0
    msg = "can\'t set attribute"
    for flag in (True, False):
        with tm.assert_raises_regex(AttributeError, msg):
            cat.ordered = flag
def test_rename_categories(self):
    """Check ``rename_categories`` with a list, a callable, ``inplace=True``
    and with a wrong number of new categories."""
    cat = Categorical(["a", "b", "c", "a"])
    renamed_values = np.array([1, 2, 3, 1], dtype=np.int64)
    renamed_categories = Index([1, 2, 3])

    # inplace=False: the old one must not be changed
    res = cat.rename_categories([1, 2, 3])
    tm.assert_numpy_array_equal(res.__array__(), renamed_values)
    tm.assert_index_equal(res.categories, renamed_categories)

    original_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
    tm.assert_numpy_array_equal(cat.__array__(), original_values)
    tm.assert_index_equal(cat.categories, Index(["a", "b", "c"]))

    # GH18862 (let rename_categories take callables)
    result = cat.rename_categories(lambda x: x.upper())
    tm.assert_categorical_equal(result, Categorical(["A", "B", "C", "A"]))

    # and now inplace
    res = cat.rename_categories([1, 2, 3], inplace=True)
    assert res is None
    tm.assert_numpy_array_equal(cat.__array__(), renamed_values)
    tm.assert_index_equal(cat.categories, renamed_categories)

    # Lengthen, then shorten: both must be rejected
    for wrong_length in ([1, 2, 3, 4], [1, 2]):
        with pytest.raises(ValueError):
            cat.rename_categories(wrong_length)
def test_codes_immutable(self):
    """Check that ``codes`` is read-only for callers while the categorical
    itself (and its private ``_codes``) stays writeable."""
    # Codes should be read only
    c = Categorical(["a", "b", "c", "a", np.nan])
    tm.assert_numpy_array_equal(
        c.codes, np.array([0, 1, 2, 0, -1], dtype='int8'))

    # Assignments to codes should raise
    with pytest.raises(ValueError):
        c.codes = np.array([0, 1, 2, 0, 1], dtype='int8')

    # changes in the codes array should raise
    # np 1.6.1 raises RuntimeError rather than ValueError
    codes = c.codes
    with pytest.raises(ValueError):
        codes[4] = 1

    # But even after getting the codes, the original array should still be
    # writeable!
    c[4] = "a"
    tm.assert_numpy_array_equal(
        c.codes, np.array([0, 1, 2, 0, 0], dtype='int8'))

    c._codes[4] = 2
    tm.assert_numpy_array_equal(
        c.codes, np.array([0, 1, 2, 0, 2], dtype='int8'))
def test_groupby_categorical_index(self): s = np.random.RandomState(12345) levels = ['foo', 'bar', 'baz', 'qux'] codes = s.randint(0, 4, size=20) cats = Categorical.from_codes(codes, levels, ordered=True) df = DataFrame( np.repeat( np.arange(20), 4).reshape(-1, 4), columns=list('abcd')) df['cats'] = cats # with a cat index result = df.set_index('cats').groupby(level=0).sum() expected = df[list('abcd')].groupby(cats.codes).sum() expected.index = CategoricalIndex( Categorical.from_codes( [0, 1, 2, 3], levels, ordered=True), name='cats') assert_frame_equal(result, expected) # with a cat column, should produce a cat index result = df.groupby('cats').sum() expected = df[list('abcd')].groupby(cats.codes).sum() expected.index = CategoricalIndex( Categorical.from_codes( [0, 1, 2, 3], levels, ordered=True), name='cats') assert_frame_equal(result, expected)
def test_groupby_categorical_no_compress(self): data = Series(np.random.randn(9)) codes = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]) cats = Categorical.from_codes(codes, [0, 1, 2], ordered=True) result = data.groupby(cats).mean() exp = data.groupby(codes).mean() exp.index = CategoricalIndex(exp.index, categories=cats.categories, ordered=cats.ordered) assert_series_equal(result, exp) codes = np.array([0, 0, 0, 1, 1, 1, 3, 3, 3]) cats = Categorical.from_codes(codes, [0, 1, 2, 3], ordered=True) result = data.groupby(cats).mean() exp = data.groupby(codes).mean().reindex(cats.categories) exp.index = CategoricalIndex(exp.index, categories=cats.categories, ordered=cats.ordered) assert_series_equal(result, exp) cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"], ordered=True) data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats}) result = data.groupby("b").mean() result = result["a"].values exp = np.array([1, 2, 4, np.nan]) tm.assert_numpy_array_equal(result, exp)
def test_shift(fill_value):
    """Shifting a categorical by one must put a missing value first and keep
    the categories and orderedness unchanged."""
    categories = ['a', 'b', 'c', 'd']
    ct = Categorical(['a', 'b', 'c', 'd'], categories=categories,
                     ordered=False)
    expected = Categorical([None, 'a', 'b', 'c'], categories=categories,
                           ordered=False)

    shifted = ct.shift(1, fill_value=fill_value)
    assert_equal(shifted, expected)
def test_right(self): data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) result, bins = cut(data, 4, right=True, retbins=True) intervals = IntervalIndex.from_breaks(bins.round(3)) expected = Categorical(intervals, ordered=True) expected = expected.take([0, 0, 0, 2, 3, 0, 0]) tm.assert_categorical_equal(result, expected) tm.assert_almost_equal(bins, np.array([0.1905, 2.575, 4.95, 7.325, 9.7]))
def test_from_codes_with_dtype_raises(self):
    """``from_codes`` must reject ``dtype`` combined with either
    ``categories`` or ``ordered``."""
    msg = 'Cannot specify'
    conflicting_kwargs = (
        {'categories': ['a', 'b']},
        {'ordered': True},
    )
    for extra in conflicting_kwargs:
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([0, 1],
                                   dtype=CategoricalDtype(['a', 'b']),
                                   **extra)
def _bins_to_cuts(x, bins, right=True, labels=None, precision=3, include_lowest=False, dtype=None, duplicates='raise'): if duplicates not in ['raise', 'drop']: raise ValueError("invalid value for 'duplicates' parameter, " "valid options are: raise, drop") if isinstance(bins, IntervalIndex): # we have a fast-path here ids = bins.get_indexer(x) result = algos.take_nd(bins, ids) result = Categorical(result, categories=bins, ordered=True) return result, bins unique_bins = algos.unique(bins) if len(unique_bins) < len(bins) and len(bins) != 2: if duplicates == 'raise': raise ValueError("Bin edges must be unique: {bins!r}.\nYou " "can drop duplicate edges by setting " "the 'duplicates' kwarg".format(bins=bins)) else: bins = unique_bins side = 'left' if right else 'right' ids = _ensure_int64(bins.searchsorted(x, side=side)) if include_lowest: # Numpy 1.9 support: ensure this mask is a Numpy array ids[np.asarray(x == bins[0])] = 1 na_mask = isna(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() if labels is not False: if labels is None: labels = _format_labels(bins, precision, right=right, include_lowest=include_lowest, dtype=dtype) else: if len(labels) != len(bins) - 1: raise ValueError('Bin labels must be one fewer than ' 'the number of bin edges') if not is_categorical_dtype(labels): labels = Categorical(labels, categories=labels, ordered=True) np.putmask(ids, na_mask, 0) result = algos.take_nd(labels, ids - 1) else: result = ids - 1 if has_nas: result = result.astype(np.float64) np.putmask(result, na_mask, np.nan) return result, bins
def test_from_codes_with_nan_code(self):
    """``from_codes`` must reject codes containing NaN, whether the
    categories come from ``categories=`` or from ``dtype=``."""
    # GH21767
    codes = [1, 2, np.nan]
    dtype = CategoricalDtype(categories=['a', 'b', 'c'])
    msg = "codes need to be array-like integers"

    with pytest.raises(ValueError, match=msg):
        Categorical.from_codes(codes, categories=dtype.categories)

    with pytest.raises(ValueError, match=msg):
        Categorical.from_codes(codes, dtype=dtype)
def test_argsort(self): c = Categorical([5, 3, 1, 4, 2], ordered=True) expected = np.array([2, 4, 1, 3, 0]) tm.assert_numpy_array_equal(c.argsort(ascending=True), expected, check_dtype=False) expected = expected[::-1] tm.assert_numpy_array_equal(c.argsort(ascending=False), expected, check_dtype=False)
def test_mask_with_boolean(index): s = Series(range(3)) idx = Categorical([True, False, True]) if index: idx = CategoricalIndex(idx) assert com.is_bool_indexer(idx) result = s[idx] expected = s[idx.astype('object')] tm.assert_series_equal(result, expected)
def test_rename_categories_dict(self):
    """``rename_categories`` with a dict: full mapping, inplace, partial
    mapping, mapping with extra keys, and mapping with no matching keys."""
    # GH 17336
    full_mapping = {'a': 4, 'b': 3, 'c': 2, 'd': 1}
    cat = Categorical(['a', 'b', 'c', 'd'])
    expected = Index([4, 3, 2, 1])
    res = cat.rename_categories(full_mapping)
    tm.assert_index_equal(res.categories, expected)

    # Test for inplace
    res = cat.rename_categories(full_mapping, inplace=True)
    assert res is None
    tm.assert_index_equal(cat.categories, expected)

    # (mapping, categories expected afterwards)
    cases = [
        # Test for dicts of smaller length
        ({'a': 1, 'c': 3}, [1, 'b', 3, 'd']),
        # Test for dicts with bigger length
        ({'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6}, [1, 2, 3, 4]),
        # Test for dicts with no items from old categories
        ({'f': 1, 'g': 3}, ['a', 'b', 'c', 'd']),
    ]
    for mapping, new_categories in cases:
        cat = Categorical(['a', 'b', 'c', 'd'])
        res = cat.rename_categories(mapping)
        tm.assert_index_equal(res.categories, Index(new_categories))
def test_fillna_iterable_category(self, named): # https://github.com/pandas-dev/pandas/issues/21097 if named: Point = collections.namedtuple("Point", "x y") else: Point = lambda *args: args # tuple cat = Categorical([Point(0, 0), Point(0, 1), None]) result = cat.fillna(Point(0, 0)) expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)]) tm.assert_categorical_equal(result, expected)
def test_rename_categories_series(self):
    """Passing a ``Series`` to ``rename_categories`` must emit exactly one
    ``FutureWarning`` and treat the Series as list-like."""
    # https://github.com/pandas-dev/pandas/issues/17981
    c = Categorical(['a', 'b'])
    new_categories = Series([0, 1])
    xpr = "Treating Series 'new_categories' as a list-like "

    with tm.assert_produces_warning(FutureWarning) as rec:
        result = c.rename_categories(new_categories)

    assert len(rec) == 1
    assert xpr in str(rec[0].message)
    tm.assert_categorical_equal(result, Categorical([0, 1]))
def test_from_codes_with_float(self):
    """Float codes with integral values are accepted with a
    ``FutureWarning``; non-integral float codes raise ``ValueError``."""
    # GH21767
    categories = ['a', 'b', 'c']

    integral_floats = [1.0, 2.0, 0]  # integer, but in float dtype
    with tm.assert_produces_warning(FutureWarning):
        cat = Categorical.from_codes(integral_floats, categories)
    tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))

    fractional = [1.1, 2.0, 0]  # non-integer
    with pytest.raises(ValueError):
        Categorical.from_codes(fractional, categories)
def test_unique_ordered(self):
    """``unique`` on ordered categoricals, table-driven.

    Each case is (values, categories, expected values, expected categories).
    """
    # keep categories order when ordered=True
    cases = [
        (['b', 'a', 'b'], ['a', 'b'],
         ['b', 'a'], ['a', 'b']),
        (['c', 'b', 'a', 'a'], ['a', 'b', 'c'],
         ['c', 'b', 'a'], ['a', 'b', 'c']),
        (['b', 'a', 'a'], ['a', 'b', 'c'],
         ['b', 'a'], ['a', 'b']),
        (['b', 'b', np.nan, 'a'], ['a', 'b', 'c'],
         ['b', np.nan, 'a'], ['a', 'b']),
    ]
    for values, categories, exp_values, exp_categories in cases:
        cat = Categorical(values, categories=categories, ordered=True)
        res = cat.unique()
        exp_cat = Categorical(exp_values, categories=exp_categories,
                              ordered=True)
        tm.assert_categorical_equal(res, exp_cat)
def test_unique(self):
    """``unique`` on unordered categoricals: the resulting categories follow
    order of appearance, and NaN is kept as a value but not as a category."""
    # categories are reordered based on value when ordered=False
    cat = Categorical(["a", "b"])
    ab = Index(["a", "b"])
    uniques = cat.unique()
    tm.assert_index_equal(uniques.categories, ab)
    tm.assert_categorical_equal(uniques, cat)

    cat = Categorical(["a", "b", "a", "a"], categories=["a", "b", "c"])
    uniques = cat.unique()
    tm.assert_index_equal(uniques.categories, ab)
    tm.assert_categorical_equal(uniques, Categorical(ab))

    cat = Categorical(["c", "a", "b", "a", "a"],
                      categories=["a", "b", "c"])
    cab = Index(["c", "a", "b"])
    uniques = cat.unique()
    tm.assert_index_equal(uniques.categories, cab)
    tm.assert_categorical_equal(
        uniques, Categorical(cab, categories=['c', 'a', 'b']))

    # nan must be removed
    cat = Categorical(["b", np.nan, "b", np.nan, "a"],
                      categories=["a", "b", "c"])
    uniques = cat.unique()
    tm.assert_index_equal(uniques.categories, Index(["b", "a"]))
    tm.assert_categorical_equal(
        uniques, Categorical(["b", np.nan, "a"], categories=["b", "a"]))
def test_map(self): c = Categorical(list('ABABC'), categories=list('CBA'), ordered=True) result = c.map(lambda x: x.lower()) exp = Categorical(list('ababc'), categories=list('cba'), ordered=True) tm.assert_categorical_equal(result, exp) c = Categorical(list('ABABC'), categories=list('ABC'), ordered=False) result = c.map(lambda x: x.lower()) exp = Categorical(list('ababc'), categories=list('abc'), ordered=False) tm.assert_categorical_equal(result, exp) result = c.map(lambda x: 1) # GH 12766: Return an index not an array tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64)))
def test_categories_assigments(self):
    """Assigning to ``categories`` renames in place; a list of the wrong
    length must raise ``ValueError``."""
    s = Categorical(["a", "b", "c", "a"])
    s.categories = [1, 2, 3]

    tm.assert_numpy_array_equal(s.__array__(),
                                np.array([1, 2, 3, 1], dtype=np.int64))
    tm.assert_index_equal(s.categories, Index([1, 2, 3]))

    # lengthen, then shorten
    for wrong_length in ([1, 2, 3, 4], [1, 2]):
        with pytest.raises(ValueError):
            s.categories = wrong_length
def test_cut_pass_labels(self): arr = [50, 5, 10, 15, 20, 30, 70] bins = [0, 25, 50, 100] labels = ['Small', 'Medium', 'Large'] result = cut(arr, bins, labels=labels) exp = Categorical(['Medium'] + 4 * ['Small'] + ['Medium', 'Large'], ordered=True) tm.assert_categorical_equal(result, exp) result = cut(arr, bins, labels=Categorical.from_codes([0, 1, 2], labels)) exp = Categorical.from_codes([1] + 4 * [0] + [1, 2], labels) tm.assert_categorical_equal(result, exp)
def test_validate_ordered(self):
    """A non-boolean ``ordered`` must raise ``TypeError`` from both the
    constructor and ``from_codes``."""
    # see gh-14058
    exp_msg = "'ordered' must either be 'True' or 'False'"
    exp_err = TypeError

    # This should be a boolean.
    ordered = np.array([0, 1, 2])

    builders = (
        lambda: Categorical([1, 2, 3], ordered=ordered),
        lambda: Categorical.from_codes([0, 0, 1],
                                       categories=['a', 'b', 'c'],
                                       ordered=ordered),
    )
    for build in builders:
        with tm.assert_raises_regex(exp_err, exp_msg):
            build()
def test_from_codes_with_categorical_categories(self):
    """``from_codes`` accepts a ``Categorical`` or ``CategoricalIndex`` as
    ``categories``, but still rejects non-unique ones."""
    # GH17884
    expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])

    for container in (Categorical, CategoricalIndex):
        result = Categorical.from_codes(
            [0, 1], categories=container(['a', 'b', 'c']))
        tm.assert_categorical_equal(result, expected)

    # non-unique Categorical still raises
    with pytest.raises(ValueError):
        Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))
def test_from_inferred_categories_coerces(self): cats = ['1', '2', 'bad'] codes = np.array([0, 0, 1, 2], dtype='i8') dtype = CategoricalDtype([1, 2]) result = Categorical._from_inferred_categories(cats, codes, dtype) expected = Categorical([1, 1, 2, np.nan]) tm.assert_categorical_equal(result, expected)
def test_astype(self):
    """``CategoricalIndex.astype`` to 'category', ``object`` and
    'interval'."""
    ci = self.create_index()

    as_category = ci.astype('category')
    tm.assert_index_equal(as_category, ci, exact=True)

    as_object = ci.astype(object)
    tm.assert_index_equal(as_object, Index(np.array(ci)))

    # this IS equal, but not the same class
    assert as_object.equals(ci)
    assert isinstance(as_object, Index)
    assert not isinstance(as_object, CategoricalIndex)

    # interval
    ii = IntervalIndex.from_arrays(left=[-0.001, 2.0],
                                   right=[2, 4],
                                   closed='right')
    interval_cat = Categorical.from_codes([0, 1, -1], categories=ii,
                                          ordered=True)
    ci = CategoricalIndex(interval_cat)

    result = ci.astype('interval')
    expected = ii.take([0, 1, -1])
    tm.assert_index_equal(result, expected)

    # rebuilding from the raw values must give the same index
    result = IntervalIndex.from_intervals(result.values)
    tm.assert_index_equal(result, expected)
def f(a):
    """Replace a categorical by one holding each category exactly once.

    For a ``Categorical``/``CategoricalIndex`` the result is a
    ``Categorical`` whose codes are ``0..len(categories)-1`` with the same
    categories and orderedness; anything else is returned unchanged.
    """
    if not isinstance(a, (CategoricalIndex, Categorical)):
        return a

    cats = a.categories
    return Categorical.from_codes(np.arange(len(cats)),
                                  categories=cats,
                                  ordered=a.ordered)
def test_reshaping_panel_categorical(self):
    """Unstacking a categorical column of a panel-derived frame must give
    one categorical column per minor-axis label."""
    p = tm.makePanel()
    p['str'] = 'foo'
    df = p.to_frame()

    df['category'] = df['str'].astype('category')
    result = df['category'].unstack()

    c = Categorical(['foo'] * len(p.major_axis))
    expected = DataFrame({label: c.copy() for label in 'ABCD'},
                         columns=Index(list('ABCD'), name='minor'),
                         index=p.major_axis.set_names('major'))
    tm.assert_frame_equal(result, expected)
def test_cut_return_categorical(self):
    """``cut`` on a Series must return a Series of ordered categoricals."""
    s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
    res = cut(s, 3)

    bin_labels = ["(-0.008, 2.667]", "(2.667, 5.333]", "(5.333, 8]"]
    bin_codes = [0, 0, 0, 1, 1, 1, 2, 2, 2]
    exp = Series(Categorical.from_codes(bin_codes, bin_labels,
                                        ordered=True))
    tm.assert_series_equal(res, exp)
def test_qcut_return_categorical(self):
    """``qcut`` on a Series must return a Series of ordered categoricals."""
    s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
    res = qcut(s, [0, 0.333, 0.666, 1])

    bin_labels = ["[0, 2.664]", "(2.664, 5.328]", "(5.328, 8]"]
    bin_codes = [0, 0, 0, 1, 1, 1, 2, 2, 2]
    exp = Series(Categorical.from_codes(bin_codes, bin_labels,
                                        ordered=True))
    tm.assert_series_equal(res, exp)
def test_isna(self): exp = np.array([False, False, True]) cat = Categorical(["a", "b", np.nan]) res = cat.isna() tm.assert_numpy_array_equal(res, exp)
def test_constructor_imaginary(self): values = [1, 2, 3 + 1j] c1 = Categorical(values) tm.assert_index_equal(c1.categories, Index(values)) tm.assert_numpy_array_equal(np.array(c1), np.array(values))
def test_slicing_and_getting_ops(self): # systematically test the slicing operations: # for all slicing ops: # - returning a dataframe # - returning a column # - returning a row # - returning a single value cats = Categorical( ["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"] ) idx = Index(["h", "i", "j", "k", "l", "m", "n"]) values = [1, 2, 3, 4, 5, 6, 7] df = DataFrame({"cats": cats, "values": values}, index=idx) # the expected values cats2 = Categorical(["b", "c"], categories=["a", "b", "c"]) idx2 = Index(["j", "k"]) values2 = [3, 4] # 2:4,: | "j":"k",: exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2) # :,"cats" | :,0 exp_col = Series(cats, index=idx, name="cats") # "j",: | 2,: exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", name="j") # "j","cats | 2,0 exp_val = "b" # iloc # frame res_df = df.iloc[2:4, :] tm.assert_frame_equal(res_df, exp_df) assert is_categorical_dtype(res_df["cats"]) # row res_row = df.iloc[2, :] tm.assert_series_equal(res_row, exp_row) assert isinstance(res_row["cats"], str) # col res_col = df.iloc[:, 0] tm.assert_series_equal(res_col, exp_col) assert is_categorical_dtype(res_col) # single value res_val = df.iloc[2, 0] assert res_val == exp_val # loc # frame res_df = df.loc["j":"k", :] tm.assert_frame_equal(res_df, exp_df) assert is_categorical_dtype(res_df["cats"]) # row res_row = df.loc["j", :] tm.assert_series_equal(res_row, exp_row) assert isinstance(res_row["cats"], str) # col res_col = df.loc[:, "cats"] tm.assert_series_equal(res_col, exp_col) assert is_categorical_dtype(res_col) # single value res_val = df.loc["j", "cats"] assert res_val == exp_val # ix # frame # res_df = df.loc["j":"k",[0,1]] # doesn't work? 
res_df = df.loc["j":"k", :] tm.assert_frame_equal(res_df, exp_df) assert is_categorical_dtype(res_df["cats"]) # row res_row = df.loc["j", :] tm.assert_series_equal(res_row, exp_row) assert isinstance(res_row["cats"], str) # col res_col = df.loc[:, "cats"] tm.assert_series_equal(res_col, exp_col) assert is_categorical_dtype(res_col) # single value res_val = df.loc["j", df.columns[0]] assert res_val == exp_val # iat res_val = df.iat[2, 0] assert res_val == exp_val # at res_val = df.at["j", "cats"] assert res_val == exp_val # fancy indexing exp_fancy = df.iloc[[2]] res_fancy = df[df["cats"] == "b"] tm.assert_frame_equal(res_fancy, exp_fancy) res_fancy = df[df["values"] == 3] tm.assert_frame_equal(res_fancy, exp_fancy) # get_value res_val = df.at["j", "cats"] assert res_val == exp_val # i : int, slice, or sequence of integers res_row = df.iloc[2] tm.assert_series_equal(res_row, exp_row) assert isinstance(res_row["cats"], str) res_df = df.iloc[slice(2, 4)] tm.assert_frame_equal(res_df, exp_df) assert is_categorical_dtype(res_df["cats"]) res_df = df.iloc[[2, 3]] tm.assert_frame_equal(res_df, exp_df) assert is_categorical_dtype(res_df["cats"]) res_col = df.iloc[:, 0] tm.assert_series_equal(res_col, exp_col) assert is_categorical_dtype(res_col) res_df = df.iloc[:, slice(0, 2)] tm.assert_frame_equal(res_df, df) assert is_categorical_dtype(res_df["cats"]) res_df = df.iloc[:, [0, 1]] tm.assert_frame_equal(res_df, df) assert is_categorical_dtype(res_df["cats"])
def test_categories_match_up_to_permutation(self): # test dtype comparisons between cats c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False) c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False) c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True) assert c1._categories_match_up_to_permutation(c1) assert c2._categories_match_up_to_permutation(c2) assert c3._categories_match_up_to_permutation(c3) assert c1._categories_match_up_to_permutation(c2) assert not c1._categories_match_up_to_permutation(c3) assert not c1._categories_match_up_to_permutation(Index(list("aabca"))) assert not c1._categories_match_up_to_permutation(c1.astype(object)) assert c1._categories_match_up_to_permutation(CategoricalIndex(c1)) assert c1._categories_match_up_to_permutation( CategoricalIndex(c1, categories=list("cab")) ) assert not c1._categories_match_up_to_permutation( CategoricalIndex(c1, ordered=True) ) # GH 16659 s1 = Series(c1) s2 = Series(c2) s3 = Series(c3) assert c1._categories_match_up_to_permutation(s1) assert c2._categories_match_up_to_permutation(s2) assert c3._categories_match_up_to_permutation(s3) assert c1._categories_match_up_to_permutation(s2) assert not c1._categories_match_up_to_permutation(s3) assert not c1._categories_match_up_to_permutation(s1.astype(object))
def test_is_dtype_equal_deprecated(self):
    """Calling ``Categorical.is_dtype_equal`` must emit a
    ``FutureWarning``."""
    # GH#37545
    cat = Categorical(list("aabca"), categories=list("abc"), ordered=False)

    with tm.assert_produces_warning(FutureWarning):
        cat.is_dtype_equal(cat)
def test_reindexing(self):
    """Reindex ``self.df2`` with plain lists and with ``Categorical``
    targets, then check the rejected inputs and unsupported arguments."""

    def check(target, a_values, b_values):
        # reindex with ``target`` and compare against a frame indexed by "B"
        result = self.df2.reindex(target)
        expected = DataFrame({"A": a_values, "B": b_values}).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

    # reindexing
    # convert to a regular index
    check(["a", "b", "e"], [0, 1, 5, 2, 3, np.nan], Series(list("aaabbe")))
    check(["a", "b"], [0, 1, 5, 2, 3], Series(list("aaabb")))
    check(["e"], [np.nan], Series(["e"]))
    check(["d"], [np.nan], Series(["d"]))

    # since we are actually reindexing with a Categorical
    # then return a Categorical
    cats = list("cabe")

    check(Categorical(["a", "d"], categories=cats),
          [0, 1, 5, np.nan],
          Series(list("aaad")).astype(CDT(cats)))
    check(Categorical(["a"], categories=cats),
          [0, 1, 5],
          Series(list("aaa")).astype(CDT(cats)))

    # plain list targets again, after the categorical ones
    check(["a", "b", "e"], [0, 1, 5, 2, 3, np.nan], Series(list("aaabbe")))
    check(["a", "b"], [0, 1, 5, 2, 3], Series(list("aaabb")))
    check(["e"], [np.nan], Series(["e"]))

    # give back the type of categorical that we received
    check(Categorical(["a", "d"], categories=cats, ordered=True),
          [0, 1, 5, np.nan],
          Series(list("aaad")).astype(CDT(cats, ordered=True)))
    check(Categorical(["a", "d"], categories=["a", "d"]),
          [0, 1, 5, np.nan],
          Series(list("aaad")).astype(CDT(["a", "d"])))

    # passed duplicate indexers are not allowed
    msg = "cannot reindex with a non-unique indexer"
    with pytest.raises(ValueError, match=msg):
        self.df2.reindex(["a", "a"])

    # args NotImplemented ATM
    msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
    unsupported = (("method", "ffill"), ("level", 1), ("limit", 2))
    for name, value in unsupported:
        with pytest.raises(NotImplementedError, match=msg.format(name)):
            self.df2.reindex(["a"], **{name: value})
def test_iter_python_types(self): # GH-19909 cat = Categorical([1, 2]) assert isinstance(list(cat)[0], int) assert isinstance(cat.tolist()[0], int)
def test_set_dtype_nans(self): c = Categorical(["a", "b", np.nan]) result = c._set_dtype(CategoricalDtype(["a", "c"])) tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8"))
def test_set_dtype_same(self): c = Categorical(["a", "b", "c"]) result = c._set_dtype(CategoricalDtype(["a", "b", "c"])) tm.assert_categorical_equal(result, c)
def test_set_dtype_new_categories(self): c = Categorical(["a", "b", "c"]) result = c._set_dtype(CategoricalDtype(list("abcd"))) tm.assert_numpy_array_equal(result.codes, c.codes) tm.assert_index_equal(result.dtype.categories, Index(list("abcd")))
def test_set_dtype_many(self, values, categories, new_categories, ordered): c = Categorical(values, categories) expected = Categorical(values, new_categories, ordered) result = c._set_dtype(expected.dtype) tm.assert_categorical_equal(result, expected)
def test_set_dtype_no_overlap(self): c = Categorical(["a", "b", "c"], ["d", "e"]) result = c._set_dtype(CategoricalDtype(["a", "b"])) expected = Categorical([None, None, None], categories=["a", "b"]) tm.assert_categorical_equal(result, expected)
def test_iter_python_types_datetime(self): cat = Categorical([Timestamp("2017-01-01"), Timestamp("2017-01-02")]) assert isinstance(list(cat)[0], Timestamp) assert isinstance(cat.tolist()[0], Timestamp)
def test_construction_with_ordered(self, ordered): # GH 9347, 9190 cat = Categorical([0, 1, 2], ordered=ordered) assert cat.ordered == bool(ordered)