def test_constructor_invariant(self, values): # GH 14190 c = Categorical(values) c2 = Categorical(c) tm.assert_categorical_equal(c, c2)
def test_qcut_index(): result = qcut([0, 2], 2) intervals = [Interval(-0.001, 1), Interval(1, 2)] expected = Categorical(intervals, ordered=True) tm.assert_categorical_equal(result, expected)
def test_constructor(self): exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_) c1 = Categorical(exp_arr) tm.assert_numpy_array_equal(c1.__array__(), exp_arr) c2 = Categorical(exp_arr, categories=["a", "b", "c"]) tm.assert_numpy_array_equal(c2.__array__(), exp_arr) c2 = Categorical(exp_arr, categories=["c", "b", "a"]) tm.assert_numpy_array_equal(c2.__array__(), exp_arr) # categories must be unique msg = "Categorical categories must be unique" with pytest.raises(ValueError, match=msg): Categorical([1, 2], [1, 2, 2]) with pytest.raises(ValueError, match=msg): Categorical(["a", "b"], ["a", "b", "b"]) # The default should be unordered c1 = Categorical(["a", "b", "c", "a"]) assert not c1.ordered # Categorical as input c1 = Categorical(["a", "b", "c", "a"]) c2 = Categorical(c1) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) c2 = Categorical(c1) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) c2 = Categorical(c1) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) c2 = Categorical(c1, categories=["a", "b", "c"]) tm.assert_numpy_array_equal(c1.__array__(), c2.__array__()) tm.assert_index_equal(c2.categories, Index(["a", "b", "c"])) # Series of dtype category c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) c2 = Categorical(Series(c1)) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) c2 = Categorical(Series(c1)) tm.assert_categorical_equal(c1, c2) # Series c1 = Categorical(["a", "b", "c", "a"]) c2 = Categorical(Series(["a", "b", "c", "a"])) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) c2 = Categorical(Series(["a", "b", "c", "a"]), categories=["a", "b", "c", "d"]) tm.assert_categorical_equal(c1, c2) # This should result in integer categories, not float! cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) assert is_integer_dtype(cat.categories) # https://github.com/pandas-dev/pandas/issues/3678 cat = Categorical([np.nan, 1, 2, 3]) assert is_integer_dtype(cat.categories) # this should result in floats cat = Categorical([np.nan, 1, 2.0, 3]) assert is_float_dtype(cat.categories) cat = Categorical([np.nan, 1.0, 2.0, 3.0]) assert is_float_dtype(cat.categories) # This doesn't work -> this would probably need some kind of "remember # the original type" feature to try to cast the array interface result # to... # vals = np.asarray(cat[cat.notna()]) # assert is_integer_dtype(vals) # corner cases cat = Categorical([1]) assert len(cat.categories) == 1 assert cat.categories[0] == 1 assert len(cat.codes) == 1 assert cat.codes[0] == 0 cat = Categorical(["a"]) assert len(cat.categories) == 1 assert cat.categories[0] == "a" assert len(cat.codes) == 1 assert cat.codes[0] == 0 with tm.assert_produces_warning(FutureWarning): # GH#38433 cat = Categorical(1) assert len(cat.categories) == 1 assert cat.categories[0] == 1 assert len(cat.codes) == 1 assert cat.codes[0] == 0 # two arrays # - when the first is an integer dtype and the second is not # - when the resulting codes are all -1/NaN with tm.assert_produces_warning(None): Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"]) with tm.assert_produces_warning(None): Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5]) # the next one are from the old docs with tm.assert_produces_warning(None): Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) cat = Categorical([1, 2], categories=[1, 2, 3]) # this is a legitimate constructor with tm.assert_produces_warning(None): Categorical(np.array([], dtype="int64"), categories=[3, 2, 1], ordered=True)
def test_qcut_specify_quantiles(): arr = np.random.randn(100) factor = qcut(arr, [0, 0.25, 0.5, 0.75, 1.0]) expected = qcut(arr, 4) tm.assert_categorical_equal(factor, expected)
def test_qcut_list_like_labels(labels, expected): # GH 13318 values = range(3) result = qcut(values, 3, labels=labels) tm.assert_categorical_equal(result, expected)
def test_union_categoricals_sort_false(self): # GH 13846 c1 = Categorical(["x", "y", "z"]) c2 = Categorical(["a", "b", "c"]) result = union_categoricals([c1, c2], sort_categories=False) expected = Categorical(["x", "y", "z", "a", "b", "c"], categories=["x", "y", "z", "a", "b", "c"]) tm.assert_categorical_equal(result, expected) # fastpath c1 = Categorical(["a", "b"], categories=["b", "a", "c"]) c2 = Categorical(["b", "c"], categories=["b", "a", "c"]) result = union_categoricals([c1, c2], sort_categories=False) expected = Categorical(["a", "b", "b", "c"], categories=["b", "a", "c"]) tm.assert_categorical_equal(result, expected) # fastpath - skip resort c1 = Categorical(["a", "b"], categories=["a", "b", "c"]) c2 = Categorical(["b", "c"], categories=["a", "b", "c"]) result = union_categoricals([c1, c2], sort_categories=False) expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"]) tm.assert_categorical_equal(result, expected) c1 = Categorical(["x", np.nan]) c2 = Categorical([np.nan, "b"]) result = union_categoricals([c1, c2], sort_categories=False) expected = Categorical(["x", np.nan, np.nan, "b"], categories=["x", "b"]) tm.assert_categorical_equal(result, expected) c1 = Categorical([np.nan]) c2 = Categorical([np.nan]) result = union_categoricals([c1, c2], sort_categories=False) expected = Categorical([np.nan, np.nan]) tm.assert_categorical_equal(result, expected) c1 = Categorical([]) c2 = Categorical([]) result = union_categoricals([c1, c2], sort_categories=False) expected = Categorical([]) tm.assert_categorical_equal(result, expected) c1 = Categorical(["b", "a"], categories=["b", "a", "c"], ordered=True) c2 = Categorical(["a", "c"], categories=["b", "a", "c"], ordered=True) result = union_categoricals([c1, c2], sort_categories=False) expected = Categorical(["b", "a", "a", "c"], categories=["b", "a", "c"], ordered=True) tm.assert_categorical_equal(result, expected)
def test_union_categoricals_ignore_order(self): # GH 15219 c1 = Categorical([1, 2, 3], ordered=True) c2 = Categorical([1, 2, 3], ordered=False) res = union_categoricals([c1, c2], ignore_order=True) exp = Categorical([1, 2, 3, 1, 2, 3]) tm.assert_categorical_equal(res, exp) msg = "Categorical.ordered must be the same" with pytest.raises(TypeError, match=msg): union_categoricals([c1, c2], ignore_order=False) res = union_categoricals([c1, c1], ignore_order=True) exp = Categorical([1, 2, 3, 1, 2, 3]) tm.assert_categorical_equal(res, exp) res = union_categoricals([c1, c1], ignore_order=False) exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True) tm.assert_categorical_equal(res, exp) c1 = Categorical([1, 2, 3, np.nan], ordered=True) c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) res = union_categoricals([c1, c2], ignore_order=True) exp = Categorical([1, 2, 3, np.nan, 3, 2]) tm.assert_categorical_equal(res, exp) c1 = Categorical([1, 2, 3], ordered=True) c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) res = union_categoricals([c1, c2], ignore_order=True) exp = Categorical([1, 2, 3, 1, 2, 3]) tm.assert_categorical_equal(res, exp) res = union_categoricals([c2, c1], ignore_order=True, sort_categories=True) exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) tm.assert_categorical_equal(res, exp) c1 = Categorical([1, 2, 3], ordered=True) c2 = Categorical([4, 5, 6], ordered=True) result = union_categoricals([c1, c2], ignore_order=True) expected = Categorical([1, 2, 3, 4, 5, 6]) tm.assert_categorical_equal(result, expected) msg = "to union ordered Categoricals, all categories must be the same" with pytest.raises(TypeError, match=msg): union_categoricals([c1, c2], ignore_order=False) with pytest.raises(TypeError, match=msg): union_categoricals([c1, c2])
def test_take_fill_value(self): # GH 12631 # numeric category idx = CategoricalIndex([1, 2, 3], name="xxx") result = idx.take(np.array([1, 0, -1])) expected = CategoricalIndex([2, 1, 3], name="xxx") tm.assert_index_equal(result, expected) tm.assert_categorical_equal(result.values, expected.values) # fill_value result = idx.take(np.array([1, 0, -1]), fill_value=True) expected = CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx") tm.assert_index_equal(result, expected) tm.assert_categorical_equal(result.values, expected.values) # allow_fill=False result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = CategoricalIndex([2, 1, 3], name="xxx") tm.assert_index_equal(result, expected) tm.assert_categorical_equal(result.values, expected.values) # object category idx = CategoricalIndex(list("CBA"), categories=list("ABC"), ordered=True, name="xxx") result = idx.take(np.array([1, 0, -1])) expected = CategoricalIndex(list("BCA"), categories=list("ABC"), ordered=True, name="xxx") tm.assert_index_equal(result, expected) tm.assert_categorical_equal(result.values, expected.values) # fill_value result = idx.take(np.array([1, 0, -1]), fill_value=True) expected = CategoricalIndex(["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx") tm.assert_index_equal(result, expected) tm.assert_categorical_equal(result.values, expected.values) # allow_fill=False result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = CategoricalIndex(list("BCA"), categories=list("ABC"), ordered=True, name="xxx") tm.assert_index_equal(result, expected) tm.assert_categorical_equal(result.values, expected.values) msg = ("When allow_fill=True and fill_value is not None, " "all indices must be >= -1") with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) msg = "index -5 is out of bounds for (axis 0 with )?size 3" with pytest.raises(IndexError, match=msg): idx.take(np.array([1, -5]))