Esempio n. 1
0
    def test_from_codes(self):
        """Check ``Categorical.from_codes`` input validation and a valid call."""
        # Every (codes, categories) pair below must raise ValueError.
        rejected_inputs = [
            ([1, 2], [1, 2]),  # too few categories
            (["a"], [1, 2]),  # no int codes
            ([0, 1, 2], ["a", "a", "b"]),  # no unique categories
            ([0, 1, 2], ["a", "b", np.nan]),  # NaN categories included
            ([-2, 1, 2], ["a", "b", "c"]),  # too negative
        ]
        for bad_codes, bad_categories in rejected_inputs:
            with pytest.raises(ValueError):
                Categorical.from_codes(bad_codes, bad_categories)

        result = Categorical.from_codes([0, 1, 2], ["a", "b", "c"])
        expected = Categorical(["a", "b", "c"], ordered=False)
        tm.assert_categorical_equal(expected, result)

        # Not available in earlier numpy versions
        if not hasattr(np.random, "choice"):
            return
        # Smoke check only: the call must not raise, nothing is asserted.
        random_codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
        Categorical.from_codes(random_codes, categories=["train", "test"])
Esempio n. 2
0
 def test_constructor_interval(self):
     result = Categorical([Interval(1, 2), Interval(2, 3), Interval(3, 6)],
                          ordered=True)
     ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)])
     exp = Categorical(ii, ordered=True)
     tm.assert_categorical_equal(result, exp)
     tm.assert_index_equal(result.categories, ii)
    def test_union_categoricals_nan(self):
        # GH 13759
        res = union_categoricals([pd.Categorical([1, 2, np.nan]),
                                  pd.Categorical([3, 2, np.nan])])
        exp = Categorical([1, 2, np.nan, 3, 2, np.nan])
        tm.assert_categorical_equal(res, exp)

        res = union_categoricals([pd.Categorical(['A', 'B']),
                                  pd.Categorical(['B', 'B', np.nan])])
        exp = Categorical(['A', 'B', 'B', 'B', np.nan])
        tm.assert_categorical_equal(res, exp)

        val1 = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-03-01'),
                pd.NaT]
        val2 = [pd.NaT, pd.Timestamp('2011-01-01'),
                pd.Timestamp('2011-02-01')]

        res = union_categoricals([pd.Categorical(val1), pd.Categorical(val2)])
        exp = Categorical(val1 + val2,
                          categories=[pd.Timestamp('2011-01-01'),
                                      pd.Timestamp('2011-03-01'),
                                      pd.Timestamp('2011-02-01')])
        tm.assert_categorical_equal(res, exp)

        # all NaN
        res = union_categoricals([pd.Categorical([np.nan, np.nan]),
                                  pd.Categorical(['X'])])
        exp = Categorical([np.nan, np.nan, 'X'])
        tm.assert_categorical_equal(res, exp)

        res = union_categoricals([pd.Categorical([np.nan, np.nan]),
                                  pd.Categorical([np.nan, np.nan])])
        exp = Categorical([np.nan, np.nan, np.nan, np.nan])
        tm.assert_categorical_equal(res, exp)
Esempio n. 4
0
def test_cut_pass_labels(get_labels, get_expected):
    bins = [0, 25, 50, 100]
    arr = [50, 5, 10, 15, 20, 30, 70]
    labels = ["Small", "Medium", "Large"]

    result = cut(arr, bins, labels=get_labels(labels))
    tm.assert_categorical_equal(result, get_expected(labels))
    def test_union_categoricals_ordered(self):
        """Check how ``union_categoricals`` treats ordered Categoricals."""
        ordered = Categorical([1, 2, 3], ordered=True)
        unordered = Categorical([1, 2, 3], ordered=False)

        # Mixing ordered and unordered inputs must raise.
        msg = 'Categorical.ordered must be the same'
        with tm.assertRaisesRegexp(TypeError, msg):
            union_categoricals([ordered, unordered])

        doubled = union_categoricals([ordered, ordered])
        tm.assert_categorical_equal(
            doubled, Categorical([1, 2, 3, 1, 2, 3], ordered=True))

        # Same ordered categories, one input holding a missing value.
        with_nan = Categorical([1, 2, 3, np.nan], ordered=True)
        subset = Categorical([3, 2], categories=[1, 2, 3], ordered=True)

        combined = union_categoricals([with_nan, subset])
        tm.assert_categorical_equal(
            combined, Categorical([1, 2, 3, np.nan, 3, 2], ordered=True))

        # Ordered inputs whose categories are listed in a different order.
        ascending = Categorical([1, 2, 3], ordered=True)
        descending = Categorical([1, 2, 3], categories=[3, 2, 1],
                                 ordered=True)

        msg = "to union ordered Categoricals, all categories must be the same"
        with tm.assertRaisesRegexp(TypeError, msg):
            union_categoricals([ascending, descending])
Esempio n. 6
0
 def test_from_inferred_categories_coerces(self):
     cats = ['1', '2', 'bad']
     codes = np.array([0, 0, 1, 2], dtype='i8')
     dtype = CategoricalDtype([1, 2])
     result = Categorical._from_inferred_categories(cats, codes, dtype)
     expected = Categorical([1, 1, 2, np.nan])
     tm.assert_categorical_equal(result, expected)
Esempio n. 7
0
def test_unique():
    """Check ``Series.unique`` with missing values for several dtypes."""
    # GH714 also, dtype=float; NAs in object arrays #714
    # Each series has every other element set to NaN, so exactly two
    # distinct values (the constant and NaN) are expected.
    half_missing_cases = [
        ([1.2345] * 100, None),
        ([1.2345] * 100, 'f4'),
        (['foo'] * 100, 'O'),
    ]
    for values, dtype in half_missing_cases:
        ser = Series(values, dtype=dtype)
        ser[::2] = np.nan
        assert len(ser.unique()) == 2

    # decision about None
    with_none = Series([1, 2, 3, None, None, None], dtype=object)
    tm.assert_numpy_array_equal(with_none.unique(),
                                np.array([1, 2, 3, None], dtype=object))

    # GH 18051
    empty = Series(Categorical([]))
    tm.assert_categorical_equal(empty.unique(), Categorical([]),
                                check_dtype=False)
    only_nan = Series(Categorical([np.nan]))
    tm.assert_categorical_equal(only_nan.unique(), Categorical([np.nan]),
                                check_dtype=False)
Esempio n. 8
0
 def test_take_allow_fill(self):
     # https://github.com/pandas-dev/pandas/issues/23296
     cat = pd.Categorical(['a', 'a', 'b'])
     result = cat.take([0, -1, -1], allow_fill=True)
     expected = pd.Categorical(['a', np.nan, np.nan],
                               categories=['a', 'b'])
     tm.assert_categorical_equal(result, expected)
Esempio n. 9
0
 def test_setitem_same_but_unordered(self, other):
     # GH-24142
     target = pd.Categorical(['a', 'b'], categories=['a', 'b'])
     mask = np.array([True, False])
     target[mask] = other[mask]
     expected = pd.Categorical(['b', 'b'], categories=['a', 'b'])
     tm.assert_categorical_equal(target, expected)
Esempio n. 10
0
 def test_positional_take(self, ordered):
     cat = pd.Categorical(['a', 'a', 'b', 'b'], categories=['b', 'a'],
                          ordered=ordered)
     result = cat.take([0, 1, 2], allow_fill=False)
     expected = pd.Categorical(['a', 'a', 'b'], categories=cat.categories,
                               ordered=ordered)
     tm.assert_categorical_equal(result, expected)
Esempio n. 11
0
    def test_rename_categories(self):
        """Check ``rename_categories`` with lists, callables and inplace."""
        cat = Categorical(["a", "b", "c", "a"])
        renamed_values = np.array([1, 2, 3, 1], dtype=np.int64)

        # inplace=False: the old one must not be changed
        renamed = cat.rename_categories([1, 2, 3])
        tm.assert_numpy_array_equal(renamed.__array__(), renamed_values)
        tm.assert_index_equal(renamed.categories, Index([1, 2, 3]))

        untouched_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
        tm.assert_numpy_array_equal(cat.__array__(), untouched_values)
        tm.assert_index_equal(cat.categories, Index(["a", "b", "c"]))

        # GH18862 (let rename_categories take callables)
        upper_cased = cat.rename_categories(lambda x: x.upper())
        tm.assert_categorical_equal(upper_cased,
                                    Categorical(["A", "B", "C", "A"]))

        # and now inplace
        outcome = cat.rename_categories([1, 2, 3], inplace=True)
        assert outcome is None
        tm.assert_numpy_array_equal(cat.__array__(), renamed_values)
        tm.assert_index_equal(cat.categories, Index([1, 2, 3]))

        # Lengthen, then shorten: a wrong number of new names is an error.
        for wrong_length in ([1, 2, 3, 4], [1, 2]):
            with pytest.raises(ValueError):
                cat.rename_categories(wrong_length)
Esempio n. 12
0
    def test_numpy_repeat(self):
        """``np.repeat`` works on a Categorical but rejects ``axis``."""
        source = Categorical(["a", "b"], categories=["a", "b"])
        repeated = np.repeat(source, 2)
        tm.assert_categorical_equal(
            repeated,
            Categorical(["a", "a", "b", "b"], categories=["a", "b"]))

        msg = "the 'axis' parameter is not supported"
        with tm.assert_raises_regex(ValueError, msg):
            np.repeat(source, 2, axis=1)
Esempio n. 13
0
    def test_remove_categories(self):
        """Check ``remove_categories`` in copy and inplace modes."""
        cat = Categorical(["a", "b", "c", "a"], ordered=True)
        snapshot = cat.copy()
        without_c = Categorical(["a", "b", np.nan, "a"],
                                categories=["a", "b"], ordered=True)

        # first inplace == False: scalar and list-like removals both leave
        # the original alone and return the reduced Categorical
        for removal in ("c", ["c"]):
            reduced = cat.remove_categories(removal)
            tm.assert_categorical_equal(cat, snapshot)
            tm.assert_categorical_equal(reduced, without_c)

        # inplace == True
        outcome = cat.remove_categories("c", inplace=True)
        tm.assert_categorical_equal(cat, without_c)
        assert outcome is None

        # removal is not in categories
        with pytest.raises(ValueError):
            cat.remove_categories(["c"])
Esempio n. 14
0
    def test_basic(self):

        # run multiple times here
        for n in range(10):
            for s, i in self.d.items():
                i_rec = self.encode_decode(i)
                assert_categorical_equal(i, i_rec)
Esempio n. 15
0
def check_arbitrary(a, b):
    """Assert that ``a`` and ``b`` are equal, dispatching on the type of ``a``.

    Lists/tuples are compared element-wise (recursively) after a length
    check; DataFrame, Series, Index and Categorical use the matching
    ``assert_*_equal`` helper; ``NaT`` is compared by identity; Timestamps
    must also agree on ``freq``; anything else is compared with ``==``.
    """
    if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)):
        assert len(a) == len(b)
        for left, right in zip(a, b):
            check_arbitrary(left, right)
        return

    if isinstance(a, DataFrame):
        assert_frame_equal(a, b)
        return

    if isinstance(a, Series):
        assert_series_equal(a, b)
        return

    if isinstance(a, Index):
        assert_index_equal(a, b)
        return

    if isinstance(a, Categorical):
        # Temp,
        # Categorical.categories is changed from str to bytes in PY3
        # maybe the same as GH 13591
        # so string categories are skipped rather than compared.
        if b.categories.inferred_type != 'string':
            tm.assert_categorical_equal(a, b)
        return

    if a is NaT:
        assert b is NaT
        return

    if isinstance(a, Timestamp):
        assert a == b
        assert a.freq == b.freq
        return

    assert a == b
Esempio n. 16
0
 def test_arraylike(self):
     """``cut`` on a plain list with an integer bin count and ``retbins``."""
     values = [.2, 1.4, 2.5, 6.2, 9.7, 2.1]
     binned, edges = cut(values, 3, retbins=True)
     # Expected bin per input value, looked up by position in the
     # interval index rebuilt from the rounded edges.
     rebuilt = IntervalIndex.from_breaks(edges.round(3))
     expected = rebuilt.take([0, 0, 0, 1, 2, 0]).astype('category')
     tm.assert_categorical_equal(binned, expected)
     expected_edges = np.array([0.1905, 3.36666667, 6.53333333, 9.7])
     tm.assert_almost_equal(edges, expected_edges)
Esempio n. 17
0
def test_factorized_sort():
    """``factorize(sort=True)`` on a Categorical that contains ``None``."""
    source = pd.Categorical(['b', 'b', None, 'a'])
    codes, uniques = pd.factorize(source, sort=True)

    # The missing entry is expected to get code -1.
    tm.assert_numpy_array_equal(codes,
                                np.array([1, 1, -1, 0], dtype=np.intp))
    tm.assert_categorical_equal(uniques, pd.Categorical(['a', 'b']))
Esempio n. 18
0
 def test_noright(self):
     """``cut`` with ``right=False`` must produce left-closed bins."""
     values = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575])
     binned, edges = cut(values, 4, right=False, retbins=True)
     rebuilt = IntervalIndex.from_breaks(edges.round(3), closed='left')
     expected = rebuilt.take([0, 0, 0, 2, 3, 0, 1]).astype('category')
     tm.assert_categorical_equal(binned, expected)
     expected_edges = np.array([0.2, 2.575, 4.95, 7.325, 9.7095])
     tm.assert_almost_equal(edges, expected_edges)
Esempio n. 19
0
 def test_constructor_with_dtype(self, ordered):
     categories = ['b', 'a', 'c']
     dtype = CategoricalDtype(categories, ordered=ordered)
     result = Categorical(['a', 'b', 'a', 'c'], dtype=dtype)
     expected = Categorical(['a', 'b', 'a', 'c'], categories=categories,
                            ordered=ordered)
     tm.assert_categorical_equal(result, expected)
     assert result.ordered is ordered
Esempio n. 20
0
    def test_numpy_repeat(self):
        """``np.repeat`` works on a Categorical but rejects ``axis``."""
        source = Categorical(["a", "b"], categories=["a", "b"])
        repeated = np.repeat(source, 2)
        tm.assert_categorical_equal(
            repeated,
            Categorical(["a", "a", "b", "b"], categories=["a", "b"]))

        with pytest.raises(ValueError,
                           match="the 'axis' parameter is not supported"):
            np.repeat(source, 2, axis=1)
 def test_union_categorical_same_categories_different_order(self):
     # https://github.com/pandas-dev/pandas/issues/19096
     c1 = Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c'])
     c2 = Categorical(['a', 'b', 'c'], categories=['b', 'a', 'c'])
     result = union_categoricals([c1, c2])
     expected = Categorical(['a', 'b', 'c', 'a', 'b', 'c'],
                            categories=['a', 'b', 'c'])
     tm.assert_categorical_equal(result, expected)
Esempio n. 22
0
def test_ensure_categorical():
    """``_ensure_categorical`` on an int32 array and on a Categorical."""
    raw = np.arange(10, dtype=np.int32)
    converted = _ensure_categorical(raw)
    assert converted.dtype == 'category'

    # A Categorical input must come back equal to itself.
    already_categorical = Categorical(raw)
    passed_through = _ensure_categorical(already_categorical)
    tm.assert_categorical_equal(passed_through, already_categorical)
Esempio n. 23
0
    def test_constructor_with_index(self):
        ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
        tm.assert_categorical_equal(ci.values, Categorical(ci))

        ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
        tm.assert_categorical_equal(ci.values,
                                    Categorical(ci.astype(object),
                                                categories=ci.categories))
Esempio n. 24
0
    def test_groupby_describe_categorical_columns(self):
        """``groupby(...).describe()`` on a frame with categorical columns."""
        # GH 11558
        columns = pd.CategoricalIndex(
            ["qux", "foo", "baz", "bar"],
            categories=["foo", "bar", "baz", "qux"],
            ordered=True)
        frame = DataFrame(np.random.randn(20, 4), columns=columns)
        described = frame.groupby([1, 2, 3, 4] * 5).describe()

        tm.assert_index_equal(described.columns, columns)
        tm.assert_categorical_equal(described.columns.values, columns.values)
Esempio n. 25
0
 def test_constructor_from_categorical_with_unknown_dtype(self):
     dtype = CategoricalDtype(None, ordered=True)
     values = Categorical(['a', 'b', 'd'])
     result = Categorical(values, dtype=dtype)
     # We use values.categories, not dtype.categories
     expected = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'd'],
                            ordered=True)
     tm.assert_categorical_equal(result, expected)
Esempio n. 26
0
 def test_from_inferred_categories_dtype(self):
     cats = ['a', 'b', 'd']
     codes = np.array([0, 1, 0, 2], dtype='i8')
     dtype = CategoricalDtype(['c', 'b', 'a'], ordered=True)
     result = Categorical._from_inferred_categories(cats, codes, dtype)
     expected = Categorical(['a', 'b', 'a', 'd'],
                            categories=['c', 'b', 'a'],
                            ordered=True)
     tm.assert_categorical_equal(result, expected)
Esempio n. 27
0
    def test_reshape_categorical_numpy(self):
        """``np.reshape`` on a Categorical warns and rejects ``order``."""
        msg = "the 'order' parameter is not supported"

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            values = Categorical(["a", "b"], categories=["a", "b"])
            reshaped = np.reshape(values, values.shape)
            tm.assert_categorical_equal(reshaped, values)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            with tm.assert_raises_regex(ValueError, msg):
                np.reshape(values, values.shape, order='F')
Esempio n. 28
0
 def test_create_categorical(self):
     """``_create_categorical`` called with a CategoricalIndex as data."""
     # https://github.com/pandas-dev/pandas/pull/17513
     # The public CI constructor doesn't hit this code path with
     # instances of CategoricalIndex, but we still want to test the code
     index = CategoricalIndex(['a', 'b', 'c'])
     # First ci is self, second ci is data.
     created = CategoricalIndex._create_categorical(index, index)
     tm.assert_categorical_equal(created, Categorical(['a', 'b', 'c']))
Esempio n. 29
0
 def test_map_with_nan(self, data, f):  # GH 24241
     """Check ``Categorical.map`` on ``data``; expected type depends on it.

     When ``data[1] == 1`` a Categorical is expected, otherwise an Index.
     """
     mapped = pd.Categorical(data).map(f)
     if data[1] != 1:
         tm.assert_index_equal(mapped, pd.Index([False, False, np.nan]))
     else:
         tm.assert_categorical_equal(
             mapped, pd.Categorical([False, False, np.nan]))
Esempio n. 30
0
 def test_right(self):
     data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575])
     result, bins = cut(data, 4, right=True, retbins=True)
     intervals = IntervalIndex.from_breaks(bins.round(3))
     expected = Categorical(intervals, ordered=True)
     expected = expected.take([0, 0, 0, 2, 3, 0, 0])
     tm.assert_categorical_equal(result, expected)
     tm.assert_almost_equal(bins, np.array([0.1905, 2.575, 4.95,
                                            7.325, 9.7]))
Esempio n. 31
0
 def test_from_inferred_categories_sorts(self, dtype):
     cats = ['b', 'a']
     codes = np.array([0, 1, 1, 1], dtype='i8')
     result = Categorical._from_inferred_categories(cats, codes, dtype)
     expected = Categorical.from_codes([1, 0, 0, 0], ['a', 'b'])
     tm.assert_categorical_equal(result, expected)
Esempio n. 32
0
    def test_constructor(self):
        """Exercise the ``Categorical`` constructor over many input kinds.

        Covers plain arrays with and without explicit categories, rejection
        of duplicate categories, the unordered default, Categorical and
        Series inputs, the dtype of the inferred categories when NaN is
        present, single-element and scalar inputs, and a handful of calls
        that must not emit any warning.
        """

        exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_)
        c1 = Categorical(exp_arr)
        tm.assert_numpy_array_equal(c1.__array__(), exp_arr)
        c2 = Categorical(exp_arr, categories=["a", "b", "c"])
        tm.assert_numpy_array_equal(c2.__array__(), exp_arr)
        c2 = Categorical(exp_arr, categories=["c", "b", "a"])
        tm.assert_numpy_array_equal(c2.__array__(), exp_arr)

        # categories must be unique
        def f():
            Categorical([1, 2], [1, 2, 2])

        pytest.raises(ValueError, f)

        def f():
            Categorical(["a", "b"], ["a", "b", "b"])

        pytest.raises(ValueError, f)

        # The default should be unordered
        c1 = Categorical(["a", "b", "c", "a"])
        assert not c1.ordered

        # Categorical as input
        c1 = Categorical(["a", "b", "c", "a"])
        c2 = Categorical(c1)
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        c2 = Categorical(c1)
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
        c2 = Categorical(c1)
        tm.assert_categorical_equal(c1, c2)

        # Re-specifying categories in another order keeps the values but
        # must adopt the new category order.
        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
        c2 = Categorical(c1, categories=["a", "b", "c"])
        tm.assert_numpy_array_equal(c1.__array__(), c2.__array__())
        tm.assert_index_equal(c2.categories, Index(["a", "b", "c"]))

        # Series of dtype category
        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        c2 = Categorical(Series(c1))
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
        c2 = Categorical(Series(c1))
        tm.assert_categorical_equal(c1, c2)

        # Series
        c1 = Categorical(["a", "b", "c", "a"])
        c2 = Categorical(Series(["a", "b", "c", "a"]))
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        c2 = Categorical(Series(["a", "b", "c", "a"]),
                         categories=["a", "b", "c", "d"])
        tm.assert_categorical_equal(c1, c2)

        # This should result in integer categories, not float!
        cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
        assert is_integer_dtype(cat.categories)

        # https://github.com/pandas-dev/pandas/issues/3678
        cat = Categorical([np.nan, 1, 2, 3])
        assert is_integer_dtype(cat.categories)

        # this should result in floats
        cat = Categorical([np.nan, 1, 2., 3])
        assert is_float_dtype(cat.categories)

        cat = Categorical([np.nan, 1., 2., 3.])
        assert is_float_dtype(cat.categories)

        # This doesn't work -> this would probably need some kind of "remember
        # the original type" feature to try to cast the array interface result
        # to...

        # vals = np.asarray(cat[cat.notna()])
        # assert is_integer_dtype(vals)

        # corner cases
        cat = Categorical([1])
        assert len(cat.categories) == 1
        assert cat.categories[0] == 1
        assert len(cat.codes) == 1
        assert cat.codes[0] == 0

        cat = Categorical(["a"])
        assert len(cat.categories) == 1
        assert cat.categories[0] == "a"
        assert len(cat.codes) == 1
        assert cat.codes[0] == 0

        # Scalars should be converted to lists
        cat = Categorical(1)
        assert len(cat.categories) == 1
        assert cat.categories[0] == 1
        assert len(cat.codes) == 1
        assert cat.codes[0] == 0

        # two arrays
        #  - when the first is an integer dtype and the second is not
        #  - when the resulting codes are all -1/NaN
        # The results below are deliberately unused: each block only checks
        # that construction emits no warning.
        with tm.assert_produces_warning(None):
            c_old = Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b",
                                                                "c"])  # noqa

        with tm.assert_produces_warning(None):
            c_old = Categorical(
                [0, 1, 2, 0, 1, 2],  # noqa
                categories=[3, 4, 5])

        # the next one are from the old docs
        with tm.assert_produces_warning(None):
            c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3])  # noqa
            cat = Categorical([1, 2], categories=[1, 2, 3])

        # this is a legitimate constructor
        with tm.assert_produces_warning(None):
            c = Categorical(
                np.array([], dtype='int64'),  # noqa
                categories=[3, 2, 1],
                ordered=True)
Esempio n. 33
0
    def test_take_fill_value(self):
        """Check ``CategoricalIndex.take`` with and without ``fill_value``."""
        # GH 12631
        positions = np.array([1, 0, -1])

        # numeric category
        idx = pd.CategoricalIndex([1, 2, 3], name='xxx')
        wrapped = pd.CategoricalIndex([2, 1, 3], name='xxx')
        filled = pd.CategoricalIndex([2, 1, np.nan],
                                     categories=[1, 2, 3],
                                     name='xxx')
        # Plain take, take with fill_value, and allow_fill=False, in turn.
        numeric_cases = [
            ({}, wrapped),
            ({'fill_value': True}, filled),
            ({'allow_fill': False, 'fill_value': True}, wrapped),
        ]
        for take_kwargs, expected in numeric_cases:
            result = idx.take(positions, **take_kwargs)
            tm.assert_index_equal(result, expected)
            tm.assert_categorical_equal(result.values, expected.values)

        # object category
        idx = pd.CategoricalIndex(list('CBA'),
                                  categories=list('ABC'),
                                  ordered=True,
                                  name='xxx')
        wrapped = pd.CategoricalIndex(list('BCA'),
                                      categories=list('ABC'),
                                      ordered=True,
                                      name='xxx')
        filled = pd.CategoricalIndex(['B', 'C', np.nan],
                                     categories=list('ABC'),
                                     ordered=True,
                                     name='xxx')
        object_cases = [
            ({}, wrapped),
            ({'fill_value': True}, filled),
            ({'allow_fill': False, 'fill_value': True}, wrapped),
        ]
        for take_kwargs, expected in object_cases:
            result = idx.take(positions, **take_kwargs)
            tm.assert_index_equal(result, expected)
            tm.assert_categorical_equal(result.values, expected.values)

        # With a fill value, positions below -1 are rejected.
        msg = ('When allow_fill=True and fill_value is not None, '
               'all indices must be >= -1')
        for too_negative in (-2, -5):
            with tm.assertRaisesRegexp(ValueError, msg):
                idx.take(np.array([1, 0, too_negative]), fill_value=True)

        with tm.assertRaises(IndexError):
            idx.take(np.array([1, -5]))
Esempio n. 34
0
    def test_set_item_nan(self):
        cat = Categorical([1, 2, 3])
        cat[1] = np.nan

        exp = Categorical([1, np.nan, 3], categories=[1, 2, 3])
        tm.assert_categorical_equal(cat, exp)
Esempio n. 35
0
 def test_categories_none_comparisons(self):
     factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"],
                          ordered=True)
     tm.assert_categorical_equal(factor, self.factor)
Esempio n. 36
0
    def test_union_categoricals_sort(self):
        """Check ``union_categoricals(..., sort_categories=True)``."""
        # GH 13846
        # Each entry is (first input, second input, expected union).
        cases = [
            (Categorical(['x', 'y', 'z']),
             Categorical(['a', 'b', 'c']),
             Categorical(['x', 'y', 'z', 'a', 'b', 'c'],
                         categories=['a', 'b', 'c', 'x', 'y', 'z'])),
            # fastpath
            (Categorical(['a', 'b'], categories=['b', 'a', 'c']),
             Categorical(['b', 'c'], categories=['b', 'a', 'c']),
             Categorical(['a', 'b', 'b', 'c'],
                         categories=['a', 'b', 'c'])),
            (Categorical(['a', 'b'], categories=['c', 'a', 'b']),
             Categorical(['b', 'c'], categories=['c', 'a', 'b']),
             Categorical(['a', 'b', 'b', 'c'],
                         categories=['a', 'b', 'c'])),
            # fastpath - skip resort
            (Categorical(['a', 'b'], categories=['a', 'b', 'c']),
             Categorical(['b', 'c'], categories=['a', 'b', 'c']),
             Categorical(['a', 'b', 'b', 'c'],
                         categories=['a', 'b', 'c'])),
            # inputs containing NaN
            (Categorical(['x', np.nan]),
             Categorical([np.nan, 'b']),
             Categorical(['x', np.nan, np.nan, 'b'],
                         categories=['b', 'x'])),
            (Categorical([np.nan]),
             Categorical([np.nan]),
             Categorical([np.nan, np.nan])),
            # empty inputs
            (Categorical([]),
             Categorical([]),
             Categorical([])),
        ]
        for left, right, expected in cases:
            result = union_categoricals([left, right], sort_categories=True)
            tm.assert_categorical_equal(result, expected)

        # Sorting the categories of ordered inputs must raise.
        left = Categorical(['b', 'a'], categories=['b', 'a', 'c'],
                           ordered=True)
        right = Categorical(['a', 'c'], categories=['b', 'a', 'c'],
                            ordered=True)
        with pytest.raises(TypeError):
            union_categoricals([left, right], sort_categories=True)
Esempio n. 37
0
    def test_comparisons(self):
        """Exercise comparison operators on Categoricals.

        Covers scalar comparisons on ``self.factor`` (checked against the
        same comparison on its numpy array form), element-wise equality with
        another Categorical, ordered comparisons that must respect the
        category order, and the combinations that must raise ``TypeError``.
        """

        # Boolean masks built from the Categorical and from its array form
        # must select the same elements.
        result = self.factor[self.factor == 'a']
        expected = self.factor[np.asarray(self.factor) == 'a']
        tm.assert_categorical_equal(result, expected)

        result = self.factor[self.factor != 'a']
        expected = self.factor[np.asarray(self.factor) != 'a']
        tm.assert_categorical_equal(result, expected)

        result = self.factor[self.factor < 'c']
        expected = self.factor[np.asarray(self.factor) < 'c']
        tm.assert_categorical_equal(result, expected)

        result = self.factor[self.factor > 'a']
        expected = self.factor[np.asarray(self.factor) > 'a']
        tm.assert_categorical_equal(result, expected)

        result = self.factor[self.factor >= 'b']
        expected = self.factor[np.asarray(self.factor) >= 'b']
        tm.assert_categorical_equal(result, expected)

        result = self.factor[self.factor <= 'b']
        expected = self.factor[np.asarray(self.factor) <= 'b']
        tm.assert_categorical_equal(result, expected)

        n = len(self.factor)

        # Element-wise equality against a shuffled copy of the same factor.
        other = self.factor[np.random.permutation(n)]
        result = self.factor == other
        expected = np.asarray(self.factor) == np.asarray(other)
        tm.assert_numpy_array_equal(result, expected)

        # Comparing against 'd' is expected to be False everywhere.
        result = self.factor == 'd'
        expected = np.repeat(False, len(self.factor))
        tm.assert_numpy_array_equal(result, expected)

        # comparisons with categoricals
        cat_rev = Categorical(["a", "b", "c"],
                              categories=["c", "b", "a"],
                              ordered=True)
        cat_rev_base = Categorical(["b", "b", "b"],
                                   categories=["c", "b", "a"],
                                   ordered=True)
        cat = Categorical(["a", "b", "c"], ordered=True)
        cat_base = Categorical(["b", "b", "b"],
                               categories=cat.categories,
                               ordered=True)

        # comparisons need to take categories ordering into account
        res_rev = cat_rev > cat_rev_base
        exp_rev = np.array([True, False, False])
        tm.assert_numpy_array_equal(res_rev, exp_rev)

        res_rev = cat_rev < cat_rev_base
        exp_rev = np.array([False, False, True])
        tm.assert_numpy_array_equal(res_rev, exp_rev)

        res = cat > cat_base
        exp = np.array([False, False, True])
        tm.assert_numpy_array_equal(res, exp)

        # Only categories with same categories can be compared
        with pytest.raises(TypeError):
            cat > cat_rev

        cat_rev_base2 = Categorical(["b", "b", "b"],
                                    categories=["c", "b", "a", "d"])

        with pytest.raises(TypeError):
            cat_rev > cat_rev_base2

        # Only categories with same ordering information can be compared
        cat_unorderd = cat.set_ordered(False)
        assert not (cat > cat).any()

        with pytest.raises(TypeError):
            cat > cat_unorderd

        # comparison (in both directions) with Series will raise
        s = Series(["b", "b", "b"])
        msg = ("Cannot compare a Categorical for op __gt__ with type"
               r" <class 'numpy\.ndarray'>")
        with pytest.raises(TypeError, match=msg):
            cat > s
        with pytest.raises(TypeError, match=msg):
            cat_rev > s
        with pytest.raises(TypeError, match=msg):
            s < cat
        with pytest.raises(TypeError, match=msg):
            s < cat_rev

        # comparison with numpy.array will raise in both direction, but only on
        # newer numpy versions
        a = np.array(["b", "b", "b"])
        with pytest.raises(TypeError, match=msg):
            cat > a
        with pytest.raises(TypeError, match=msg):
            cat_rev > a

        # Make sure that unequal comparison take the categories order in
        # account
        cat_rev = Categorical(list("abc"),
                              categories=list("cba"),
                              ordered=True)
        exp = np.array([True, False, False])
        res = cat_rev > "b"
        tm.assert_numpy_array_equal(res, exp)

        # check that zero-dim array gets unboxed
        res = cat_rev > np.array("b")
        tm.assert_numpy_array_equal(res, exp)
Esempio n. 38
0
 def test_set_categories_many(self, values, categories, new_categories,
                              ordered):
     c = Categorical(values, categories)
     expected = Categorical(values, new_categories, ordered)
     result = c.set_categories(new_categories, ordered=ordered)
     tm.assert_categorical_equal(result, expected)
Esempio n. 39
0
 def test_set_categories_rename_less(self):
     # GH 24675
     cat = Categorical(["A", "B"])
     result = cat.set_categories(["A"], rename=True)
     expected = Categorical(["A", np.nan])
     tm.assert_categorical_equal(result, expected)
Esempio n. 40
0
 def test_rename_categories_series(self):
     # https://github.com/pandas-dev/pandas/issues/17981
     c = Categorical(["a", "b"])
     result = c.rename_categories(Series([0, 1], index=["a", "b"]))
     expected = Categorical([0, 1])
     tm.assert_categorical_equal(result, expected)
Esempio n. 41
0
 def test_categories_none_comparisons(self):
     factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
                          ordered=True)
     tm.assert_categorical_equal(factor, self.factor)
 def test_set_dtype_many(self, values, categories, new_categories, ordered):
     c = Categorical(values, categories)
     expected = Categorical(values, new_categories, ordered)
     result = c._set_dtype(expected.dtype)
     tm.assert_categorical_equal(result, expected)
 def test_repeat(self):
     # GH10183
     cat = Categorical(["a", "b"], categories=["a", "b"])
     exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"])
     res = cat.repeat(2)
     tm.assert_categorical_equal(res, exp)
Esempio n. 44
0
    def test_union_categoricals_sort_false(self):
        # GH 13846
        c1 = Categorical(['x', 'y', 'z'])
        c2 = Categorical(['a', 'b', 'c'])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'],
                               categories=['x', 'y', 'z', 'a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        # fastpath
        c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c'])
        c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c'])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['a', 'b', 'b', 'c'],
                               categories=['b', 'a', 'c'])
        tm.assert_categorical_equal(result, expected)

        # fastpath - skip resort
        c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
        c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c'])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['a', 'b', 'b', 'c'],
                               categories=['a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical(['x', np.nan])
        c2 = Categorical([np.nan, 'b'])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['x', np.nan, np.nan, 'b'],
                               categories=['x', 'b'])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical([np.nan])
        c2 = Categorical([np.nan])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical([np.nan, np.nan], categories=[])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical([])
        c2 = Categorical([])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical([])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True)
        c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True)
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['b', 'a', 'a', 'c'],
                               categories=['b', 'a', 'c'],
                               ordered=True)
        tm.assert_categorical_equal(result, expected)
Esempio n. 45
0
    def test_add_categories(self):
        """add_categories appends new categories, by copy or in place."""
        cat = Categorical(["a", "b", "c", "a"], ordered=True)
        snapshot = cat.copy()
        extended = Categorical(["a", "b", "c", "a"],
                               categories=["a", "b", "c", "d"],
                               ordered=True)

        # first inplace == False: a scalar and a list behave alike and the
        # original object must stay untouched
        for addition in ("d", ["d"]):
            res = cat.add_categories(addition)
            tm.assert_categorical_equal(cat, snapshot)
            tm.assert_categorical_equal(res, extended)

        # inplace == True
        outcome = cat.add_categories("d", inplace=True)
        tm.assert_categorical_equal(cat, extended)
        assert outcome is None

        # new is in old categories
        with pytest.raises(ValueError):
            cat.add_categories(["d"])

        # GH 9927
        base = Categorical(list("abc"), ordered=True)
        expected = Categorical(list("abc"),
                               categories=list("abcde"),
                               ordered=True)
        # test with Series, np.array, index, list
        extras = ["d", "e"]
        for container in (Series, np.array, Index, list):
            res = base.add_categories(container(extras))
            tm.assert_categorical_equal(res, expected)
Esempio n. 46
0
    def test_take_fill_value(self):
        """Check CategoricalIndex.take with and without fill_value.

        GH 12631
        """
        indexer = np.array([1, 0, -1])

        def check(result, expected):
            # Compare both as an index and via the underlying categorical.
            tm.assert_index_equal(result, expected)
            tm.assert_categorical_equal(result.values, expected.values)

        # numeric category
        idx = pd.CategoricalIndex([1, 2, 3], name="xxx")
        check(idx.take(indexer),
              pd.CategoricalIndex([2, 1, 3], name="xxx"))

        # fill_value
        check(idx.take(indexer, fill_value=True),
              pd.CategoricalIndex([2, 1, np.nan],
                                  categories=[1, 2, 3],
                                  name="xxx"))

        # allow_fill=False
        check(idx.take(indexer, allow_fill=False, fill_value=True),
              pd.CategoricalIndex([2, 1, 3], name="xxx"))

        # object category
        opts = dict(categories=list("ABC"), ordered=True, name="xxx")
        idx = pd.CategoricalIndex(list("CBA"), **opts)
        check(idx.take(indexer),
              pd.CategoricalIndex(list("BCA"), **opts))

        # fill_value
        check(idx.take(indexer, fill_value=True),
              pd.CategoricalIndex(["B", "C", np.nan], **opts))

        # allow_fill=False
        check(idx.take(indexer, allow_fill=False, fill_value=True),
              pd.CategoricalIndex(list("BCA"), **opts))

        # indices below -1 are rejected when filling is requested
        msg = ("When allow_fill=True and fill_value is not None, "
               "all indices must be >= -1")
        for bad in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                idx.take(np.array([1, 0, bad]), fill_value=True)

        with pytest.raises(IndexError):
            idx.take(np.array([1, -5]))
Esempio n. 47
0
 def test_mode(self, values, categories, exp_mode):
     s = Categorical(values, categories=categories, ordered=True)
     res = s.mode()
     exp = Categorical(exp_mode, categories=categories, ordered=True)
     tm.assert_categorical_equal(res, exp)
Esempio n. 48
0
    def test_unique_index_series(self):
        c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1])
        # Categorical.unique sorts categories by appearance order
        # if ordered=False
        exp = Categorical([3, 1, 2], categories=[3, 1, 2])
        tm.assert_categorical_equal(c.unique(), exp)

        tm.assert_index_equal(Index(c).unique(), Index(exp))
        tm.assert_categorical_equal(Series(c).unique(), exp)

        c = Categorical([1, 1, 2, 2], categories=[3, 2, 1])
        exp = Categorical([1, 2], categories=[1, 2])
        tm.assert_categorical_equal(c.unique(), exp)
        tm.assert_index_equal(Index(c).unique(), Index(exp))
        tm.assert_categorical_equal(Series(c).unique(), exp)

        c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1], ordered=True)
        # Categorical.unique keeps categories order if ordered=True
        exp = Categorical([3, 1, 2], categories=[3, 2, 1], ordered=True)
        tm.assert_categorical_equal(c.unique(), exp)

        tm.assert_index_equal(Index(c).unique(), Index(exp))
        tm.assert_categorical_equal(Series(c).unique(), exp)
Esempio n. 49
0
    def test_union_categoricals_ignore_order(self):
        """Exercise the ``ignore_order`` flag of ``union_categoricals``.

        GH 15219
        """
        c1 = Categorical([1, 2, 3], ordered=True)
        c2 = Categorical([1, 2, 3], ordered=False)

        # mixed orderedness is accepted when the order is ignored; the
        # expected result is unordered
        res = union_categoricals([c1, c2], ignore_order=True)
        exp = Categorical([1, 2, 3, 1, 2, 3])
        tm.assert_categorical_equal(res, exp)

        # ... and rejected when it is not ignored
        msg = 'Categorical.ordered must be the same'
        with pytest.raises(TypeError, match=msg):
            union_categoricals([c1, c2], ignore_order=False)

        # two ordered inputs: ignoring the order gives an unordered result
        res = union_categoricals([c1, c1], ignore_order=True)
        exp = Categorical([1, 2, 3, 1, 2, 3])
        tm.assert_categorical_equal(res, exp)

        # two ordered inputs: keeping the order gives an ordered result
        res = union_categoricals([c1, c1], ignore_order=False)
        exp = Categorical([1, 2, 3, 1, 2, 3],
                          categories=[1, 2, 3],
                          ordered=True)
        tm.assert_categorical_equal(res, exp)

        # missing values are carried through
        c1 = Categorical([1, 2, 3, np.nan], ordered=True)
        c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True)

        res = union_categoricals([c1, c2], ignore_order=True)
        exp = Categorical([1, 2, 3, np.nan, 3, 2])
        tm.assert_categorical_equal(res, exp)

        # ordered inputs whose categories are in a different order
        c1 = Categorical([1, 2, 3], ordered=True)
        c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)

        res = union_categoricals([c1, c2], ignore_order=True)
        exp = Categorical([1, 2, 3, 1, 2, 3])
        tm.assert_categorical_equal(res, exp)

        res = union_categoricals([c2, c1],
                                 ignore_order=True,
                                 sort_categories=True)
        exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
        tm.assert_categorical_equal(res, exp)

        # ordered inputs with disjoint categories
        c1 = Categorical([1, 2, 3], ordered=True)
        c2 = Categorical([4, 5, 6], ordered=True)
        result = union_categoricals([c1, c2], ignore_order=True)
        expected = Categorical([1, 2, 3, 4, 5, 6])
        tm.assert_categorical_equal(result, expected)

        # without ignore_order (explicitly False or left at its default)
        # differing ordered categories cannot be unioned
        msg = "to union ordered Categoricals, all categories must be the same"
        with pytest.raises(TypeError, match=msg):
            union_categoricals([c1, c2], ignore_order=False)

        with pytest.raises(TypeError, match=msg):
            union_categoricals([c1, c2])
Esempio n. 50
0
def categories_equals(left, right):
    """Assert two categoricals are equal and agree on orderedness.

    The order of the categories is only checked when ``left`` is ordered.
    """
    assert bool(left.ordered) == bool(right.ordered)
    assert_categorical_equal(left, right,
                             check_category_order=left.ordered)
def test_categorical_equal(c):
    assert_categorical_equal(c, c)
Esempio n. 52
0
 def test_qcut_index(self):
     result = qcut([0, 2], 2)
     expected = Index([Interval(-0.001, 1),
                       Interval(1, 2)]).astype('category')
     tm.assert_categorical_equal(result, expected)
Esempio n. 53
0
 def test_from_inferred_categories_sorts(self, dtype):
     cats = ["b", "a"]
     codes = np.array([0, 1, 1, 1], dtype="i8")
     result = Categorical._from_inferred_categories(cats, codes, dtype)
     expected = Categorical.from_codes([1, 0, 0, 0], ["a", "b"])
     tm.assert_categorical_equal(result, expected)
Esempio n. 54
0
    def test_qcut_specify_quantiles(self):
        arr = np.random.randn(100)

        factor = qcut(arr, [0, .25, .5, .75, 1.])
        expected = qcut(arr, 4)
        tm.assert_categorical_equal(factor, expected)
Esempio n. 55
0
 def test_take_fill_value(self):
     # https://github.com/pandas-dev/pandas/issues/23296
     cat = pd.Categorical(['a', 'b', 'c'])
     result = cat.take([0, 1, -1], fill_value='a', allow_fill=True)
     expected = pd.Categorical(['a', 'b', 'a'], categories=['a', 'b', 'c'])
     tm.assert_categorical_equal(result, expected)
def test_astype_categorical():
    """Casting a period array to 'category' gives the None entry code -1."""
    periods = period_array(['2000', '2001', '2001', None], freq='D')
    converted = periods.astype('category')
    expected = pd.Categorical.from_codes(
        [0, 1, 1, -1],
        categories=pd.PeriodIndex(['2000', '2001'], freq='D'),
    )
    tm.assert_categorical_equal(converted, expected)
 def test_set_dtype_no_overlap(self):
     c = Categorical(['a', 'b', 'c'], ['d', 'e'])
     result = c._set_dtype(CategoricalDtype(['a', 'b']))
     expected = Categorical([None, None, None], categories=['a', 'b'])
     tm.assert_categorical_equal(result, expected)
 def test_qcut_index(self):
     result = qcut([0, 2], 2)
     intervals = [Interval(-0.001, 1), Interval(1, 2)]
     expected = Categorical(intervals, ordered=True)
     tm.assert_categorical_equal(result, expected)
 def test_set_dtype_same(self):
     c = Categorical(['a', 'b', 'c'])
     result = c._set_dtype(CategoricalDtype(['a', 'b', 'c']))
     tm.assert_categorical_equal(result, c)
Esempio n. 60
0
 def test_take_fill_with_negative_one(self):
     # -1 was a category
     cat = pd.Categorical([-1, 0, 1])
     result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1)
     expected = pd.Categorical([-1, -1, 0], categories=[-1, 0, 1])
     tm.assert_categorical_equal(result, expected)