Exemple #1
0
    def test_remove_categories(self):
        cat = Categorical(["a", "b", "c", "a"], ordered=True)
        old = cat.copy()
        new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"],
                          ordered=True)

        # first inplace == False
        res = cat.remove_categories("c")
        tm.assert_categorical_equal(cat, old)
        tm.assert_categorical_equal(res, new)

        res = cat.remove_categories(["c"])
        tm.assert_categorical_equal(cat, old)
        tm.assert_categorical_equal(res, new)

        # inplace == True
        res = cat.remove_categories("c", inplace=True)
        tm.assert_categorical_equal(cat, new)
        assert res is None

        # removal is not in categories
        def f():
            cat.remove_categories(["c"])

        pytest.raises(ValueError, f)
Exemple #2
0
    def test_set_ordered(self):

        cat = Categorical(["a", "b", "c", "a"], ordered=True)
        cat2 = cat.as_unordered()
        assert not cat2.ordered
        cat2 = cat.as_ordered()
        assert cat2.ordered
        cat2.as_unordered(inplace=True)
        assert not cat2.ordered
        cat2.as_ordered(inplace=True)
        assert cat2.ordered

        assert cat2.set_ordered(True).ordered
        assert not cat2.set_ordered(False).ordered
        cat2.set_ordered(True, inplace=True)
        assert cat2.ordered
        cat2.set_ordered(False, inplace=True)
        assert not cat2.ordered

        # removed in 0.19.0
        msg = "can\'t set attribute"
        with tm.assert_raises_regex(AttributeError, msg):
            cat.ordered = True
        with tm.assert_raises_regex(AttributeError, msg):
            cat.ordered = False
Exemple #3
0
    def test_rename_categories(self):
        cat = Categorical(["a", "b", "c", "a"])

        # inplace=False: the old one must not be changed
        res = cat.rename_categories([1, 2, 3])
        tm.assert_numpy_array_equal(res.__array__(), np.array([1, 2, 3, 1],
                                                              dtype=np.int64))
        tm.assert_index_equal(res.categories, Index([1, 2, 3]))

        exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_)
        tm.assert_numpy_array_equal(cat.__array__(), exp_cat)

        exp_cat = Index(["a", "b", "c"])
        tm.assert_index_equal(cat.categories, exp_cat)

        # GH18862 (let rename_categories take callables)
        result = cat.rename_categories(lambda x: x.upper())
        expected = Categorical(["A", "B", "C", "A"])
        tm.assert_categorical_equal(result, expected)

        # and now inplace
        res = cat.rename_categories([1, 2, 3], inplace=True)
        assert res is None
        tm.assert_numpy_array_equal(cat.__array__(), np.array([1, 2, 3, 1],
                                                              dtype=np.int64))
        tm.assert_index_equal(cat.categories, Index([1, 2, 3]))

        # Lengthen
        with pytest.raises(ValueError):
            cat.rename_categories([1, 2, 3, 4])

        # Shorten
        with pytest.raises(ValueError):
            cat.rename_categories([1, 2])
Exemple #4
0
    def test_codes_immutable(self):

        # Codes should be read only
        c = Categorical(["a", "b", "c", "a", np.nan])
        exp = np.array([0, 1, 2, 0, -1], dtype='int8')
        tm.assert_numpy_array_equal(c.codes, exp)

        # Assignments to codes should raise
        def f():
            c.codes = np.array([0, 1, 2, 0, 1], dtype='int8')

        pytest.raises(ValueError, f)

        # changes in the codes array should raise
        # np 1.6.1 raises RuntimeError rather than ValueError
        codes = c.codes

        def f():
            codes[4] = 1

        pytest.raises(ValueError, f)

        # But even after getting the codes, the original array should still be
        # writeable!
        c[4] = "a"
        exp = np.array([0, 1, 2, 0, 0], dtype='int8')
        tm.assert_numpy_array_equal(c.codes, exp)
        c._codes[4] = 2
        exp = np.array([0, 1, 2, 0, 2], dtype='int8')
        tm.assert_numpy_array_equal(c.codes, exp)
Exemple #5
0
    def test_groupby_categorical_index(self):

        s = np.random.RandomState(12345)
        levels = ['foo', 'bar', 'baz', 'qux']
        codes = s.randint(0, 4, size=20)
        cats = Categorical.from_codes(codes, levels, ordered=True)
        df = DataFrame(
            np.repeat(
                np.arange(20), 4).reshape(-1, 4), columns=list('abcd'))
        df['cats'] = cats

        # with a cat index
        result = df.set_index('cats').groupby(level=0).sum()
        expected = df[list('abcd')].groupby(cats.codes).sum()
        expected.index = CategoricalIndex(
            Categorical.from_codes(
                [0, 1, 2, 3], levels, ordered=True), name='cats')
        assert_frame_equal(result, expected)

        # with a cat column, should produce a cat index
        result = df.groupby('cats').sum()
        expected = df[list('abcd')].groupby(cats.codes).sum()
        expected.index = CategoricalIndex(
            Categorical.from_codes(
                [0, 1, 2, 3], levels, ordered=True), name='cats')
        assert_frame_equal(result, expected)
Exemple #6
0
    def test_groupby_categorical_no_compress(self):
        data = Series(np.random.randn(9))

        codes = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])
        cats = Categorical.from_codes(codes, [0, 1, 2], ordered=True)

        result = data.groupby(cats).mean()
        exp = data.groupby(codes).mean()

        exp.index = CategoricalIndex(exp.index, categories=cats.categories,
                                     ordered=cats.ordered)
        assert_series_equal(result, exp)

        codes = np.array([0, 0, 0, 1, 1, 1, 3, 3, 3])
        cats = Categorical.from_codes(codes, [0, 1, 2, 3], ordered=True)

        result = data.groupby(cats).mean()
        exp = data.groupby(codes).mean().reindex(cats.categories)
        exp.index = CategoricalIndex(exp.index, categories=cats.categories,
                                     ordered=cats.ordered)
        assert_series_equal(result, exp)

        cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"],
                           categories=["a", "b", "c", "d"], ordered=True)
        data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats})

        result = data.groupby("b").mean()
        result = result["a"].values
        exp = np.array([1, 2, 4, np.nan])
        tm.assert_numpy_array_equal(result, exp)
def test_shift(fill_value):
    ct = Categorical(['a', 'b', 'c', 'd'],
                     categories=['a', 'b', 'c', 'd'], ordered=False)
    expected = Categorical([None, 'a', 'b', 'c'],
                           categories=['a', 'b', 'c', 'd'], ordered=False)
    res = ct.shift(1, fill_value=fill_value)
    assert_equal(res, expected)
Exemple #8
0
 def test_right(self):
     data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575])
     result, bins = cut(data, 4, right=True, retbins=True)
     intervals = IntervalIndex.from_breaks(bins.round(3))
     expected = Categorical(intervals, ordered=True)
     expected = expected.take([0, 0, 0, 2, 3, 0, 0])
     tm.assert_categorical_equal(result, expected)
     tm.assert_almost_equal(bins, np.array([0.1905, 2.575, 4.95,
                                            7.325, 9.7]))
Exemple #9
0
    def test_from_codes_with_dtype_raises(self):
        msg = 'Cannot specify'
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([0, 1], categories=['a', 'b'],
                                   dtype=CategoricalDtype(['a', 'b']))

        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([0, 1], ordered=True,
                                   dtype=CategoricalDtype(['a', 'b']))
Exemple #10
0
def _bins_to_cuts(x, bins, right=True, labels=None,
                  precision=3, include_lowest=False,
                  dtype=None, duplicates='raise'):

    if duplicates not in ['raise', 'drop']:
        raise ValueError("invalid value for 'duplicates' parameter, "
                         "valid options are: raise, drop")

    if isinstance(bins, IntervalIndex):
        # we have a fast-path here
        ids = bins.get_indexer(x)
        result = algos.take_nd(bins, ids)
        result = Categorical(result, categories=bins, ordered=True)
        return result, bins

    unique_bins = algos.unique(bins)
    if len(unique_bins) < len(bins) and len(bins) != 2:
        if duplicates == 'raise':
            raise ValueError("Bin edges must be unique: {bins!r}.\nYou "
                             "can drop duplicate edges by setting "
                             "the 'duplicates' kwarg".format(bins=bins))
        else:
            bins = unique_bins

    side = 'left' if right else 'right'
    ids = _ensure_int64(bins.searchsorted(x, side=side))

    if include_lowest:
        # Numpy 1.9 support: ensure this mask is a Numpy array
        ids[np.asarray(x == bins[0])] = 1

    na_mask = isna(x) | (ids == len(bins)) | (ids == 0)
    has_nas = na_mask.any()

    if labels is not False:
        if labels is None:
            labels = _format_labels(bins, precision, right=right,
                                    include_lowest=include_lowest,
                                    dtype=dtype)
        else:
            if len(labels) != len(bins) - 1:
                raise ValueError('Bin labels must be one fewer than '
                                 'the number of bin edges')
        if not is_categorical_dtype(labels):
            labels = Categorical(labels, categories=labels, ordered=True)

        np.putmask(ids, na_mask, 0)
        result = algos.take_nd(labels, ids - 1)

    else:
        result = ids - 1
        if has_nas:
            result = result.astype(np.float64)
            np.putmask(result, na_mask, np.nan)

    return result, bins
Exemple #11
0
 def test_from_codes_with_nan_code(self):
     # GH21767
     codes = [1, 2, np.nan]
     dtype = CategoricalDtype(categories=['a', 'b', 'c'])
     with pytest.raises(ValueError,
                        match="codes need to be array-like integers"):
         Categorical.from_codes(codes, categories=dtype.categories)
     with pytest.raises(ValueError,
                        match="codes need to be array-like integers"):
         Categorical.from_codes(codes, dtype=dtype)
Exemple #12
0
    def test_argsort(self):
        c = Categorical([5, 3, 1, 4, 2], ordered=True)

        expected = np.array([2, 4, 1, 3, 0])
        tm.assert_numpy_array_equal(c.argsort(ascending=True), expected,
                                    check_dtype=False)

        expected = expected[::-1]
        tm.assert_numpy_array_equal(c.argsort(ascending=False), expected,
                                    check_dtype=False)
Exemple #13
0
def test_mask_with_boolean(index):
    s = Series(range(3))
    idx = Categorical([True, False, True])
    if index:
        idx = CategoricalIndex(idx)

    assert com.is_bool_indexer(idx)
    result = s[idx]
    expected = s[idx.astype('object')]
    tm.assert_series_equal(result, expected)
Exemple #14
0
    def test_rename_categories_dict(self):
        # GH 17336
        cat = Categorical(['a', 'b', 'c', 'd'])
        res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1})
        expected = Index([4, 3, 2, 1])
        tm.assert_index_equal(res.categories, expected)

        # Test for inplace
        res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1},
                                    inplace=True)
        assert res is None
        tm.assert_index_equal(cat.categories, expected)

        # Test for dicts of smaller length
        cat = Categorical(['a', 'b', 'c', 'd'])
        res = cat.rename_categories({'a': 1, 'c': 3})

        expected = Index([1, 'b', 3, 'd'])
        tm.assert_index_equal(res.categories, expected)

        # Test for dicts with bigger length
        cat = Categorical(['a', 'b', 'c', 'd'])
        res = cat.rename_categories({'a': 1, 'b': 2, 'c': 3,
                                     'd': 4, 'e': 5, 'f': 6})
        expected = Index([1, 2, 3, 4])
        tm.assert_index_equal(res.categories, expected)

        # Test for dicts with no items from old categories
        cat = Categorical(['a', 'b', 'c', 'd'])
        res = cat.rename_categories({'f': 1, 'g': 3})

        expected = Index(['a', 'b', 'c', 'd'])
        tm.assert_index_equal(res.categories, expected)
Exemple #15
0
    def test_fillna_iterable_category(self, named):
        # https://github.com/pandas-dev/pandas/issues/21097
        if named:
            Point = collections.namedtuple("Point", "x y")
        else:
            Point = lambda *args: args  # tuple
        cat = Categorical([Point(0, 0), Point(0, 1), None])
        result = cat.fillna(Point(0, 0))
        expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])

        tm.assert_categorical_equal(result, expected)
Exemple #16
0
    def test_rename_categories_series(self):
        # https://github.com/pandas-dev/pandas/issues/17981
        c = Categorical(['a', 'b'])
        xpr = "Treating Series 'new_categories' as a list-like "
        with tm.assert_produces_warning(FutureWarning) as rec:
            result = c.rename_categories(Series([0, 1]))

        assert len(rec) == 1
        assert xpr in str(rec[0].message)
        expected = Categorical([0, 1])
        tm.assert_categorical_equal(result, expected)
Exemple #17
0
    def test_from_codes_with_float(self):
        # GH21767
        codes = [1.0, 2.0, 0]  # integer, but in float dtype
        categories = ['a', 'b', 'c']

        with tm.assert_produces_warning(FutureWarning):
            cat = Categorical.from_codes(codes, categories)
        tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))

        codes = [1.1, 2.0, 0]  # non-integer
        with pytest.raises(ValueError):
            Categorical.from_codes(codes, categories)
Exemple #18
0
    def test_unique_ordered(self):
        # keep categories order when ordered=True
        cat = Categorical(['b', 'a', 'b'], categories=['a', 'b'], ordered=True)
        res = cat.unique()
        exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True)
        tm.assert_categorical_equal(res, exp_cat)

        cat = Categorical(['c', 'b', 'a', 'a'], categories=['a', 'b', 'c'],
                          ordered=True)
        res = cat.unique()
        exp_cat = Categorical(['c', 'b', 'a'], categories=['a', 'b', 'c'],
                              ordered=True)
        tm.assert_categorical_equal(res, exp_cat)

        cat = Categorical(['b', 'a', 'a'], categories=['a', 'b', 'c'],
                          ordered=True)
        res = cat.unique()
        exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True)
        tm.assert_categorical_equal(res, exp_cat)

        cat = Categorical(['b', 'b', np.nan, 'a'], categories=['a', 'b', 'c'],
                          ordered=True)
        res = cat.unique()
        exp_cat = Categorical(['b', np.nan, 'a'], categories=['a', 'b'],
                              ordered=True)
        tm.assert_categorical_equal(res, exp_cat)
Exemple #19
0
    def test_unique(self):
        # categories are reordered based on value when ordered=False
        cat = Categorical(["a", "b"])
        exp = Index(["a", "b"])
        res = cat.unique()
        tm.assert_index_equal(res.categories, exp)
        tm.assert_categorical_equal(res, cat)

        cat = Categorical(["a", "b", "a", "a"], categories=["a", "b", "c"])
        res = cat.unique()
        tm.assert_index_equal(res.categories, exp)
        tm.assert_categorical_equal(res, Categorical(exp))

        cat = Categorical(["c", "a", "b", "a", "a"],
                          categories=["a", "b", "c"])
        exp = Index(["c", "a", "b"])
        res = cat.unique()
        tm.assert_index_equal(res.categories, exp)
        exp_cat = Categorical(exp, categories=['c', 'a', 'b'])
        tm.assert_categorical_equal(res, exp_cat)

        # nan must be removed
        cat = Categorical(["b", np.nan, "b", np.nan, "a"],
                          categories=["a", "b", "c"])
        res = cat.unique()
        exp = Index(["b", "a"])
        tm.assert_index_equal(res.categories, exp)
        exp_cat = Categorical(["b", np.nan, "a"], categories=["b", "a"])
        tm.assert_categorical_equal(res, exp_cat)
Exemple #20
0
    def test_map(self):
        c = Categorical(list('ABABC'), categories=list('CBA'), ordered=True)
        result = c.map(lambda x: x.lower())
        exp = Categorical(list('ababc'), categories=list('cba'), ordered=True)
        tm.assert_categorical_equal(result, exp)

        c = Categorical(list('ABABC'), categories=list('ABC'), ordered=False)
        result = c.map(lambda x: x.lower())
        exp = Categorical(list('ababc'), categories=list('abc'), ordered=False)
        tm.assert_categorical_equal(result, exp)

        result = c.map(lambda x: 1)
        # GH 12766: Return an index not an array
        tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64)))
Exemple #21
0
    def test_categories_assigments(self):
        s = Categorical(["a", "b", "c", "a"])
        exp = np.array([1, 2, 3, 1], dtype=np.int64)
        s.categories = [1, 2, 3]
        tm.assert_numpy_array_equal(s.__array__(), exp)
        tm.assert_index_equal(s.categories, Index([1, 2, 3]))

        # lengthen
        with pytest.raises(ValueError):
            s.categories = [1, 2, 3, 4]

        # shorten
        with pytest.raises(ValueError):
            s.categories = [1, 2]
Exemple #22
0
    def test_cut_pass_labels(self):
        arr = [50, 5, 10, 15, 20, 30, 70]
        bins = [0, 25, 50, 100]
        labels = ['Small', 'Medium', 'Large']

        result = cut(arr, bins, labels=labels)
        exp = Categorical(['Medium'] + 4 * ['Small'] + ['Medium', 'Large'],
                          ordered=True)
        tm.assert_categorical_equal(result, exp)

        result = cut(arr, bins, labels=Categorical.from_codes([0, 1, 2],
                                                              labels))
        exp = Categorical.from_codes([1] + 4 * [0] + [1, 2], labels)
        tm.assert_categorical_equal(result, exp)
Exemple #23
0
    def test_validate_ordered(self):
        # see gh-14058
        exp_msg = "'ordered' must either be 'True' or 'False'"
        exp_err = TypeError

        # This should be a boolean.
        ordered = np.array([0, 1, 2])

        with tm.assert_raises_regex(exp_err, exp_msg):
            Categorical([1, 2, 3], ordered=ordered)

        with tm.assert_raises_regex(exp_err, exp_msg):
            Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'],
                                   ordered=ordered)
Exemple #24
0
    def test_from_codes_with_categorical_categories(self):
        # GH17884
        expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])

        result = Categorical.from_codes(
            [0, 1], categories=Categorical(['a', 'b', 'c']))
        tm.assert_categorical_equal(result, expected)

        result = Categorical.from_codes(
            [0, 1], categories=CategoricalIndex(['a', 'b', 'c']))
        tm.assert_categorical_equal(result, expected)

        # non-unique Categorical still raises
        with pytest.raises(ValueError):
            Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))
Exemple #25
0
 def test_from_inferred_categories_coerces(self):
     cats = ['1', '2', 'bad']
     codes = np.array([0, 0, 1, 2], dtype='i8')
     dtype = CategoricalDtype([1, 2])
     result = Categorical._from_inferred_categories(cats, codes, dtype)
     expected = Categorical([1, 1, 2, np.nan])
     tm.assert_categorical_equal(result, expected)
    def test_astype(self):

        ci = self.create_index()
        result = ci.astype('category')
        tm.assert_index_equal(result, ci, exact=True)

        result = ci.astype(object)
        tm.assert_index_equal(result, Index(np.array(ci)))

        # this IS equal, but not the same class
        assert result.equals(ci)
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)

        # interval
        ii = IntervalIndex.from_arrays(left=[-0.001, 2.0],
                                       right=[2, 4],
                                       closed='right')

        ci = CategoricalIndex(Categorical.from_codes(
            [0, 1, -1], categories=ii, ordered=True))

        result = ci.astype('interval')
        expected = ii.take([0, 1, -1])
        tm.assert_index_equal(result, expected)

        result = IntervalIndex.from_intervals(result.values)
        tm.assert_index_equal(result, expected)
 def f(a):
     if isinstance(a, (CategoricalIndex, Categorical)):
         categories = a.categories
         a = Categorical.from_codes(np.arange(len(categories)),
                                    categories=categories,
                                    ordered=a.ordered)
     return a
Exemple #28
0
    def test_reshaping_panel_categorical(self):

        p = tm.makePanel()
        p['str'] = 'foo'
        df = p.to_frame()

        df['category'] = df['str'].astype('category')
        result = df['category'].unstack()

        c = Categorical(['foo'] * len(p.major_axis))
        expected = DataFrame({'A': c.copy(),
                              'B': c.copy(),
                              'C': c.copy(),
                              'D': c.copy()},
                             columns=Index(list('ABCD'), name='minor'),
                             index=p.major_axis.set_names('major'))
        tm.assert_frame_equal(result, expected)
Exemple #29
0
 def test_cut_return_categorical(self):
     s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
     res = cut(s, 3)
     exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2],
                                         ["(-0.008, 2.667]",
                                          "(2.667, 5.333]", "(5.333, 8]"],
                                         ordered=True))
     tm.assert_series_equal(res, exp)
Exemple #30
0
 def test_qcut_return_categorical(self):
     s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
     res = qcut(s, [0, 0.333, 0.666, 1])
     exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2],
                                         ["[0, 2.664]",
                                          "(2.664, 5.328]", "(5.328, 8]"],
                                         ordered=True))
     tm.assert_series_equal(res, exp)
Exemple #31
0
    def test_isna(self):
        exp = np.array([False, False, True])
        cat = Categorical(["a", "b", np.nan])
        res = cat.isna()

        tm.assert_numpy_array_equal(res, exp)
 def test_constructor_imaginary(self):
     values = [1, 2, 3 + 1j]
     c1 = Categorical(values)
     tm.assert_index_equal(c1.categories, Index(values))
     tm.assert_numpy_array_equal(np.array(c1), np.array(values))
    def test_slicing_and_getting_ops(self):

        # systematically test the slicing operations:
        #  for all slicing ops:
        #   - returning a dataframe
        #   - returning a column
        #   - returning a row
        #   - returning a single value

        cats = Categorical(
            ["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"]
        )
        idx = Index(["h", "i", "j", "k", "l", "m", "n"])
        values = [1, 2, 3, 4, 5, 6, 7]
        df = DataFrame({"cats": cats, "values": values}, index=idx)

        # the expected values
        cats2 = Categorical(["b", "c"], categories=["a", "b", "c"])
        idx2 = Index(["j", "k"])
        values2 = [3, 4]

        # 2:4,: | "j":"k",:
        exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2)

        # :,"cats" | :,0
        exp_col = Series(cats, index=idx, name="cats")

        # "j",: | 2,:
        exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", name="j")

        # "j","cats | 2,0
        exp_val = "b"

        # iloc
        # frame
        res_df = df.iloc[2:4, :]
        tm.assert_frame_equal(res_df, exp_df)
        assert is_categorical_dtype(res_df["cats"])

        # row
        res_row = df.iloc[2, :]
        tm.assert_series_equal(res_row, exp_row)
        assert isinstance(res_row["cats"], str)

        # col
        res_col = df.iloc[:, 0]
        tm.assert_series_equal(res_col, exp_col)
        assert is_categorical_dtype(res_col)

        # single value
        res_val = df.iloc[2, 0]
        assert res_val == exp_val

        # loc
        # frame
        res_df = df.loc["j":"k", :]
        tm.assert_frame_equal(res_df, exp_df)
        assert is_categorical_dtype(res_df["cats"])

        # row
        res_row = df.loc["j", :]
        tm.assert_series_equal(res_row, exp_row)
        assert isinstance(res_row["cats"], str)

        # col
        res_col = df.loc[:, "cats"]
        tm.assert_series_equal(res_col, exp_col)
        assert is_categorical_dtype(res_col)

        # single value
        res_val = df.loc["j", "cats"]
        assert res_val == exp_val

        # ix
        # frame
        # res_df = df.loc["j":"k",[0,1]] # doesn't work?
        res_df = df.loc["j":"k", :]
        tm.assert_frame_equal(res_df, exp_df)
        assert is_categorical_dtype(res_df["cats"])

        # row
        res_row = df.loc["j", :]
        tm.assert_series_equal(res_row, exp_row)
        assert isinstance(res_row["cats"], str)

        # col
        res_col = df.loc[:, "cats"]
        tm.assert_series_equal(res_col, exp_col)
        assert is_categorical_dtype(res_col)

        # single value
        res_val = df.loc["j", df.columns[0]]
        assert res_val == exp_val

        # iat
        res_val = df.iat[2, 0]
        assert res_val == exp_val

        # at
        res_val = df.at["j", "cats"]
        assert res_val == exp_val

        # fancy indexing
        exp_fancy = df.iloc[[2]]

        res_fancy = df[df["cats"] == "b"]
        tm.assert_frame_equal(res_fancy, exp_fancy)
        res_fancy = df[df["values"] == 3]
        tm.assert_frame_equal(res_fancy, exp_fancy)

        # get_value
        res_val = df.at["j", "cats"]
        assert res_val == exp_val

        # i : int, slice, or sequence of integers
        res_row = df.iloc[2]
        tm.assert_series_equal(res_row, exp_row)
        assert isinstance(res_row["cats"], str)

        res_df = df.iloc[slice(2, 4)]
        tm.assert_frame_equal(res_df, exp_df)
        assert is_categorical_dtype(res_df["cats"])

        res_df = df.iloc[[2, 3]]
        tm.assert_frame_equal(res_df, exp_df)
        assert is_categorical_dtype(res_df["cats"])

        res_col = df.iloc[:, 0]
        tm.assert_series_equal(res_col, exp_col)
        assert is_categorical_dtype(res_col)

        res_df = df.iloc[:, slice(0, 2)]
        tm.assert_frame_equal(res_df, df)
        assert is_categorical_dtype(res_df["cats"])

        res_df = df.iloc[:, [0, 1]]
        tm.assert_frame_equal(res_df, df)
        assert is_categorical_dtype(res_df["cats"])
Exemple #34
0
    def test_categories_match_up_to_permutation(self):

        # test dtype comparisons between cats

        c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
        c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False)
        c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True)
        assert c1._categories_match_up_to_permutation(c1)
        assert c2._categories_match_up_to_permutation(c2)
        assert c3._categories_match_up_to_permutation(c3)
        assert c1._categories_match_up_to_permutation(c2)
        assert not c1._categories_match_up_to_permutation(c3)
        assert not c1._categories_match_up_to_permutation(Index(list("aabca")))
        assert not c1._categories_match_up_to_permutation(c1.astype(object))
        assert c1._categories_match_up_to_permutation(CategoricalIndex(c1))
        assert c1._categories_match_up_to_permutation(
            CategoricalIndex(c1, categories=list("cab"))
        )
        assert not c1._categories_match_up_to_permutation(
            CategoricalIndex(c1, ordered=True)
        )

        # GH 16659
        s1 = Series(c1)
        s2 = Series(c2)
        s3 = Series(c3)
        assert c1._categories_match_up_to_permutation(s1)
        assert c2._categories_match_up_to_permutation(s2)
        assert c3._categories_match_up_to_permutation(s3)
        assert c1._categories_match_up_to_permutation(s2)
        assert not c1._categories_match_up_to_permutation(s3)
        assert not c1._categories_match_up_to_permutation(s1.astype(object))
Exemple #35
0
    def test_is_dtype_equal_deprecated(self):
        # GH#37545
        c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)

        with tm.assert_produces_warning(FutureWarning):
            c1.is_dtype_equal(c1)
    def test_reindexing(self):

        # reindexing
        # convert to a regular index
        result = self.df2.reindex(["a", "b", "e"])
        expected = DataFrame(
            {"A": [0, 1, 5, 2, 3, np.nan], "B": Series(list("aaabbe"))}
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(["a", "b"])
        expected = DataFrame(
            {"A": [0, 1, 5, 2, 3], "B": Series(list("aaabb"))}
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(["e"])
        expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(["d"])
        expected = DataFrame({"A": [np.nan], "B": Series(["d"])}).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        # since we are actually reindexing with a Categorical
        # then return a Categorical
        cats = list("cabe")

        result = self.df2.reindex(Categorical(["a", "d"], categories=cats))
        expected = DataFrame(
            {"A": [0, 1, 5, np.nan], "B": Series(list("aaad")).astype(CDT(cats))}
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(Categorical(["a"], categories=cats))
        expected = DataFrame(
            {"A": [0, 1, 5], "B": Series(list("aaa")).astype(CDT(cats))}
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(["a", "b", "e"])
        expected = DataFrame(
            {"A": [0, 1, 5, 2, 3, np.nan], "B": Series(list("aaabbe"))}
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(["a", "b"])
        expected = DataFrame(
            {"A": [0, 1, 5, 2, 3], "B": Series(list("aaabb"))}
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(["e"])
        expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        # give back the type of categorical that we received
        result = self.df2.reindex(
            Categorical(["a", "d"], categories=cats, ordered=True)
        )
        expected = DataFrame(
            {
                "A": [0, 1, 5, np.nan],
                "B": Series(list("aaad")).astype(CDT(cats, ordered=True)),
            }
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        result = self.df2.reindex(Categorical(["a", "d"], categories=["a", "d"]))
        expected = DataFrame(
            {"A": [0, 1, 5, np.nan], "B": Series(list("aaad")).astype(CDT(["a", "d"]))}
        ).set_index("B")
        assert_frame_equal(result, expected, check_index_type=True)

        # passed duplicate indexers are not allowed
        msg = "cannot reindex with a non-unique indexer"
        with pytest.raises(ValueError, match=msg):
            self.df2.reindex(["a", "a"])

        # args NotImplemented ATM
        msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
        with pytest.raises(NotImplementedError, match=msg.format("method")):
            self.df2.reindex(["a"], method="ffill")
        with pytest.raises(NotImplementedError, match=msg.format("level")):
            self.df2.reindex(["a"], level=1)
        with pytest.raises(NotImplementedError, match=msg.format("limit")):
            self.df2.reindex(["a"], limit=2)
Exemple #37
0
 def test_iter_python_types(self):
     # GH-19909
     cat = Categorical([1, 2])
     assert isinstance(list(cat)[0], int)
     assert isinstance(cat.tolist()[0], int)
Exemple #38
0
 def test_set_dtype_nans(self):
     c = Categorical(["a", "b", np.nan])
     result = c._set_dtype(CategoricalDtype(["a", "c"]))
     tm.assert_numpy_array_equal(result.codes,
                                 np.array([0, -1, -1], dtype="int8"))
Exemple #39
0
 def test_set_dtype_same(self):
     c = Categorical(["a", "b", "c"])
     result = c._set_dtype(CategoricalDtype(["a", "b", "c"]))
     tm.assert_categorical_equal(result, c)
Exemple #40
0
 def test_set_dtype_new_categories(self):
     c = Categorical(["a", "b", "c"])
     result = c._set_dtype(CategoricalDtype(list("abcd")))
     tm.assert_numpy_array_equal(result.codes, c.codes)
     tm.assert_index_equal(result.dtype.categories, Index(list("abcd")))
Exemple #41
0
 def test_set_dtype_many(self, values, categories, new_categories, ordered):
     c = Categorical(values, categories)
     expected = Categorical(values, new_categories, ordered)
     result = c._set_dtype(expected.dtype)
     tm.assert_categorical_equal(result, expected)
Exemple #42
0
 def test_set_dtype_no_overlap(self):
     c = Categorical(["a", "b", "c"], ["d", "e"])
     result = c._set_dtype(CategoricalDtype(["a", "b"]))
     expected = Categorical([None, None, None], categories=["a", "b"])
     tm.assert_categorical_equal(result, expected)
Exemple #43
0
 def test_iter_python_types_datetime(self):
     cat = Categorical([Timestamp("2017-01-01"), Timestamp("2017-01-02")])
     assert isinstance(list(cat)[0], Timestamp)
     assert isinstance(cat.tolist()[0], Timestamp)
 def test_construction_with_ordered(self, ordered):
     # GH 9347, 9190
     cat = Categorical([0, 1, 2], ordered=ordered)
     assert cat.ordered == bool(ordered)