Example #1
0
    def test_duplicates(self):
        """Repeated values are flagged as duplicates and can be dropped."""
        repeated = CategoricalIndex([0, 0, 0], name='foo')

        # The two uniqueness flags must agree with each other.
        self.assertTrue(repeated.has_duplicates)
        self.assertFalse(repeated.is_unique)

        # De-duplicating leaves a single entry and keeps the index name.
        deduped = repeated.drop_duplicates()
        self.assert_index_equal(deduped, CategoricalIndex([0], name='foo'))
Example #2
0
    def test_identical(self):
        """``identical`` holds for self and copies, not for other categories."""
        two_cats = CategoricalIndex(['a', 'b'],
                                    categories=['a', 'b'],
                                    ordered=True)
        three_cats = CategoricalIndex(['a', 'b'],
                                      categories=['a', 'b', 'c'],
                                      ordered=True)

        # The object itself and a copy of it are both identical to it.
        for same in (two_cats, two_cats.copy()):
            self.assertTrue(two_cats.identical(same))

        # Same values but an extra category: not identical.
        self.assertFalse(two_cats.identical(three_cats))
Example #3
0
    def test_fillna_categorical(self):
        """``fillna`` accepts an existing category and rejects other values."""
        # GH 11343
        with_gap = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name='x')

        # A fill value that is already a category replaces the missing entry.
        filled = with_gap.fillna(1.0)
        self.assert_index_equal(
            filled, CategoricalIndex([1.0, 1.0, 3.0, 1.0], name='x'))

        # A fill value outside the categories raises ValueError.
        expected_msg = 'fill value must be in categories'
        with tm.assertRaisesRegexp(ValueError, expected_msg):
            with_gap.fillna(2.0)
Example #4
0
    def test_reindex_dtype(self):
        """Check the result type and indexer returned by ``reindex``.

        The same labels are reindexed from a source with default categories
        and from one that declares an extra category, each against a plain
        list target and a ``Categorical`` target.
        """
        wide = {'categories': ['a', 'b', 'c', 'd']}

        # (source constructor kwargs, reindex target, expected result)
        cases = [
            ({}, ['a', 'c'], Index(['a', 'a', 'c'])),
            ({}, Categorical(['a', 'c']),
             CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])),
            (wide, ['a', 'c'], Index(['a', 'a', 'c'], dtype='object')),
            (wide, Categorical(['a', 'c']),
             CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])),
        ]

        for source_kwargs, target, expected in cases:
            source = CategoricalIndex(['a', 'b', 'c', 'a'], **source_kwargs)
            result, positions = source.reindex(target)

            tm.assert_index_equal(result, expected, exact=True)
            # 'a' sits at positions 0 and 3 of the source, 'c' at position 2.
            tm.assert_numpy_array_equal(
                positions, np.array([0, 3, 2], dtype=np.int64))
Example #5
0
    def test_get_loc(self):
        """``get_loc`` on a CategoricalIndex agrees with a plain ``Index``.

        Exercises unique labels, non-unique labels, non-unique labels that
        form contiguous runs, and lookups that must raise ``KeyError``.
        """
        # GH 12531
        def assert_missing(indexes, key):
            # Every index in ``indexes`` must raise KeyError for ``key``.
            for index in indexes:
                with tm.assertRaises(KeyError):
                    index.get_loc(key)

        # unique
        unique_cat = CategoricalIndex(list('abcde'), categories=list('edabc'))
        unique_plain = Index(list('abcde'))
        for label in ('a', 'e'):
            self.assertEqual(unique_cat.get_loc(label),
                             unique_plain.get_loc(label))
        assert_missing([unique_cat, unique_plain], 'NOT-EXIST')

        # non-unique
        dup_cat = CategoricalIndex(list('aacded'), categories=list('edabc'))
        dup_plain = Index(list('aacded'))

        # a repeated, non-adjacent label results in a bool array
        mask = dup_cat.get_loc('d')
        self.assert_numpy_array_equal(mask, dup_plain.get_loc('d'))
        expected_mask = np.array([False, False, False, True, False, True])
        self.assert_numpy_array_equal(mask, expected_mask)

        # a label occurring once results in a scalar
        position = dup_cat.get_loc('e')
        self.assertEqual(position, dup_plain.get_loc('e'))
        self.assertEqual(position, 4)

        assert_missing([dup_cat, dup_plain], 'NOT-EXIST')

        # non-unique, sliceable
        run_cat = CategoricalIndex(list('aabbb'), categories=list('abc'))
        run_plain = Index(list('aabbb'))

        # labels stored as contiguous runs result in a slice
        expected_slices = (('a', slice(0, 2, None)), ('b', slice(2, 5, None)))
        for label, expected in expected_slices:
            found = run_cat.get_loc(label)
            self.assertEqual(found, run_plain.get_loc(label))
            self.assertEqual(found, expected)

        # 'c' is a declared category but is not present in the values
        assert_missing([run_cat, run_plain], 'c')
Example #6
0
    def test_isin(self):
        """``isin`` against plain lists and against re-categorised copies."""
        values = list('aabca') + [np.nan]
        ci = CategoricalIndex(values, categories=['c', 'a', 'b'])

        # Only the single 'c' (position 3) matches.
        only_c = np.array([False, False, False, True, False, False])
        tm.assert_numpy_array_equal(ci.isin(['c']), only_c)

        # All letters match; the trailing NaN does not.
        letters_only = np.array([True] * 5 + [False])
        tm.assert_numpy_array_equal(ci.isin(['c', 'a', 'b']), letters_only)

        # Listing NaN explicitly makes the trailing NaN match too.
        everything = np.array([True] * 6)
        tm.assert_numpy_array_equal(
            ci.isin(['c', 'a', 'b', np.nan]), everything)

        # mismatched categorical -> coerced to ndarray so doesn't matter
        superset = ci.set_categories(list('abcdefghi'))
        tm.assert_numpy_array_equal(ci.isin(superset), np.array([True] * 6))

        disjoint = ci.set_categories(list('defghi'))
        nan_only = np.array([False] * 5 + [True])
        tm.assert_numpy_array_equal(ci.isin(disjoint), nan_only)
Example #7
0
    def test_get_indexer(self):
        """``get_indexer`` accepts several target types; fill methods fail.

        The same labels are supplied as a ``CategoricalIndex``, a list and
        an ``Index``, and each must yield the same indexer. Requesting a
        fill ``method`` must raise ``NotImplementedError``.
        """
        idx1 = CategoricalIndex(list('aabcde'), categories=list('edabc'))
        idx2 = CategoricalIndex(list('abf'))

        # Pass the loop variable itself so every target type is exercised
        # (previously ``idx2`` was passed on each iteration, leaving the
        # list and Index targets untested).
        for indexer in [idx2, list('abf'), Index(list('abf'))]:
            r1 = idx1.get_indexer(indexer)
            # 'a' appears at positions 0 and 1 of idx1 and 'b' at 2; the
            # expected array uses -1 for 'f', which idx1 does not contain.
            assert_almost_equal(r1, np.array([0, 1, 2, -1]))

        for method in ('pad', 'backfill', 'nearest'):
            self.assertRaises(NotImplementedError,
                              idx2.get_indexer, idx1, method=method)
Example #8
0
    def test_equals(self):
        """Exercise ``equals`` and the element-wise comparison operators."""
        ab = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
        abc = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'],
                               ordered=True)

        # equals(): same object, different categories, object-dtype twin
        self.assertTrue(ab.equals(ab))
        self.assertFalse(ab.equals(abc))
        self.assertTrue(ab.equals(ab.astype(object)))
        self.assertTrue(ab.astype(object).equals(ab))

        # element-wise comparisons of the index with itself
        for mask in (ab == ab, ab <= ab, ab >= ab):
            self.assertTrue(mask.all())
        for mask in (ab != ab, ab > ab, ab < ab):
            self.assertFalse(mask.all())

        # element-wise comparisons with a scalar, an Index and raw values
        against_scalar = ab == 1
        self.assertFalse(against_scalar.all())
        against_index = ab == Index(['a', 'b'])
        self.assertTrue(against_index.all())
        against_values = ab == ab.values
        self.assertTrue(against_values.all())

        # invalid comparisons
        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
            ab == Index(['a', 'b', 'c'])
        with self.assertRaises(TypeError):
            ab == abc
        with self.assertRaises(TypeError):
            ab == Categorical(ab.values, ordered=False)
        with self.assertRaises(TypeError):
            ab == Categorical(ab.values, categories=list('abc'))

        # equals() against plain lists, with and without NaN
        # make sure that we are testing for category inclusion properly
        letters = list('aabca')
        cats = ['c', 'a', 'b']

        plain = CategoricalIndex(letters, categories=cats)
        self.assertTrue(plain.equals(letters))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            nan_category = CategoricalIndex(letters,
                                            categories=cats + [np.nan])
            self.assertTrue(nan_category.equals(letters))

        nan_value = CategoricalIndex(letters + [np.nan], categories=cats)
        self.assertFalse(nan_value.equals(letters))

        nan_value = CategoricalIndex(letters + [np.nan], categories=cats)
        self.assertTrue(nan_value.equals(letters + [np.nan]))
Example #9
0
    def test_method_delegation(self):
        """Category-manipulation methods are available on the index itself.

        Each call below is checked against a directly constructed
        ``CategoricalIndex``; requesting ``inplace=True`` must raise
        ``ValueError``.
        """
        def build(categories, **kwargs):
            # Same values every time; only the categories/options vary.
            return CategoricalIndex(list('aabbca'),
                                    categories=list(categories), **kwargs)

        # set_categories
        narrowed = build('cabdef').set_categories(list('cab'))
        tm.assert_index_equal(narrowed, build('cab'))

        # rename_categories
        renamed = build('cab').rename_categories(list('efg'))
        tm.assert_index_equal(
            renamed,
            CategoricalIndex(list('ffggef'), categories=list('efg')))

        # add_categories
        extended = build('cab').add_categories(['d'])
        tm.assert_index_equal(extended, build('cabd'))

        # remove_categories: values of the removed category become NaN
        trimmed = build('cab').remove_categories(['c'])
        tm.assert_index_equal(
            trimmed,
            CategoricalIndex(list('aabb') + [np.nan] + ['a'],
                             categories=list('ab')))

        # as_unordered on an already unordered index
        unordered = build('cabdef')
        tm.assert_index_equal(unordered.as_unordered(), unordered)

        # as_ordered
        source = build('cabdef')
        tm.assert_index_equal(source.as_ordered(),
                              build('cabdef', ordered=True))

        # invalid
        self.assertRaises(ValueError, source.set_categories,
                          list('cab'), inplace=True)