Ejemplo n.º 1
0
    def test_construction_with_dtype(self):
        # Index(..., dtype='category') must rebuild the CategoricalIndex
        # supplied by the create_index fixture.

        # explicit categories: ndarray input and list input both round-trip
        fixture = self.create_index(categories=list('abc'))

        from_array = Index(np.array(fixture), dtype='category')
        tm.assert_index_equal(from_array, fixture, exact=True)

        from_list = Index(np.array(fixture).tolist(), dtype='category')
        tm.assert_index_equal(from_list, fixture, exact=True)

        # these are generally only equal when the categories are reordered
        fixture = self.create_index()

        rebuilt = Index(np.array(fixture), dtype='category')
        rebuilt = rebuilt.reorder_categories(fixture.categories)
        tm.assert_index_equal(rebuilt, fixture, exact=True)

        # an Index is accepted both as the data and as the categories
        positions = Index(range(3))
        from_index = CategoricalIndex(positions,
                                      categories=positions,
                                      ordered=True)
        wanted = CategoricalIndex([0, 1, 2],
                                  categories=[0, 1, 2],
                                  ordered=True)
        tm.assert_index_equal(from_index, wanted, exact=True)
Ejemplo n.º 2
0
    def test_append(self):
        # Exercise CategoricalIndex.append with compatible and incompatible
        # arguments.
        base = self.create_index()
        base_categories = base.categories

        # append cats with the same categories
        head, tail = base[:3], base[3:]
        tm.assert_index_equal(head.append(tail), base, exact=True)

        # append a list of pieces in one call
        pieces = [base[:1], base[1:3], base[3:]]
        joined = pieces[0].append(pieces[1:])
        tm.assert_index_equal(joined, base, exact=True)

        # empty
        tm.assert_index_equal(base.append([]), base, exact=True)

        # appending with different categories or reordered is not ok
        with self.assertRaises(TypeError):
            base.append(base.values.set_categories(list('abcd')))
        with self.assertRaises(TypeError):
            base.append(base.values.reorder_categories(list('abc')))

        # with objects
        with_objects = base.append(Index(['c', 'a']))
        wanted = CategoricalIndex(list('aabbcaca'),
                                  categories=base_categories)
        tm.assert_index_equal(with_objects, wanted, exact=True)

        # invalid objects
        with self.assertRaises(TypeError):
            base.append(Index(['a', 'd']))
Ejemplo n.º 3
0
    def test_reindex_dtype(self):
        # reindex() with a list target is expected to give a plain Index;
        # with a Categorical target it is expected to give a
        # CategoricalIndex carrying the target's categories. The indexer is
        # the same in every case.
        labels = ['a', 'b', 'c', 'a']
        wanted_indexer = np.array([0, 3, 2], dtype=np.int64)

        # inferred categories, list target
        source = CategoricalIndex(labels)
        reindexed, positions = source.reindex(['a', 'c'])
        tm.assert_index_equal(reindexed, Index(['a', 'a', 'c']), exact=True)
        tm.assert_numpy_array_equal(positions, wanted_indexer)

        # inferred categories, Categorical target
        source = CategoricalIndex(labels)
        reindexed, positions = source.reindex(Categorical(['a', 'c']))
        wanted = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
        tm.assert_index_equal(reindexed, wanted, exact=True)
        tm.assert_numpy_array_equal(positions, wanted_indexer)

        # explicit categories with an unused one, list target
        source = CategoricalIndex(labels, categories=['a', 'b', 'c', 'd'])
        reindexed, positions = source.reindex(['a', 'c'])
        wanted = Index(['a', 'a', 'c'], dtype='object')
        tm.assert_index_equal(reindexed, wanted, exact=True)
        tm.assert_numpy_array_equal(positions, wanted_indexer)

        # explicit categories with an unused one, Categorical target
        source = CategoricalIndex(labels, categories=['a', 'b', 'c', 'd'])
        reindexed, positions = source.reindex(Categorical(['a', 'c']))
        wanted = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
        tm.assert_index_equal(reindexed, wanted, exact=True)
        tm.assert_numpy_array_equal(positions, wanted_indexer)
Ejemplo n.º 4
0
    def test_reindex_dtype(self):
        # reindex() with a list target is expected to give a plain Index;
        # with a Categorical target it is expected to give a
        # CategoricalIndex carrying the target's categories.
        #
        # The expected indexer is pinned to np.intp: a bare
        # np.array([0, 3, 2]) takes numpy's platform default integer type,
        # which is not the pointer-sized integer on every platform, so a
        # dtype-checking comparison would be platform dependent.
        # NOTE(review): pandas documents the reindex indexer as np.intp --
        # confirm against the pandas version under test.
        expected_indexer = np.array([0, 3, 2], dtype=np.intp)

        # inferred categories, list target
        c = CategoricalIndex(['a', 'b', 'c', 'a'])
        res, indexer = c.reindex(['a', 'c'])
        tm.assert_index_equal(res, Index(['a', 'a', 'c']), exact=True)
        tm.assert_numpy_array_equal(indexer, expected_indexer)

        # inferred categories, Categorical target
        c = CategoricalIndex(['a', 'b', 'c', 'a'])
        res, indexer = c.reindex(Categorical(['a', 'c']))
        exp = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
        tm.assert_index_equal(res, exp, exact=True)
        tm.assert_numpy_array_equal(indexer, expected_indexer)

        # explicit categories with an unused one, list target
        c = CategoricalIndex(['a', 'b', 'c', 'a'],
                             categories=['a', 'b', 'c', 'd'])
        res, indexer = c.reindex(['a', 'c'])
        exp = Index(['a', 'a', 'c'], dtype='object')
        tm.assert_index_equal(res, exp, exact=True)
        tm.assert_numpy_array_equal(indexer, expected_indexer)

        # explicit categories with an unused one, Categorical target
        c = CategoricalIndex(['a', 'b', 'c', 'a'],
                             categories=['a', 'b', 'c', 'd'])
        res, indexer = c.reindex(Categorical(['a', 'c']))
        exp = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
        tm.assert_index_equal(res, exp, exact=True)
        tm.assert_numpy_array_equal(indexer, expected_indexer)
Ejemplo n.º 5
0
    def test_equals_categorical(self):
        # Check equals() and the elementwise comparison operators on
        # CategoricalIndex, including the comparisons that must raise.
        two_cats = CategoricalIndex(['a', 'b'],
                                    categories=['a', 'b'],
                                    ordered=True)
        three_cats = CategoricalIndex(['a', 'b'],
                                      categories=['a', 'b', 'c'],
                                      ordered=True)

        # whole-index equality, in both directions against object dtype
        self.assertTrue(two_cats.equals(two_cats))
        self.assertFalse(two_cats.equals(three_cats))
        as_object = two_cats.astype(object)
        self.assertTrue(two_cats.equals(as_object))
        self.assertTrue(as_object.equals(two_cats))

        # elementwise comparisons of the index with itself
        self.assertTrue((two_cats == two_cats).all())
        self.assertFalse((two_cats != two_cats).all())
        self.assertFalse((two_cats > two_cats).all())
        self.assertFalse((two_cats < two_cats).all())
        self.assertTrue((two_cats <= two_cats).all())
        self.assertTrue((two_cats >= two_cats).all())

        # elementwise comparisons with a scalar, an Index and the values
        self.assertFalse((two_cats == 1).all())
        self.assertTrue((two_cats == Index(['a', 'b'])).all())
        self.assertTrue((two_cats == two_cats.values).all())

        # invalid comparisons
        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
            two_cats == Index(['a', 'b', 'c'])
        with self.assertRaises(TypeError):
            two_cats == three_cats
        with self.assertRaises(TypeError):
            two_cats == Categorical(two_cats.values, ordered=False)
        with self.assertRaises(TypeError):
            two_cats == Categorical(two_cats.values, categories=list('abc'))

        # tests
        # make sure that we are testing for category inclusion properly
        letters = list('aabca')
        letters_nan = list('aabca') + [np.nan]

        def assert_differs_from(index, values):
            # neither the raw values nor a default-built CategoricalIndex
            # of them may compare equal to ``index``
            self.assertFalse(index.equals(values))
            self.assertFalse(index.equals(CategoricalIndex(values)))

        reordered = CategoricalIndex(letters, categories=['c', 'a', 'b'])
        assert_differs_from(reordered, letters)
        self.assertTrue(reordered.equals(reordered.copy()))

        # NaN listed as a category: construction and copy-equality are each
        # wrapped in a FutureWarning check
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            nan_category = CategoricalIndex(
                letters, categories=['c', 'a', 'b', np.nan])
        assert_differs_from(nan_category, letters)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            self.assertTrue(nan_category.equals(nan_category.copy()))

        # NaN among the values, compared with values lacking the NaN
        nan_value = CategoricalIndex(letters_nan, categories=['c', 'a', 'b'])
        assert_differs_from(nan_value, letters)
        self.assertTrue(nan_value.equals(nan_value.copy()))

        # NaN among the values, compared with values including the NaN
        nan_value = CategoricalIndex(letters_nan, categories=['c', 'a', 'b'])
        assert_differs_from(nan_value, letters_nan)
        self.assertTrue(nan_value.equals(nan_value.copy()))
Ejemplo n.º 6
0
    def test_reindexing(self):
        # get_indexer on the categorical index must match what the
        # object-dtype Index reports for the same randomly drawn targets.
        cat_index = self.create_index()
        obj_index = Index(np.array(cat_index))
        total = len(cat_index)

        for sample_size in (1, 2, 5, total):
            picks = np.random.randint(0, total, size=sample_size)
            targets = obj_index[picks]

            wanted = obj_index.get_indexer_non_unique(targets)[0]
            got = cat_index.get_indexer(targets)
            tm.assert_numpy_array_equal(wanted, got)
Ejemplo n.º 7
0
    def test_reindexing(self):
        # get_indexer on the categorical index must match what the
        # object-dtype Index reports for the same randomly drawn targets;
        # dtypes are deliberately not compared.
        cat_index = self.create_index()
        obj_index = Index(np.array(cat_index))
        total = len(cat_index)

        for sample_size in (1, 2, 5, total):
            picks = np.random.randint(0, total, size=sample_size)
            targets = obj_index[picks]

            wanted = obj_index.get_indexer_non_unique(targets)[0]
            got = cat_index.get_indexer(targets)
            tm.assert_numpy_array_equal(wanted.values, got,
                                        check_dtype=False)
Ejemplo n.º 8
0
    def test_astype(self):
        # astype('category') round-trips, astype(object) gives a plain
        # Index, and astype('interval') is checked against the
        # IntervalIndex used as categories.
        fixture = self.create_index()

        as_category = fixture.astype('category')
        tm.assert_index_equal(as_category, fixture, exact=True)

        as_object = fixture.astype(object)
        self.assert_index_equal(as_object, Index(np.array(fixture)))

        # this IS equal, but not the same class
        self.assertTrue(as_object.equals(fixture))
        self.assertIsInstance(as_object, Index)
        self.assertNotIsInstance(as_object, CategoricalIndex)

        # interval
        intervals = IntervalIndex.from_arrays(left=[-0.001, 2.0],
                                              right=[2, 4],
                                              closed='right')
        interval_cat = Categorical.from_codes([0, 1, -1],
                                              categories=intervals,
                                              ordered=True)
        fixture = CategoricalIndex(interval_cat)

        wanted = intervals.take([0, 1, -1])
        as_interval = fixture.astype('interval')
        tm.assert_index_equal(as_interval, wanted)

        # rebuilding from the converted values gives the same index
        rebuilt = IntervalIndex.from_intervals(as_interval.values)
        tm.assert_index_equal(rebuilt, wanted)
Ejemplo n.º 9
0
    def test_astype(self):
        # astype('category') round-trips and astype(object) gives a plain
        # Index that still compares equal to the original.
        fixture = self.create_index()

        as_category = fixture.astype('category')
        tm.assert_index_equal(as_category, fixture, exact=True)

        as_object = fixture.astype(object)
        self.assert_index_equal(as_object, Index(np.array(fixture)))

        # this IS equal, but not the same class
        self.assertTrue(as_object.equals(fixture))
        self.assertIsInstance(as_object, Index)
        self.assertNotIsInstance(as_object, CategoricalIndex)
Ejemplo n.º 10
0
    def test_get_indexer(self):
        # get_indexer must give the same answer whether the target is a
        # CategoricalIndex, a plain list or an object Index, and must
        # reject every fill method.
        idx1 = CategoricalIndex(list('aabcde'), categories=list('edabc'))
        idx2 = CategoricalIndex(list('abf'))

        expected = np.array([0, 1, 2, -1], dtype=np.intp)
        for indexer in [idx2, list('abf'), Index(list('abf'))]:
            # Pass the loop variable: the previous code always passed idx2,
            # so the list and Index targets were never exercised.
            r1 = idx1.get_indexer(indexer)
            assert_almost_equal(r1, expected)

        # fill methods are expected to raise NotImplementedError
        for method in ('pad', 'backfill', 'nearest'):
            with self.assertRaises(NotImplementedError):
                idx2.get_indexer(idx1, method=method)
Ejemplo n.º 11
0
    def test_get_loc(self):
        # GH 12531
        # CategoricalIndex.get_loc must agree with get_loc on a plain Index
        # holding the same labels.
        missing = 'NOT-EXIST'

        # unique labels
        cat_unique = CategoricalIndex(list('abcde'), categories=list('edabc'))
        obj_unique = Index(list('abcde'))
        for label in ('a', 'e'):
            self.assertEqual(cat_unique.get_loc(label),
                             obj_unique.get_loc(label))

        for index in (cat_unique, obj_unique):
            with tm.assertRaises(KeyError):
                index.get_loc(missing)

        # non-unique
        cat_dupes = CategoricalIndex(list('aacded'), categories=list('edabc'))
        obj_dupes = Index(list('aacded'))

        # results in bool array
        mask = cat_dupes.get_loc('d')
        wanted_mask = np.array([False, False, False, True, False, True])
        self.assert_numpy_array_equal(mask, obj_dupes.get_loc('d'))
        self.assert_numpy_array_equal(mask, wanted_mask)

        # unique element results in scalar
        position = cat_dupes.get_loc('e')
        self.assertEqual(position, obj_dupes.get_loc('e'))
        self.assertEqual(position, 4)

        for index in (cat_dupes, obj_dupes):
            with tm.assertRaises(KeyError):
                index.get_loc(missing)

        # non-unique, sliceable
        cat_runs = CategoricalIndex(list('aabbb'), categories=list('abc'))
        obj_runs = Index(list('aabbb'))

        # results in slice
        wanted_slices = (('a', slice(0, 2, None)), ('b', slice(2, 5, None)))
        for label, wanted in wanted_slices:
            found = cat_runs.get_loc(label)
            self.assertEqual(found, obj_runs.get_loc(label))
            self.assertEqual(found, wanted)

        # 'c' is a declared category but never occurs as a value
        for index in (cat_runs, obj_runs):
            with tm.assertRaises(KeyError):
                index.get_loc('c')
Ejemplo n.º 12
0
    def test_map(self):
        # map() is expected to return a CategoricalIndex when the mapped
        # categories stay distinct, and a plain Index otherwise.

        def to_lower(label):
            return label.lower()

        # ordered index
        ordered_ci = pd.CategoricalIndex(list('ABABC'),
                                         categories=list('CBA'),
                                         ordered=True)
        wanted = pd.CategoricalIndex(list('ababc'),
                                     categories=list('cba'),
                                     ordered=True)
        tm.assert_index_equal(ordered_ci.map(to_lower), wanted)

        # unordered index with a name
        named_ci = pd.CategoricalIndex(list('ABABC'),
                                       categories=list('BAC'),
                                       ordered=False,
                                       name='XXX')
        wanted = pd.CategoricalIndex(list('ababc'),
                                     categories=list('bac'),
                                     ordered=False,
                                     name='XXX')
        tm.assert_index_equal(named_ci.map(to_lower), wanted)

        # GH 12766: Return an index not an array
        all_ones = named_ci.map(lambda x: 1)
        wanted = Index(np.array([1] * 5, dtype=np.int64), name='XXX')
        tm.assert_index_equal(all_ones, wanted)

        # change categories dtype
        unnamed_ci = pd.CategoricalIndex(list('ABABC'),
                                         categories=list('BAC'),
                                         ordered=False)
        numbers = {'A': 10, 'B': 20, 'C': 30}

        def to_number(label):
            return numbers.get(label)

        wanted = pd.CategoricalIndex([10, 20, 10, 20, 30],
                                     categories=[20, 10, 30],
                                     ordered=False)
        tm.assert_index_equal(unnamed_ci.map(to_number), wanted)
Ejemplo n.º 13
0
    def test_get_loc(self):
        # GH 12531
        # Every lookup on the CategoricalIndex is compared with the same
        # lookup on an object Index built from identical labels.

        def assert_missing_raises(indexes, label):
            # each index must raise KeyError for ``label``
            for index in indexes:
                with tm.assertRaises(KeyError):
                    index.get_loc(label)

        # unique
        cat_index = CategoricalIndex(list('abcde'), categories=list('edabc'))
        obj_index = Index(list('abcde'))
        self.assertEqual(cat_index.get_loc('a'), obj_index.get_loc('a'))
        self.assertEqual(cat_index.get_loc('e'), obj_index.get_loc('e'))
        assert_missing_raises([cat_index, obj_index], 'NOT-EXIST')

        # non-unique
        cat_index = CategoricalIndex(list('aacded'), categories=list('edabc'))
        obj_index = Index(list('aacded'))

        # results in bool array
        located = cat_index.get_loc('d')
        self.assert_numpy_array_equal(located, obj_index.get_loc('d'))
        self.assert_numpy_array_equal(
            located, np.array([False, False, False, True, False, True]))

        # unique element results in scalar
        located = cat_index.get_loc('e')
        self.assertEqual(located, obj_index.get_loc('e'))
        self.assertEqual(located, 4)
        assert_missing_raises([cat_index, obj_index], 'NOT-EXIST')

        # non-unique, sliceable
        cat_index = CategoricalIndex(list('aabbb'), categories=list('abc'))
        obj_index = Index(list('aabbb'))

        # results in slice
        located = cat_index.get_loc('a')
        self.assertEqual(located, obj_index.get_loc('a'))
        self.assertEqual(located, slice(0, 2, None))

        located = cat_index.get_loc('b')
        self.assertEqual(located, obj_index.get_loc('b'))
        self.assertEqual(located, slice(2, 5, None))

        # 'c' is a declared category but never occurs as a value
        assert_missing_raises([cat_index, obj_index], 'c')
Ejemplo n.º 14
0
    def test_construction(self):
        # Build CategoricalIndex objects from several kinds of input and
        # check the resulting categories, codes and ordered flag.
        ci = self.create_index(categories=list('abcd'))
        categories = ci.categories
        full_codes = np.array([0, 0, 1, 1, 2, 0], dtype='int8')
        ab_codes = np.array([0, 0, 1, 1, -1, 0], dtype='int8')

        def check_parts(built, cats, codes):
            # categories are compared first, then the codes
            self.assert_index_equal(built.categories, Index(cats))
            tm.assert_numpy_array_equal(built.codes, codes)

        # Index() on a CategoricalIndex, or on its values, gives back an
        # equal CategoricalIndex
        for source in (ci, ci.values):
            built = Index(source)
            tm.assert_index_equal(built, ci, exact=True)
            self.assertFalse(built.ordered)

        # empty
        built = CategoricalIndex(categories=categories)
        check_parts(built, categories, np.array([], dtype='int8'))
        self.assertFalse(built.ordered)

        # passing categories
        built = CategoricalIndex(list('aabbca'), categories=categories)
        check_parts(built, categories, full_codes)

        # from a Categorical, without and with explicit categories
        cat = pd.Categorical(list('aabbca'))
        built = CategoricalIndex(cat)
        check_parts(built, list('abc'), full_codes)
        self.assertFalse(built.ordered)

        built = CategoricalIndex(cat, categories=categories)
        check_parts(built, categories, full_codes)
        self.assertFalse(built.ordered)

        # from another CategoricalIndex
        ci = CategoricalIndex(cat, categories=list('abcd'))
        built = CategoricalIndex(ci)
        check_parts(built, categories, full_codes)
        self.assertFalse(built.ordered)

        # narrowing the categories: 'c' is expected to get code -1
        built = CategoricalIndex(ci, categories=list('ab'))
        check_parts(built, list('ab'), ab_codes)
        self.assertFalse(built.ordered)

        built = CategoricalIndex(ci, categories=list('ab'), ordered=True)
        check_parts(built, list('ab'), ab_codes)
        self.assertTrue(built.ordered)

        # turn me to an Index
        plain = Index(np.array(ci))
        self.assertIsInstance(plain, Index)
        self.assertNotIsInstance(plain, CategoricalIndex)