Example #1
0
    def test_groupby_datetime_categorical(self):
        # GH9049: ensure backward compatibility
        levels = pd.date_range("2014-01-01", periods=4)
        codes = np.random.randint(0, 4, size=100)

        cats = Categorical.from_codes(codes, levels, ordered=True)

        data = DataFrame(np.random.randn(100, 4))
        result = data.groupby(cats).mean()

        expected = data.groupby(np.asarray(cats)).mean()
        expected = expected.reindex(levels)
        expected.index = CategoricalIndex(expected.index, categories=expected.index, ordered=True)

        assert_frame_equal(result, expected)

        grouped = data.groupby(cats)
        desc_result = grouped.describe()

        idx = cats.codes.argsort()
        ord_labels = cats.take_nd(idx)
        ord_data = data.take(idx)
        expected = ord_data.groupby(ord_labels).describe()
        expected.index.names = [None, None]
        assert_frame_equal(desc_result, expected)
        tm.assert_index_equal(desc_result.index, expected.index)
        tm.assert_index_equal(desc_result.index.get_level_values(0), expected.index.get_level_values(0))

        # GH 10460
        expc = Categorical.from_codes(np.arange(4).repeat(8), levels, ordered=True)
        exp = CategoricalIndex(expc)
        self.assert_index_equal(desc_result.index.get_level_values(0), exp)
        exp = Index(["count", "mean", "std", "min", "25%", "50%", "75%", "max"] * 4)
        self.assert_index_equal(desc_result.index.get_level_values(1), exp)
Example #2
0
    def test_groupby_categorical(self):
        levels = ["foo", "bar", "baz", "qux"]
        codes = np.random.randint(0, 4, size=100)

        cats = Categorical.from_codes(codes, levels, ordered=True)

        data = DataFrame(np.random.randn(100, 4))

        result = data.groupby(cats).mean()

        expected = data.groupby(np.asarray(cats)).mean()
        exp_idx = CategoricalIndex(levels, categories=cats.categories, ordered=True)
        expected = expected.reindex(exp_idx)

        assert_frame_equal(result, expected)

        grouped = data.groupby(cats)
        desc_result = grouped.describe()

        idx = cats.codes.argsort()
        ord_labels = np.asarray(cats).take(idx)
        ord_data = data.take(idx)

        exp_cats = Categorical(ord_labels, ordered=True, categories=["foo", "bar", "baz", "qux"])
        expected = ord_data.groupby(exp_cats, sort=False).describe()
        expected.index.names = [None, None]
        assert_frame_equal(desc_result, expected)

        # GH 10460
        expc = Categorical.from_codes(np.arange(4).repeat(8), levels, ordered=True)
        exp = CategoricalIndex(expc)
        self.assert_index_equal(desc_result.index.get_level_values(0), exp)
        exp = Index(["count", "mean", "std", "min", "25%", "50%", "75%", "max"] * 4)
        self.assert_index_equal(desc_result.index.get_level_values(1), exp)
Example #3
0
    def test_groupby_categorical(self):
        levels = ['foo', 'bar', 'baz', 'qux']
        codes = np.random.randint(0, 4, size=100)

        cats = Categorical.from_codes(codes, levels, ordered=True)

        data = DataFrame(np.random.randn(100, 4))

        result = data.groupby(cats).mean()

        expected = data.groupby(np.asarray(cats)).mean()
        exp_idx = CategoricalIndex(levels,
                                   categories=cats.categories,
                                   ordered=True)
        expected = expected.reindex(exp_idx)

        assert_frame_equal(result, expected)

        grouped = data.groupby(cats)
        desc_result = grouped.describe()

        idx = cats.codes.argsort()
        ord_labels = np.asarray(cats).take(idx)
        ord_data = data.take(idx)

        exp_cats = Categorical(ord_labels,
                               ordered=True,
                               categories=['foo', 'bar', 'baz', 'qux'])
        expected = ord_data.groupby(exp_cats, sort=False).describe()
        expected.index.names = [None, None]
        assert_frame_equal(desc_result, expected)

        # GH 10460
        expc = Categorical.from_codes(np.arange(4).repeat(8),
                                      levels,
                                      ordered=True)
        exp = CategoricalIndex(expc)
        self.assert_index_equal(desc_result.index.get_level_values(0), exp)
        exp = Index(
            ['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'] * 4)
        self.assert_index_equal(desc_result.index.get_level_values(1), exp)
Example #4
0
    def test_groupby_datetime_categorical(self):
        # GH9049: ensure backward compatibility
        levels = pd.date_range('2014-01-01', periods=4)
        codes = np.random.randint(0, 4, size=100)

        cats = Categorical.from_codes(codes, levels, ordered=True)

        data = DataFrame(np.random.randn(100, 4))
        result = data.groupby(cats).mean()

        expected = data.groupby(np.asarray(cats)).mean()
        expected = expected.reindex(levels)
        expected.index = CategoricalIndex(expected.index,
                                          categories=expected.index,
                                          ordered=True)

        assert_frame_equal(result, expected)

        grouped = data.groupby(cats)
        desc_result = grouped.describe()

        idx = cats.codes.argsort()
        ord_labels = cats.take_nd(idx)
        ord_data = data.take(idx)
        expected = ord_data.groupby(ord_labels).describe()
        expected.index.names = [None, None]
        assert_frame_equal(desc_result, expected)
        tm.assert_index_equal(desc_result.index, expected.index)
        tm.assert_index_equal(desc_result.index.get_level_values(0),
                              expected.index.get_level_values(0))

        # GH 10460
        expc = Categorical.from_codes(np.arange(4).repeat(8),
                                      levels,
                                      ordered=True)
        exp = CategoricalIndex(expc)
        self.assert_index_equal(desc_result.index.get_level_values(0), exp)
        exp = Index(
            ['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'] * 4)
        self.assert_index_equal(desc_result.index.get_level_values(1), exp)