Example #1
0
    def test_crosstab_multiple(self):
        df = self.df

        result = crosstab(df['A'], [df['B'], df['C']])
        expected = df.groupby(['A', 'B', 'C']).size()
        expected = expected.unstack('B').unstack('C').fillna(0).astype(np.int64)
        tm.assert_frame_equal(result, expected)

        result = crosstab([df['B'], df['C']], df['A'])
        expected = df.groupby(['B', 'C', 'A']).size()
        expected = expected.unstack('A').fillna(0).astype(np.int64)
        tm.assert_frame_equal(result, expected)
Example #2
0
    def test_crosstab_multiple(self):
        df = self.df

        result = crosstab(df["A"], [df["B"], df["C"]])
        expected = df.groupby(["A", "B", "C"]).size()
        expected = expected.unstack("B").unstack("C").fillna(0).astype(np.int64)
        tm.assert_frame_equal(result, expected)

        result = crosstab([df["B"], df["C"]], df["A"])
        expected = df.groupby(["B", "C", "A"]).size()
        expected = expected.unstack("A").fillna(0).astype(np.int64)
        tm.assert_frame_equal(result, expected)
Example #3
0
    def test_crosstab_multiple(self):
        df = self.df

        result = crosstab(df['A'], [df['B'], df['C']])
        expected = df.groupby(['A', 'B', 'C']).size()
        expected = expected.unstack('B').unstack('C').fillna(0).astype(np.int64)
        tm.assert_frame_equal(result, expected)

        result = crosstab([df['B'], df['C']], df['A'])
        expected = df.groupby(['B', 'C', 'A']).size()
        expected = expected.unstack('A').fillna(0).astype(np.int64)
        tm.assert_frame_equal(result, expected)
Example #4
0
    def test_crosstab_margins(self):
        a = np.random.randint(0, 7, size=100)
        b = np.random.randint(0, 3, size=100)
        c = np.random.randint(0, 5, size=100)

        df = DataFrame({'a': a, 'b': b, 'c': c})

        result = crosstab(a, [b, c],
                          rownames=['a'],
                          colnames=('b', 'c'),
                          margins=True)

        self.assertEqual(result.index.names, ('a', ))
        self.assertEqual(result.columns.names, ['b', 'c'])

        all_cols = result['All', '']
        exp_cols = df.groupby(['a']).size().astype('i8')
        exp_cols = exp_cols.append(Series([len(df)], index=['All']))
        exp_cols.name = ('All', '')

        tm.assert_series_equal(all_cols, exp_cols)

        all_rows = result.ix['All']
        exp_rows = df.groupby(['b', 'c']).size().astype('i8')
        exp_rows = exp_rows.append(Series([len(df)], index=[('All', '')]))
        exp_rows.name = 'All'

        exp_rows = exp_rows.reindex(all_rows.index)
        exp_rows = exp_rows.fillna(0).astype(np.int64)
        tm.assert_series_equal(all_rows, exp_rows)
Example #5
0
    def test_crosstab_margins(self):
        a = np.random.randint(0, 7, size=100)
        b = np.random.randint(0, 3, size=100)
        c = np.random.randint(0, 5, size=100)

        df = DataFrame({"a": a, "b": b, "c": c})

        result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"), margins=True)

        self.assertEqual(result.index.names, ("a",))
        self.assertEqual(result.columns.names, ["b", "c"])

        all_cols = result["All", ""]
        exp_cols = df.groupby(["a"]).size().astype("i8")
        exp_cols = exp_cols.append(Series([len(df)], index=["All"]))
        exp_cols.name = ("All", "")

        tm.assert_series_equal(all_cols, exp_cols)

        all_rows = result.ix["All"]
        exp_rows = df.groupby(["b", "c"]).size().astype("i8")
        exp_rows = exp_rows.append(Series([len(df)], index=[("All", "")]))
        exp_rows.name = "All"

        exp_rows = exp_rows.reindex(all_rows.index)
        exp_rows = exp_rows.fillna(0).astype(np.int64)
        tm.assert_series_equal(all_rows, exp_rows)
Example #6
0
    def test_crosstab_margins(self):
        a = np.random.randint(0, 7, size=100)
        b = np.random.randint(0, 3, size=100)
        c = np.random.randint(0, 5, size=100)

        df = DataFrame({'a': a, 'b': b, 'c': c})

        result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'),
                          margins=True)

        self.assertEqual(result.index.names, ('a',))
        self.assertEqual(result.columns.names, ['b', 'c'])

        all_cols = result['All', '']
        exp_cols = df.groupby(['a']).size().astype('i8')
        exp_cols = exp_cols.append(Series([len(df)], index=['All']))

        tm.assert_series_equal(all_cols, exp_cols)

        all_rows = result.ix['All']
        exp_rows = df.groupby(['b', 'c']).size().astype('i8')
        exp_rows = exp_rows.append(Series([len(df)], index=[('All', '')]))

        exp_rows = exp_rows.reindex(all_rows.index)
        exp_rows = exp_rows.fillna(0).astype(np.int64)
        tm.assert_series_equal(all_rows, exp_rows)
Example #7
0
 def test_crosstab_dropna(self):
     # GH 3820
     a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object)
     b = np.array(["one", "one", "two", "one", "two", "two", "two"], dtype=object)
     c = np.array(["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object)
     res = crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=False)
     m = MultiIndex.from_tuples([("one", "dull"), ("one", "shiny"), ("two", "dull"), ("two", "shiny")])
     assert_equal(res.columns.values, m.values)
Example #8
0
 def test_crosstab_dropna(self):
     # GH 3820
     a = np.array(['foo', 'foo', 'foo', 'bar', 'bar', 'foo', 'foo'], dtype=object)
     b = np.array(['one', 'one', 'two', 'one', 'two', 'two', 'two'], dtype=object)
     c = np.array(['dull', 'dull', 'dull', 'dull', 'dull', 'shiny', 'shiny'], dtype=object)
     res = crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'], dropna=False)
     m = MultiIndex.from_tuples([('one', 'dull'), ('one', 'shiny'),
                                 ('two', 'dull'), ('two', 'shiny')])
     assert_equal(res.columns.values, m.values)
Example #9
0
 def test_crosstab_dropna(self):
     # GH 3820
     a = np.array(['foo', 'foo', 'foo', 'bar', 'bar', 'foo', 'foo'], dtype=object)
     b = np.array(['one', 'one', 'two', 'one', 'two', 'two', 'two'], dtype=object)
     c = np.array(['dull', 'dull', 'dull', 'dull', 'dull', 'shiny', 'shiny'], dtype=object)
     res = crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'], dropna=False)
     m = MultiIndex.from_tuples([('one', 'dull'), ('one', 'shiny'),
                                 ('two', 'dull'), ('two', 'shiny')])
     assert_equal(res.columns.values, m.values)
Example #10
0
    def test_crosstab_ndarray(self):
        a = np.random.randint(0, 5, size=100)
        b = np.random.randint(0, 3, size=100)
        c = np.random.randint(0, 10, size=100)

        df = DataFrame({"a": a, "b": b, "c": c})

        result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"))
        expected = crosstab(df["a"], [df["b"], df["c"]])
        tm.assert_frame_equal(result, expected)

        result = crosstab([b, c], a, colnames=["a"], rownames=("b", "c"))
        expected = crosstab([df["b"], df["c"]], df["a"])
        tm.assert_frame_equal(result, expected)

        # assign arbitrary names
        result = crosstab(self.df["A"].values, self.df["C"].values)
        self.assertEqual(result.index.name, "row_0")
        self.assertEqual(result.columns.name, "col_0")
Example #11
0
    def test_crosstab_ndarray(self):
        a = np.random.randint(0, 5, size=100)
        b = np.random.randint(0, 3, size=100)
        c = np.random.randint(0, 10, size=100)

        df = DataFrame({'a': a, 'b': b, 'c': c})

        result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'))
        expected = crosstab(df['a'], [df['b'], df['c']])
        tm.assert_frame_equal(result, expected)

        result = crosstab([b, c], a, colnames=['a'], rownames=('b', 'c'))
        expected = crosstab([df['b'], df['c']], df['a'])
        tm.assert_frame_equal(result, expected)

        # assign arbitrary names
        result = crosstab(self.df['A'].values, self.df['C'].values)
        self.assertEqual(result.index.name, 'row_0')
        self.assertEqual(result.columns.name, 'col_0')
Example #12
0
    def test_crosstab_no_overlap(self):
        # GS 10291

        s1 = pd.Series([1, 2, 3], index=[1, 2, 3])
        s2 = pd.Series([4, 5, 6], index=[4, 5, 6])

        actual = crosstab(s1, s2)
        expected = pd.DataFrame()

        tm.assert_frame_equal(actual, expected)
Example #13
0
    def test_crosstab_ndarray(self):
        a = np.random.randint(0, 5, size=100)
        b = np.random.randint(0, 3, size=100)
        c = np.random.randint(0, 10, size=100)

        df = DataFrame({'a': a, 'b': b, 'c': c})

        result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'))
        expected = crosstab(df['a'], [df['b'], df['c']])
        tm.assert_frame_equal(result, expected)

        result = crosstab([b, c], a, colnames=['a'], rownames=('b', 'c'))
        expected = crosstab([df['b'], df['c']], df['a'])
        tm.assert_frame_equal(result, expected)

        # assign arbitrary names
        result = crosstab(self.df['A'].values, self.df['C'].values)
        self.assertEqual(result.index.name, 'row_0')
        self.assertEqual(result.columns.name, 'col_0')
Example #14
0
    def test_crosstab_no_overlap(self):
        # GS 10291

        s1 = pd.Series([1, 2, 3], index=[1, 2, 3])
        s2 = pd.Series([4, 5, 6], index=[4, 5, 6])

        actual = crosstab(s1, s2)
        expected = pd.DataFrame()

        tm.assert_frame_equal(actual, expected)
Example #15
0
    def test_crosstab_pass_values(self):
        a = np.random.randint(0, 7, size=100)
        b = np.random.randint(0, 3, size=100)
        c = np.random.randint(0, 5, size=100)
        values = np.random.randn(100)

        table = crosstab([a, b], c, values, aggfunc=np.sum, rownames=["foo", "bar"], colnames=["baz"])

        df = DataFrame({"foo": a, "bar": b, "baz": c, "values": values})

        expected = df.pivot_table("values", index=["foo", "bar"], columns="baz", aggfunc=np.sum)
        tm.assert_frame_equal(table, expected)
Example #16
0
    def test_crosstab_pass_values(self):
        a = np.random.randint(0, 7, size=100)
        b = np.random.randint(0, 3, size=100)
        c = np.random.randint(0, 5, size=100)
        values = np.random.randn(100)

        table = crosstab([a, b], c, values, aggfunc=np.sum,
                         rownames=['foo', 'bar'], colnames=['baz'])

        df = DataFrame({'foo': a, 'bar': b, 'baz': c, 'values': values})

        expected = df.pivot_table('values', index=['foo', 'bar'], columns='baz',
                                  aggfunc=np.sum)
        tm.assert_frame_equal(table, expected)
Example #17
0
    def test_crosstab_pass_values(self):
        a = np.random.randint(0, 7, size=100)
        b = np.random.randint(0, 3, size=100)
        c = np.random.randint(0, 5, size=100)
        values = np.random.randn(100)

        table = crosstab([a, b], c, values, aggfunc=np.sum,
                         rownames=['foo', 'bar'], colnames=['baz'])

        df = DataFrame({'foo': a, 'bar': b, 'baz': c, 'values': values})

        expected = df.pivot_table('values', rows=['foo', 'bar'], cols='baz',
                                  aggfunc=np.sum)
        tm.assert_frame_equal(table, expected)
Example #18
0
 def test_crosstab_single(self):
     df = self.df
     result = crosstab(df['A'], df['C'])
     expected = df.groupby(['A', 'C']).size().unstack()
     tm.assert_frame_equal(result, expected.fillna(0).astype(np.int64))
Example #19
0
 def test_crosstab_single(self):
     df = self.df
     result = crosstab(df['A'], df['C'])
     expected = df.groupby(['A', 'C']).size().unstack()
     tm.assert_frame_equal(result, expected.fillna(0).astype(np.int64))