def test_crosstab_multiple(self): df = self.df result = crosstab(df['A'], [df['B'], df['C']]) expected = df.groupby(['A', 'B', 'C']).size() expected = expected.unstack('B').unstack('C').fillna(0).astype(np.int64) tm.assert_frame_equal(result, expected) result = crosstab([df['B'], df['C']], df['A']) expected = df.groupby(['B', 'C', 'A']).size() expected = expected.unstack('A').fillna(0).astype(np.int64) tm.assert_frame_equal(result, expected)
def test_crosstab_multiple(self): df = self.df result = crosstab(df["A"], [df["B"], df["C"]]) expected = df.groupby(["A", "B", "C"]).size() expected = expected.unstack("B").unstack("C").fillna(0).astype(np.int64) tm.assert_frame_equal(result, expected) result = crosstab([df["B"], df["C"]], df["A"]) expected = df.groupby(["B", "C", "A"]).size() expected = expected.unstack("A").fillna(0).astype(np.int64) tm.assert_frame_equal(result, expected)
def test_crosstab_margins(self): a = np.random.randint(0, 7, size=100) b = np.random.randint(0, 3, size=100) c = np.random.randint(0, 5, size=100) df = DataFrame({'a': a, 'b': b, 'c': c}) result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'), margins=True) self.assertEqual(result.index.names, ('a', )) self.assertEqual(result.columns.names, ['b', 'c']) all_cols = result['All', ''] exp_cols = df.groupby(['a']).size().astype('i8') exp_cols = exp_cols.append(Series([len(df)], index=['All'])) exp_cols.name = ('All', '') tm.assert_series_equal(all_cols, exp_cols) all_rows = result.ix['All'] exp_rows = df.groupby(['b', 'c']).size().astype('i8') exp_rows = exp_rows.append(Series([len(df)], index=[('All', '')])) exp_rows.name = 'All' exp_rows = exp_rows.reindex(all_rows.index) exp_rows = exp_rows.fillna(0).astype(np.int64) tm.assert_series_equal(all_rows, exp_rows)
def test_crosstab_margins(self): a = np.random.randint(0, 7, size=100) b = np.random.randint(0, 3, size=100) c = np.random.randint(0, 5, size=100) df = DataFrame({"a": a, "b": b, "c": c}) result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"), margins=True) self.assertEqual(result.index.names, ("a",)) self.assertEqual(result.columns.names, ["b", "c"]) all_cols = result["All", ""] exp_cols = df.groupby(["a"]).size().astype("i8") exp_cols = exp_cols.append(Series([len(df)], index=["All"])) exp_cols.name = ("All", "") tm.assert_series_equal(all_cols, exp_cols) all_rows = result.ix["All"] exp_rows = df.groupby(["b", "c"]).size().astype("i8") exp_rows = exp_rows.append(Series([len(df)], index=[("All", "")])) exp_rows.name = "All" exp_rows = exp_rows.reindex(all_rows.index) exp_rows = exp_rows.fillna(0).astype(np.int64) tm.assert_series_equal(all_rows, exp_rows)
def test_crosstab_margins(self): a = np.random.randint(0, 7, size=100) b = np.random.randint(0, 3, size=100) c = np.random.randint(0, 5, size=100) df = DataFrame({'a': a, 'b': b, 'c': c}) result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'), margins=True) self.assertEqual(result.index.names, ('a',)) self.assertEqual(result.columns.names, ['b', 'c']) all_cols = result['All', ''] exp_cols = df.groupby(['a']).size().astype('i8') exp_cols = exp_cols.append(Series([len(df)], index=['All'])) tm.assert_series_equal(all_cols, exp_cols) all_rows = result.ix['All'] exp_rows = df.groupby(['b', 'c']).size().astype('i8') exp_rows = exp_rows.append(Series([len(df)], index=[('All', '')])) exp_rows = exp_rows.reindex(all_rows.index) exp_rows = exp_rows.fillna(0).astype(np.int64) tm.assert_series_equal(all_rows, exp_rows)
def test_crosstab_dropna(self): # GH 3820 a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object) b = np.array(["one", "one", "two", "one", "two", "two", "two"], dtype=object) c = np.array(["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object) res = crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=False) m = MultiIndex.from_tuples([("one", "dull"), ("one", "shiny"), ("two", "dull"), ("two", "shiny")]) assert_equal(res.columns.values, m.values)
def test_crosstab_dropna(self): # GH 3820 a = np.array(['foo', 'foo', 'foo', 'bar', 'bar', 'foo', 'foo'], dtype=object) b = np.array(['one', 'one', 'two', 'one', 'two', 'two', 'two'], dtype=object) c = np.array(['dull', 'dull', 'dull', 'dull', 'dull', 'shiny', 'shiny'], dtype=object) res = crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'], dropna=False) m = MultiIndex.from_tuples([('one', 'dull'), ('one', 'shiny'), ('two', 'dull'), ('two', 'shiny')]) assert_equal(res.columns.values, m.values)
def test_crosstab_ndarray(self): a = np.random.randint(0, 5, size=100) b = np.random.randint(0, 3, size=100) c = np.random.randint(0, 10, size=100) df = DataFrame({"a": a, "b": b, "c": c}) result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c")) expected = crosstab(df["a"], [df["b"], df["c"]]) tm.assert_frame_equal(result, expected) result = crosstab([b, c], a, colnames=["a"], rownames=("b", "c")) expected = crosstab([df["b"], df["c"]], df["a"]) tm.assert_frame_equal(result, expected) # assign arbitrary names result = crosstab(self.df["A"].values, self.df["C"].values) self.assertEqual(result.index.name, "row_0") self.assertEqual(result.columns.name, "col_0")
def test_crosstab_ndarray(self): a = np.random.randint(0, 5, size=100) b = np.random.randint(0, 3, size=100) c = np.random.randint(0, 10, size=100) df = DataFrame({'a': a, 'b': b, 'c': c}) result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c')) expected = crosstab(df['a'], [df['b'], df['c']]) tm.assert_frame_equal(result, expected) result = crosstab([b, c], a, colnames=['a'], rownames=('b', 'c')) expected = crosstab([df['b'], df['c']], df['a']) tm.assert_frame_equal(result, expected) # assign arbitrary names result = crosstab(self.df['A'].values, self.df['C'].values) self.assertEqual(result.index.name, 'row_0') self.assertEqual(result.columns.name, 'col_0')
def test_crosstab_no_overlap(self):
    # GH 10291
    # The two Series are indexed by disjoint labels (1-3 versus 4-6);
    # the resulting table is compared against an empty DataFrame.
    left = pd.Series([1, 2, 3], index=[1, 2, 3])
    right = pd.Series([4, 5, 6], index=[4, 5, 6])

    tm.assert_frame_equal(crosstab(left, right), pd.DataFrame())
def test_crosstab_pass_values(self): a = np.random.randint(0, 7, size=100) b = np.random.randint(0, 3, size=100) c = np.random.randint(0, 5, size=100) values = np.random.randn(100) table = crosstab([a, b], c, values, aggfunc=np.sum, rownames=["foo", "bar"], colnames=["baz"]) df = DataFrame({"foo": a, "bar": b, "baz": c, "values": values}) expected = df.pivot_table("values", index=["foo", "bar"], columns="baz", aggfunc=np.sum) tm.assert_frame_equal(table, expected)
def test_crosstab_pass_values(self): a = np.random.randint(0, 7, size=100) b = np.random.randint(0, 3, size=100) c = np.random.randint(0, 5, size=100) values = np.random.randn(100) table = crosstab([a, b], c, values, aggfunc=np.sum, rownames=['foo', 'bar'], colnames=['baz']) df = DataFrame({'foo': a, 'bar': b, 'baz': c, 'values': values}) expected = df.pivot_table('values', index=['foo', 'bar'], columns='baz', aggfunc=np.sum) tm.assert_frame_equal(table, expected)
def test_crosstab_pass_values(self): a = np.random.randint(0, 7, size=100) b = np.random.randint(0, 3, size=100) c = np.random.randint(0, 5, size=100) values = np.random.randn(100) table = crosstab([a, b], c, values, aggfunc=np.sum, rownames=['foo', 'bar'], colnames=['baz']) df = DataFrame({'foo': a, 'bar': b, 'baz': c, 'values': values}) expected = df.pivot_table('values', rows=['foo', 'bar'], cols='baz', aggfunc=np.sum) tm.assert_frame_equal(table, expected)
def test_crosstab_single(self): df = self.df result = crosstab(df['A'], df['C']) expected = df.groupby(['A', 'C']).size().unstack() tm.assert_frame_equal(result, expected.fillna(0).astype(np.int64))