def test_tally():
    """Exercise ``tally`` ungrouped, grouped, weighted and sorted."""
    data = {
        'x': [1, 2, 3, 4, 5, 6],
        'y': ['a', 'b', 'a', 'b', 'a', 'b'],
        'w': [1, 2, 1, 2, 1, 2],
    }
    df = pd.DataFrame(data)

    # No grouping, no weights: n equals the number of rows
    counts = df >> tally()
    assert counts.loc[0, 'n'] == 6

    # Grouped on y: three rows in each group
    counts = df >> group_by('y') >> tally()
    assert counts['n'].tolist() == [3, 3]

    # Grouped and weighted by column w: n is the per-group sum of w,
    # which must match an explicit summarize of sum(w)
    counts = df >> group_by('y') >> tally('w')
    assert counts['n'].tolist() == [3, 6]
    expected = df >> group_by('y') >> summarize(n='sum(w)')
    assert counts.equals(expected)

    # External weights
    counts = df >> tally(range(5))
    assert counts.loc[0, 'n'] == 10

    # Sort
    counts = df >> group_by('y') >> tally('w', sort=True)
    assert counts['n'].tolist() == [6, 3]
def test_data_as_first_argument():
    """Check that each verb gives the same result in both call styles.

    Every verb is invoked once with the dataframe as the first positional
    argument (``verb(df, ...)``) and once through the pipe operator
    (``df >> verb(...)``); the two results are compared.
    """
    def equals(df1, df2):
        # Thin wrapper so the comparisons below read uniformly
        return df1.equals(df2)

    df = pd.DataFrame({'x': [0, 1, 2, 3, 4, 5],
                       'y': [0, 0, 1, 1, 2, 3]})

    assert equals(define(df.copy(), 'x*2'), df.copy() >> define('x*2'))
    assert equals(create(df, 'x*2'), df >> create('x*2'))
    # Sampling is compared by length only, not by content
    assert len(sample_n(df, 5)) == len(df >> sample_n(5))
    assert len(sample_frac(df, .3)) == len(df >> sample_frac(.3))
    assert equals(select(df, 'x'), df >> select('x'))
    assert equals(rename(df.copy(), z='x'), df.copy() >> rename(z='x'))
    assert equals(distinct(df), df >> distinct())
    assert equals(arrange(df, 'np.sin(x)'), df >> arrange('np.sin(x)'))
    assert equals(group_by(df, 'x'), df >> group_by('x'))
    assert equals(ungroup(group_by(df, 'x')),
                  df >> group_by('x') >> ungroup())
    assert equals(summarize(df, 'sum(x)'), df >> summarize('sum(x)'))
    assert equals(query(df, 'x % 2'), df >> query('x % 2'))
    assert equals(tally(df, 'x'), df >> tally('x'))

    def xsum(gdf):
        return [gdf['x'].sum()]

    assert equals(do(group_by(df, 'y'), xsum=xsum),
                  df >> group_by('y') >> do(xsum=xsum))

    # The closing parenthesis of len() previously wrapped the comparison
    # (``len(head(df, 4) == 4)``), so the assertion only tested the
    # truthiness of a length and could not fail on a wrong row count.
    assert len(head(df, 4)) == 4
    assert len(tail(df, 4)) == 4
def test_tally(self):
    """Pass a ``tally()`` verb created without arguments to ``self._test``."""
    self._test(tally())