def test_by_columns():
    """Check the number of fields exposed by ``by`` expressions.

    Covers grouping on a single column (with a ``sum`` and with a ``count``
    reduction) and grouping on the whole table symbol.
    """
    t = Symbol('t', 'var * {name: string, amount: int32, id: int32}')

    # Grouping on one column: two fields are expected in the result.
    assert len(by(t['id'], total=t['amount'].sum()).fields) == 2
    assert len(by(t['id'], count=t['id'].count()).fields) == 2

    # Grouping on the whole table: four fields are expected, presumably the
    # three fields of ``t`` plus ``count``.
    assert len(by(t, count=t.count()).fields) == 4
def test_reduction():
    """Check the dshapes reported by reduction expressions on a table symbol."""
    t = Symbol('t', 'var * {name: string, amount: int32}')

    # NOTE(review): ``sum`` is presumably the expression-level reduction
    # brought in by the file's imports rather than the builtin -- confirm.
    r = sum(t['amount'])
    total_shape = r.dshape
    accepted_shapes = (
        dshape('int64'),
        dshape('{amount: int64}'),
        dshape('{amount_sum: int64}'),
    )
    assert total_shape in accepted_shapes

    # The dshape of a count is checked for an integer measure and for the
    # absence of the column name and the string type.
    count_shape = t.count().dshape
    count_text = str(count_shape)
    assert 'amount' not in count_text
    assert count_shape[0] in (int32, int64)
    assert 'int' in count_text
    assert 'int' in str(t.nunique().dshape)

    # max/min over a string column should still mention ``string``.
    for extreme in (t['name'].max(), t['name'].min()):
        assert 'string' in str(extreme.dshape)
    assert 'string' not in count_text

    # Summation: an ``int`` column's sum mentions ``int``; a ``real``
    # column's sum does not.
    t = Symbol('t', 'var * {name: string, amount: real, id: int}')
    id_total_text = str(t['id'].sum().dshape)
    amount_total_text = str(t['amount'].sum().dshape)
    assert 'int' in id_total_text
    assert 'int' not in amount_total_text
def test_count():
    """Counting ``[1, None, 2]`` yields 2."""
    t = Symbol('t', '3 * int')
    values = [1, None, 2]
    counted = compute(t.count(), values)
    assert counted == 2
def test_errors():
    """``compute_up`` on a ``by`` expression with data ``1`` raises NotImplementedError."""
    t = Symbol('t', 'var * {foo: int}')
    with raises(NotImplementedError):
        # The expression is built inside the block, exactly as before, so an
        # error from either step satisfies the expectation.
        grouped = by(t, t.count())
        compute_up(grouped, 1)
def test_count_nan():
    """Counting a float array containing one NaN among three values yields 2."""
    t = Symbol('t', '3 * ?real')
    data = np.array([1.0, np.nan, 2.0])
    counted = compute(t.count(), data)
    assert counted == 2