def test_distinct_name():
    """Column lookup and naming behave the same with or without distinct()."""
    table = Symbol('t', 'var * {id: int32, name: string}')

    # Attribute-style and item-style column access yield identical expressions.
    by_attr = table.name
    by_item = table['name']
    assert by_attr.isidentical(by_item)

    # The same equivalence holds for a column taken from a distinct table.
    distinct_by_attr = table.distinct().name
    distinct_by_item = table.distinct()['name']
    assert distinct_by_attr.isidentical(distinct_by_item)

    # distinct() applied to a column keeps that column's name.
    assert table.id.distinct()._name == 'id'
    assert table.name._name == 'name'
def test_Distinct():
    """distinct() computed on a NumPy record array agrees with np.unique."""
    # NOTE(review): another ``test_Distinct`` is visible later in this view.
    # If both live in the same module the later definition rebinds the name
    # and this test is never collected -- confirm they are in separate files.
    rows = [('Alice', 100), ('Alice', -200), ('Bob', 100), ('Bob', 100)]
    x = np.array(rows, dtype=[('name', 'S5'), ('amount', 'i8')])
    t = Symbol('t', 'var * {name: string, amount: int64}')

    # Distinct over a single column.
    unique_names = compute(t['name'].distinct(), x)
    assert eq(unique_names, np.unique(x['name']))

    # Distinct over whole records.
    unique_rows = compute(t.distinct(), x)
    assert eq(unique_rows, np.unique(x))
def test_Distinct():
    """Check the datashape and name reported by distinct expressions.

    Covers both a single-column distinct and a whole-table distinct.
    """
    # NOTE(review): another ``test_Distinct`` is visible earlier in this view.
    # If both live in the same module this definition shadows the earlier
    # one -- confirm they are in separate files.
    t = Symbol('t', 'var * {name: string, amount: int32}')

    # Distinct over one column: a variable-length collection of strings that
    # keeps the column's name.  (The stray debugging print of the dshape was
    # dropped; the assertion below already reports it on failure.)
    r = distinct(t['name'])
    assert r.dshape == dshape('var * string')
    assert r._name == 'name'

    # Distinct over the whole table leaves the datashape unchanged.
    r = t.distinct()
    assert r.dshape == t.dshape
t[t.amount > 50]['name']: [], by(t.name, t.amount.sum()): [], by(t.id, t.id.count()): [], by(t[['id', 'amount']], t.id.count()): [], by(t[['id', 'amount']], (t.amount + 1).sum()): [mongo], by(t[['id', 'amount']], t.name.nunique()): [mongo], by(t.id, t.amount.count()): [], by(t.id, t.id.nunique()): [mongo], # by(t, t.count()): [], # by(t.id, t.count()): [df], t[['amount', 'id']]: [x], # https://github.com/numpy/numpy/issues/3256 t[['id', 'amount']]: [x, bc], # bcolz sorting t[0]: [sql, mongo], t[::2]: [sql, mongo], t.id.utcfromtimestamp: [sql], t.distinct().nrows: [], t.nelements(axis=0): [], t.nelements(axis=None): [] } base = df def df_eq(a, b): return (list(a.columns) == list(b.columns) and list(a.dtypes) == list(b.dtypes) and into(set, into(list, a)) == into(set, into(list, b))) def typename(obj): return type(obj).__name__