Example #1
0
def test_by():
    """Group ``t`` by name and check the per-name sum and count of amount."""
    totals = compute(by(t, t.name, t.amount.sum()), c)
    assert set(totals) == {
        ('Alice', -200), ('Bob', 200), ('Charlie', 400), ('Edith', 200),
    }

    counts = compute(by(t, t.name, t.amount.count()), c)
    assert set(counts) == {
        ('Alice', 2), ('Bob', 1), ('Charlie', 1), ('Edith', 1),
    }
Example #2
0
def test_sample():
    """Sampling by count and by the equivalent fraction returns ``n`` rows.

    Asking for more rows than ``databig`` holds is expected to return
    exactly ``len(databig)`` rows.
    """
    total = len(databig)
    for n in range(1, total + 1):
        by_count = len(compute(tbig.sample(n=n), databig))
        by_frac = len(compute(tbig.sample(frac=float(n) / total), databig))
        assert by_count == by_frac
        assert by_frac == n

    oversampled = compute(tbig.sample(n=total * 2), databig)
    assert len(oversampled) == total
Example #3
0
def test_unicode_field_names():
    """Unicode and native-string field names select the same ctable columns."""
    records = np.array(
        [(1, 1., 10.), (2, 2., 20.), (3, 3., 30.)],
        dtype=[('a', 'i8'), ('b', 'f8'), ('c', 'f8')],
    )
    table = bcolz.ctable(records)
    sym = symbol('s', discover(table))

    # Single-field access.
    from_unicode = compute(sym[u'a'], table)[:]
    from_native = compute(sym['a'], table)[:]
    assert eq(from_unicode, from_native)

    # Multi-field projection.
    from_unicode = compute(sym[[u'a', u'c']], table)[:]
    from_native = compute(sym[['a', 'c']], table)[:]
    assert eq(from_unicode, from_native)
Example #4
0
def test_coalesce():
    """Check ``coalesce`` of an optional-int series against four fallbacks.

    The fallbacks are a scalar, a NULL scalar, an array, and an array
    that itself contains a NULL.
    """
    data = pd.Series([0, None, 1, None, 2, None], dtype=object)

    s = symbol('s', 'var * ?int')
    t = symbol('t', 'int')
    u = symbol('u', '?int')
    v = symbol('v', 'var * int')
    w = symbol('w', 'var * ?int')

    # array to scalar
    result = compute(coalesce(s, t), {s: data, t: -1})
    expected = pd.Series([0, -1, 1, -1, 2, -1], dtype=object)
    tm.assert_series_equal(result, expected)

    # array to scalar with NULL
    result = compute(coalesce(s, u), {s: data, u: None})
    expected = pd.Series([0, None, 1, None, 2, None], dtype=object)
    tm.assert_series_equal(result, expected)

    # array to array
    fallback = np.array([-1, -2, -3, -4, -5, -6])
    result = compute(coalesce(s, v), {s: data, v: fallback})
    expected = pd.Series([0, -2, 1, -4, 2, -6], dtype=object)
    tm.assert_series_equal(result, expected)

    # array to array with NULL
    fallback = np.array([-1, None, -3, -4, -5, -6])
    result = compute(coalesce(s, w), {s: data, w: fallback})
    expected = pd.Series([0, None, 1, -4, 2, -6], dtype=object)
    tm.assert_series_equal(result, expected)
Example #5
0
def test_outer_join():
    """Check the default, left, right and outer joins of two small tables."""
    left = [
        (1, 'Alice', 100),
        (2, 'Bob', 200),
        (4, 'Dennis', 400),
    ]
    right = [
        ('NYC', 1),
        ('Boston', 1),
        ('LA', 3),
        ('Moscow', 4),
    ]

    L = TableSymbol('L', '{id: int, name: string, amount: real}')
    R = TableSymbol('R', '{city: string, id: int}')

    # Rows expected from every join flavour.
    matched = {
        (1, 'Alice', 100, 'NYC'),
        (1, 'Alice', 100, 'Boston'),
        (4, 'Dennis', 400, 'Moscow'),
    }
    # Rows that only appear when one side is preserved.
    left_only = {(2, 'Bob', 200, None)}
    right_only = {(3, None, None, 'LA')}

    inner = compute(join(L, R), {L: left, R: right})
    assert set(inner) == matched

    left_join = compute(join(L, R, how='left'), {L: left, R: right})
    assert set(left_join) == matched | left_only

    right_join = compute(join(L, R, how='right'), {L: left, R: right})
    assert set(right_join) == matched | right_only

    outer = compute(join(L, R, how='outer'), {L: left, R: right})
    assert set(outer) == matched | left_only | right_only
Example #6
0
def test_arithmetic():
    """Compare the string form of computed ``+``, ``*`` and ``%`` with pandas."""
    added = compute(t['amount'] + t['id'], df)
    assert str(added) == str(df.amount + df.id)

    multiplied = compute(t['amount'] * t['id'], df)
    assert str(multiplied) == str(df.amount * df.id)

    remainder = compute(t['amount'] % t['id'], df)
    assert str(remainder) == str(df.amount % df.id)
Example #7
0
def test_arithmetic():
    """Check ``+``, ``*`` and ``%`` of amount and id against plain Python."""
    added = list(compute(t['amount'] + t['id'], data))
    assert added == [amount + ident for _, amount, ident in data]

    multiplied = list(compute(t['amount'] * t['id'], data))
    assert multiplied == [amount * ident for _, amount, ident in data]

    remainder = list(compute(t['amount'] % t['id'], data))
    assert remainder == [amount % ident for _, amount, ident in data]
Example #8
0
def test_by_multi_column_grouper():
    """Group by two columns (``x``, ``y``) and count ``z`` within each group."""
    t = TableSymbol('t', '{x: int, y: int, z: int}')
    expr = by(t[['x', 'y']], t['z'].count())
    data = [(1, 2, 0), (1, 2, 0), (1, 1, 0)]

    # Compute once; the earlier debug print ran the computation a second
    # time and wrote to the test output.
    result = set(compute(expr, data))
    assert result == {(1, 2, 2), (1, 1, 1)}
Example #9
0
def test_arithmetic():
    """Check computed ``+``, ``*`` and ``%`` against the pandas Series results."""
    added = compute(t['amount'] + t['id'], df)
    assert_series_equal(added, df.amount + df.id)

    multiplied = compute(t['amount'] * t['id'], df)
    assert_series_equal(multiplied, df.amount * df.id)

    remainder = compute(t['amount'] % t['id'], df)
    assert_series_equal(remainder, df.amount % df.id)
Example #10
0
def test_arithmetic():
    """Check computed ``+``, ``*`` and ``%`` against the same ops on ``x``."""
    added = compute(t['amount'] + t['id'], x)
    assert eq(added, x['amount'] + x['id'])

    multiplied = compute(t['amount'] * t['id'], x)
    assert eq(multiplied, x['amount'] * x['id'])

    remainder = compute(t['amount'] % t['id'], x)
    assert eq(remainder, x['amount'] % x['id'])
Example #11
0
def test_union():
    """The union of ``t`` with itself is ``x`` twice; likewise for ``id``."""
    doubled = compute(union(t, t), x)
    assert doubled.shape == (x.shape[0] * 2,)
    first_half, second_half = doubled[:5], doubled[5:]
    assert eq(first_half, x)
    assert eq(second_half, x)

    doubled_ids = compute(union(t.id, t.id), x)
    assert eq(doubled_ids, np.array([1, 2, 3, 4, 5, 1, 2, 3, 4, 5]))
Example #12
0
def test_summary_on_ndarray():
    """Check ``summary`` on an ndarray, with and without ``keepdims``."""
    plain = compute(summary(total=a.sum(), min=a.min()), ax)
    assert plain == (ax.min(), ax.sum())

    result = compute(summary(total=a.sum(), min=a.min(), keepdims=True), ax)
    record_dtype = [("min", "float32"), ("total", "float64")]
    expected = np.array([(ax.min(), ax.sum())], dtype=record_dtype)
    assert result.ndim == ax.ndim
    assert eq(expected, result)
Example #13
0
def test_timedelta_arith():
    """Adding/subtracting a one-day timedelta64 matches numpy's own result."""
    dates = np.arange("2014-01-01", "2014-02-01", dtype="datetime64")
    delta = np.timedelta64(1, "D")
    sym = symbol("s", discover(dates))

    shifted_forward = compute(sym + delta, dates)
    assert (shifted_forward == dates + delta).all()

    shifted_back = compute(sym - delta, dates)
    assert (shifted_back == dates - delta).all()

    # datetime - datetime
    difference = compute(sym - (sym - delta), dates)
    assert (difference == dates - (dates - delta)).all()
def test_scalar_ops(data):
    """Check ``+``, ``-``, ``*`` and ``/`` between a column and other operands.

    Each operator is tried with an int on the right, another column on
    the right, and a float or an int on the left.
    """
    import operator

    operators = (operator.add, operator.sub, operator.mul, operator.truediv)
    for op in operators:
        column_int = compute(op(t.amount, 10), data)
        assert eq(column_int, op(x['amount'], 10))

        column_column = compute(op(t.amount, t.id), data)
        assert eq(column_column, op(x['amount'], x['id']))

        float_column = compute(op(10.0, t.amount), data)
        assert eq(float_column, op(10.0, x['amount']))

        int_column = compute(op(10, t.amount), data)
        assert eq(int_column, op(10, x['amount']))
Example #15
0
def test_apply_column():
    """Applying ``np.sum`` or the builtin ``sum`` to a column matches a direct call."""
    for reducer in (np.sum, builtins.sum):
        result = compute(t.amount.apply(reducer, 'real'), df)
        expected = reducer(df['amount'])
        assert result == expected
Example #16
0
def test_arithmetic():
    """Check ``+``, ``*`` and ``%`` on a context-managed source against ``x``."""
    with data() as d:
        added = compute(t['amount'] + t['id'], d)
        assert eq(added, x['amount'] + x['id'])

        multiplied = compute(t['amount'] * t['id'], d)
        assert eq(multiplied, x['amount'] * x['id'])

        remainder = compute(t['amount'] % t['id'], d)
        assert eq(remainder, x['amount'] % x['id'])
Example #17
0
def test_summary_on_series():
    """Check a max/min ``summary`` over a Series, with and without ``keepdims``."""
    values = Series([1, 2, 3])
    s = symbol('s', '3 * int')

    reduced = compute(summary(max=s.max(), min=s.min()), values)
    assert reduced == (3, 1)

    kept = compute(summary(max=s.max(), min=s.min(), keepdims=True), values)
    assert kept == [(3, 1)]
Example #18
0
def test_nelements_array():
    """Check ``nelements`` over one axis and over a pair of axes of a 5x4x3 array."""
    t = symbol('t', '5 * 4 * 3 * float64')
    x = np.random.randn(*t.shape)

    over_two_axes = compute(t.nelements(axis=(0, 1)), x)
    np.testing.assert_array_equal(over_two_axes, np.array([20, 20, 20]))

    over_middle_axis = compute(t.nelements(axis=1), x)
    expected = 4 * np.ones((5, 3))
    np.testing.assert_array_equal(over_middle_axis, expected)
Example #19
0
def test_by_with_single_row():
    """A ``by`` over a filtered ctable gives the same frame with or without ``optimize``."""
    ct = bcolz.ctable([[1, 1, 3, 3], [1, 2, 3, 4]], names=list('ab'))
    t = symbol('t', discover(ct))

    filtered = t[t.a == 3]
    grouped = by(filtered.a, b_sum=filtered.b.sum())

    optimized = compute(grouped, ct)
    unoptimized = compute(grouped, ct, optimize=False)
    tm.assert_frame_equal(optimized, unoptimized)
Example #20
0
def test_std():
    """Check ``std``/``var`` (biased and unbiased) of amount against numpy."""
    amounts = [record[1] for record in data]

    biased_std = compute(t.amount.std(), data)
    assert np.allclose(biased_std, np.std(amounts))

    unbiased_std = compute(t.amount.std(unbiased=True), data)
    assert np.allclose(unbiased_std, np.std(amounts, ddof=1))

    biased_var = compute(t.amount.var(), data)
    assert np.allclose(biased_var, np.var(amounts))

    unbiased_var = compute(t.amount.var(unbiased=True), data)
    assert np.allclose(unbiased_var, np.var(amounts, ddof=1))
Example #21
0
def test_sort():
    """Check table and column sorts against ``np.sort`` on ``x``."""
    by_amount = compute(t.sort("amount"), x)
    assert eq(by_amount, np.sort(x, order="amount"))

    by_amount_desc = compute(t.sort("amount", ascending=False), x)
    assert eq(by_amount_desc, np.sort(x, order="amount")[::-1])

    by_amount_then_id = compute(t.sort(["amount", "id"]), x)
    assert eq(by_amount_then_id, np.sort(x, order=["amount", "id"]))

    sorted_column = compute(t.amount.sort(), x)
    assert eq(sorted_column, np.sort(x["amount"]))
Example #22
0
def test_sort():
    """Check ascending, descending and two-key sorts against ``np.sort``."""
    ascending = compute(t.sort('amount'), x)
    assert eq(ascending, np.sort(x, order='amount'))

    descending = compute(t.sort('amount', ascending=False), x)
    assert eq(descending, np.sort(x, order='amount')[::-1])

    two_keys = compute(t.sort(['amount', 'id']), x)
    assert eq(two_keys, np.sort(x, order=['amount', 'id']))
Example #23
0
def test_Distinct():
    """``distinct`` on a column and on whole records matches ``np.unique``."""
    rows = [("Alice", 100), ("Alice", -200), ("Bob", 100), ("Bob", 100)]
    x = np.array(rows, dtype=[("name", "S5"), ("amount", "i8")])

    t = symbol("t", "var * {name: string, amount: int64}")

    unique_names = compute(t["name"].distinct(), x)
    assert eq(unique_names, np.unique(x["name"]))

    unique_rows = compute(t.distinct(), x)
    assert eq(unique_rows, np.unique(x))
Example #24
0
def test_concat_mat():
    """Concatenate two 5x3 matrices along the default axis and along axis 1."""
    s_data = np.arange(15).reshape(5, 3)
    t_data = np.arange(15, 30).reshape(5, 3)

    s = symbol("s", discover(s_data))
    t = symbol("t", discover(t_data))

    stacked = compute(concat(s, t), {s: s_data, t: t_data})
    assert (stacked == np.arange(30).reshape(10, 3)).all()

    side_by_side = compute(concat(s, t, axis=1), {s: s_data, t: t_data})
    expected = np.concatenate((s_data, t_data), axis=1)
    assert (side_by_side == expected).all()
Example #25
0
def test_multi_dataset_broadcast():
    """Elementwise expressions over two symbols bound to separate lists."""
    x = symbol('x', '3 * int')
    y = symbol('y', '3 * int')

    a = [1, 2, 3]
    b = [10, 20, 30]

    simple = compute(x + y, {x: a, y: b})
    assert list(simple) == [11, 22, 33]

    compound = compute(2*x + (y + 1), {x: a, y: b})
    assert list(compound) == [13, 25, 37]
Example #26
0
    def test_compound(self):
        """A scalar expression built on a mean reduction matches ``math``."""
        mean_expr = t.amount.mean()
        mean_value = compute(mean_expr, data)
        assert isinstance(mean_value, float)

        identity = cos(mean_expr) ** 2 + sin(mean_expr) ** 2
        result = compute(identity, data)
        expected = math.cos(mean_value) ** 2 + math.sin(mean_value) ** 2
        assert result == expected
Example #27
0
def test_summary_on_ndarray():
    """Check ``summary`` on an ndarray, with and without ``keepdims``."""
    reduced = compute(summary(total=a.sum(), min=a.min()), ax)
    assert reduced == (ax.min(), ax.sum())

    result = compute(summary(total=a.sum(), min=a.min(), keepdims=True), ax)
    fields = [('min', 'float32'), ('total', 'float64')]
    expected = np.array([(ax.min(), ax.sum())], dtype=fields)
    assert result.ndim == ax.ndim
    assert eq(expected, result)
Example #28
0
def test_sort():
    """Check single-key, explicit-ascending and two-key sorts against pandas.

    Expected frames are built with ``DataFrame.sort_values``;
    ``DataFrame.sort`` was deprecated in pandas 0.17 and later removed.
    """
    tm.assert_frame_equal(compute(t.sort('amount'), df),
                          df.sort_values('amount'))

    tm.assert_frame_equal(compute(t.sort('amount', ascending=True), df),
                          df.sort_values('amount', ascending=True))

    tm.assert_frame_equal(compute(t.sort(['amount', 'id']), df),
                          df.sort_values(['amount', 'id']))
Example #29
0
def test_truncate_datetime():
    """Truncating to two days works on a scalar datetime and on a sequence."""
    moment = datetime(2002, 1, 3, 12, 30)
    truncated = date(2002, 1, 2)

    s = symbol('x', 'datetime')
    assert compute(s.truncate(2, 'days'), moment) == truncated

    s = symbol('x', 'var * datetime')
    result = compute(s.truncate(2, 'days'), [datetime(2002, 1, 3, 12, 30)])
    assert list(result) == [truncated]
Example #30
0
def test_sort():
    """Check single-key, explicit-ascending and two-key sorts against ``sorted``."""
    default_order = list(compute(t.sort('amount'), data))
    assert default_order == sorted(data, key=lambda row: row[1])

    explicit_ascending = list(compute(t.sort('amount', ascending=True), data))
    assert explicit_ascending == sorted(data, key=lambda row: row[1])

    two_keys = list(compute(t.sort(['amount', 'id']), data))
    assert two_keys == sorted(data, key=lambda row: (row[1], row[2]))
Example #31
0
def test_columns_series():
    """A column and a comparison on that column both compute to a Series."""
    column = compute(t['amount'], df)
    assert isinstance(column, Series)

    mask = compute(t['amount'] > 150, df)
    assert isinstance(mask, Series)
Example #32
0
def test_reductions_on_dataframes():
    """``count`` of the table is 3; with ``keepdims`` the result has shape (1,)."""
    row_count = compute(count(t), df)
    assert row_count == 3

    kept = compute(count(t, keepdims=True), df)
    assert shape(kept) == (1, )
Example #33
0
def test_notnull():
    """``notnull`` on the name column agrees with pandas ``notnull``."""
    result = compute(nt.name.notnull(), ndf)
    expected = ndf.name.notnull()
    assert (result == expected).all()
Example #34
0
def test_str_concat():
    """Appending a literal to a string column matches pandas concatenation."""
    letters = Series(('a', 'b', 'c'))
    s = symbol('s', "3 * string[1, 'U32']")

    result = compute(s + 'a', letters)
    expected = letters + 'a'
    assert (result == expected).all()
Example #35
0
def test_neg():
    """Unary minus on the amount column matches negating the pandas Series."""
    negated = compute(-t['amount'], df)
    assert_series_equal(negated, -df['amount'])
Example #36
0
def test_summary_keepdims():
    """A ``keepdims`` summary of count and sum computes to a one-row frame."""
    expr = summary(count=t.id.count(), sum=t.amount.sum(), keepdims=True)
    result = compute(expr, df)
    expected = DataFrame([[3, 350]], columns=['count', 'sum'])
    tm.assert_frame_equal(result, expected)
Example #37
0
def test_field_on_series():
    """Accessing field ``s`` of a symbol named ``s`` returns the Series itself."""
    sym = symbol('s', 'var * int')
    values = Series([1, 2, 3, 4], name='s')
    result = compute(sym.s, values)
    assert_series_equal(result, values)
Example #38
0
def test_by_one():
    """A named sum grouped by name matches the pandas groupby equivalent."""
    grouped = by(t['name'], total=t['amount'].sum())
    result = compute(grouped, df)

    expected = df.groupby('name')['amount'].sum().reset_index()
    expected.columns = ['name', 'total']

    tm.assert_frame_equal(result, expected)
Example #39
0
def test_1d_reductions_keepdims():
    """Each listed reduction with ``keepdims=True`` returns the input's type."""
    amounts = df['amount']
    reductions = [sum, min, max, nunique, count, std, var]
    for reduction in reductions:
        expr = reduction(t.amount, keepdims=True)
        result = compute(expr, {t.amount: amounts})
        assert type(result) == type(amounts)
Example #40
0
def test_unary_op():
    """``exp`` of the amount column matches ``np.exp`` on the pandas column."""
    result = compute(exp(t['amount']), df)
    expected = np.exp(df['amount'])
    assert (result == expected).all()
Example #41
0
def test_arithmetic():
    """Check computed ``+``, ``*`` and ``%`` against the pandas Series results."""
    added = compute(t['amount'] + t['id'], df)
    assert_series_equal(added, df.amount + df.id)

    multiplied = compute(t['amount'] * t['id'], df)
    assert_series_equal(multiplied, df.amount * df.id)

    remainder = compute(t['amount'] % t['id'], df)
    assert_series_equal(remainder, df.amount % df.id)
Example #42
0
def test_count_keepdims_frame():
    """``count(keepdims=True)`` gives a one-element Series named ``s_count``.

    The expected value is the frame's row count, including the NaN row.
    """
    df = pd.DataFrame(dict(a=[1, 2, 3, np.nan]))
    s = symbol('s', discover(df))

    result = compute(s.count(keepdims=True), df)
    expected = pd.Series([df.shape[0]], name='s_count')
    assert_series_equal(result, expected)
Example #43
0
def test_abs():
    """``abs`` of the amount column matches ``abs`` of the pandas column."""
    result = compute(abs(t['amount']), df)
    expected = abs(df['amount'])
    assert (result == expected).all()
Example #44
0
def test_reductions():
    """Check scalar reductions and positional indexing on columns of ``df``."""
    amount = t['amount']

    # Simple aggregates with hard-coded expectations.
    assert compute(mean(amount), df) == 350 / 3
    assert compute(count(amount), df) == 3
    assert compute(sum(amount), df) == 100 + 200 + 50
    assert compute(min(amount), df) == 50
    assert compute(max(amount), df) == 200

    # Distinct counts.
    assert compute(nunique(amount), df) == 3
    assert compute(nunique(t['name']), df) == 2

    # Boolean reductions must return real bools, not merely truthy values.
    assert compute(any(amount > 150), df) is True
    assert compute(any(amount > 250), df) is False

    # Variance and standard deviation, biased then unbiased.
    assert compute(var(amount), df) == df.amount.var(ddof=0)
    assert compute(var(amount, unbiased=True), df) == df.amount.var()
    assert compute(std(amount), df) == df.amount.std(ddof=0)
    assert compute(std(amount, unbiased=True), df) == df.amount.std()

    # First and last element by position.
    assert compute(t.amount[0], df) == df.amount.iloc[0]
    assert compute(t.amount[-1], df) == df.amount.iloc[-1]
Example #45
0
def test_by_with_complex_summary():
    """A ``by`` whose summary combines reductions keeps field order and totals."""
    combined_total = t.amount.sum() + t.id.sum() - 1
    expr = by(t.name, total=combined_total, a=t.id.min())
    result = compute(expr, df)

    assert list(result.columns) == expr.fields
    assert list(result.total) == [150 + 4 - 1, 200 + 2 - 1]
Example #46
0
def test_coerce_series():
    """Coercing a Series of digit strings to ``int64`` yields integers."""
    s = pd.Series(list('123'), name='a')
    t = symbol('t', discover(s))

    coerced = compute(t.coerce(to='int64'), s)
    assert_series_equal(coerced, pd.Series([1, 2, 3], name=s.name))
Example #47
0
def test_datetime_truncation_same_as_python():
    """Truncating the first timestamp to two weeks gives 1999-12-26."""
    data = Series(['2000-01-01T12:10:00Z', '2000-06-25T12:35:12Z'],
                  dtype='M8[ns]')
    s = symbol('s', 'var * datetime')

    first_moment = data[0].to_pydatetime()
    result = compute(s.truncate(weeks=2), first_moment)
    expected = datetime(1999, 12, 26).date()
    assert (result == expected)
Example #48
0
def test_eq():
    """Equality against a scalar matches the pandas boolean Series."""
    mask = compute(t['amount'] == 100, df)
    assert_series_equal(mask, df['amount'] == 100)
Example #49
0
def test_nelements():
    """``nelements()`` and ``nrows`` both equal ``len(df)``."""
    expected = len(df)
    assert compute(t.nelements(), df) == expected
    assert compute(t.nrows, df) == expected
Example #50
0
def test_str_interp():
    """``interp`` on a column of format strings matches pandas ``%``."""
    templates = Series(('%s', '%s', '%s'))
    s = symbol('s', "3 * string[1, 'U32']")

    result = compute(s.interp(1), templates)
    expected = templates % 1
    assert (result == expected).all()
Example #51
0
def test_series_slice():
    """Integer and slice indexing of a column matches pandas ``iloc``."""
    column = df.amount

    # Scalar positions.
    assert compute(t.amount[0], df) == column.iloc[0]
    assert compute(t.amount[2], df) == column.iloc[2]

    # Slices: prefix, interior range, and stepped.
    prefix = compute(t.amount[:2], df)
    assert_series_equal(prefix, column.iloc[:2])

    interior = compute(t.amount[1:3], df)
    assert_series_equal(interior, column.iloc[1:3])

    stepped = compute(t.amount[1::2], df)
    assert_series_equal(stepped, column.iloc[1::2])
Example #52
0
def test_isnan():
    """``isnan`` on the amount column agrees with pandas ``isnull``."""
    result = compute(nt.amount.isnan(), ndf)
    expected = ndf.amount.isnull()
    assert (result == expected).all()
Example #53
0
def test_timedelta_arith():
    """Adding/subtracting a one-day ``timedelta`` matches pandas arithmetic."""
    series = Series(pd.date_range('2014-01-01', '2014-02-01'))
    sym = symbol('s', discover(series))
    delta = timedelta(days=1)

    shifted_forward = compute(sym + delta, series)
    assert (shifted_forward == series + delta).all()

    shifted_back = compute(sym - delta, series)
    assert (shifted_back == series - delta).all()
Example #54
0
def test_neg_projection():
    """Unary minus on a two-column projection matches negating the pandas frame."""
    negated = compute(-t[['amount', 'id']], df)
    expected = -df[['amount', 'id']]
    assert_series_equal(negated, expected)
Example #55
0
def test_selection():
    """Row selection by a predicate matches pandas boolean indexing."""
    zero_amount = compute(t[t['amount'] == 0], df)
    tm.assert_frame_equal(zero_amount, df[df['amount'] == 0])

    large_amount = compute(t[t['amount'] > 150], df)
    tm.assert_frame_equal(large_amount, df[df['amount'] > 150])
Example #56
0
def test_nunique_table():
    """``nunique`` on the table equals the number of de-duplicated rows."""
    distinct_rows = compute(t.nunique(), df)
    assert distinct_rows == len(df.drop_duplicates())
Example #57
0
def test_strlen():
    """``strlen`` on the name column gives 5, 3, 5 once the index is reset."""
    lengths = compute(t.name.strlen(), df)
    result = lengths.reset_index(drop=True)
    expected = pd.Series([5, 3, 5], name='name')
    assert_series_equal(expected, result)
Example #58
0
def test_isin(keys):
    """Filtering rows with ``id.isin(keys)`` matches pandas ``isin`` via ``loc``."""
    selection = t[t.id.isin(keys)]
    result = compute(selection, df)

    mask = df.id.isin(keys)
    expected = df.loc[mask]
    tm.assert_frame_equal(result, expected)
Example #59
0
def test_projection():
    """Projecting two columns matches pandas column selection."""
    columns = ['name', 'id']
    projected = compute(t[['name', 'id']], df)
    tm.assert_frame_equal(projected, df[columns])
Example #60
0
def test_str_repeat():
    """``repeat(3)`` on a string column matches pandas ``* 3``."""
    letters = Series(('a', 'b', 'c'))
    s = symbol('s', "3 * string[1, 'U32']")

    result = compute(s.repeat(3), letters)
    expected = letters * 3
    assert (result == expected).all()