Example #1
0
def test_summary_on_ndarray():
    """Check ``summary`` computed against ``ax``, with and without ``keepdims``."""
    plain_expr = summary(total=a.sum(), min=a.min())
    assert compute(plain_expr, ax) == (ax.min(), ax.sum())

    kept_expr = summary(total=a.sum(), min=a.min(), keepdims=True)
    result = compute(kept_expr, ax)
    record_dtype = [("min", "float32"), ("total", "float64")]
    expected = np.array([(ax.min(), ax.sum())], dtype=record_dtype)
    # With keepdims=True the result must have as many dimensions as the input.
    assert result.ndim == ax.ndim
    assert eq(expected, result)
Example #2
0
def test_summary_axis():
    """Check the dshape of ``summary`` for ``axis=0``, ``axis=1`` and ``keepdims``."""
    x = symbol('x', '5 * 3 * float32')

    axis0 = summary(a=x.min(), b=x.max(), axis=0)
    assert axis0.dshape == dshape('3 * {a: float32, b: float32}')

    axis1 = summary(a=x.min(), b=x.max(), axis=1)
    assert axis1.dshape == dshape('5 * {a: float32, b: float32}')

    axis1_kept = summary(a=x.min(), b=x.max(), axis=1, keepdims=True)
    assert axis1_kept.dshape == dshape('5 * 1 * {a: float32, b: float32}')
Example #3
0
def test_summary_on_series():
    """Check a max/min ``summary`` computed against a three-element Series."""
    ser = Series([1, 2, 3])
    s = symbol('s', '3 * int')

    reduced = compute(summary(max=s.max(), min=s.min()), ser)
    assert reduced == (3, 1)

    # keepdims=True is expected to wrap the record in a one-element list.
    kept = compute(summary(max=s.max(), min=s.min(), keepdims=True), ser)
    assert kept == [(3, 1)]
Example #4
0
def test_summary_on_series():
    """Check a max/min ``summary`` against a series built by ``dd.from_pandas``."""
    ser = dd.from_pandas(pd.Series([1, 2, 3]), npartitions=2)
    s = symbol('s', '3 * int')

    plain = summary(max=s.max(), min=s.min())
    assert compute(plain, ser) == (3, 1)

    with_dims = summary(max=s.max(), min=s.min(), keepdims=True)
    assert compute(with_dims, ser) == [(3, 1)]
Example #5
0
def test_summary_on_series():
    """Compute max/min summaries over a two-partition ``dd.from_pandas`` series."""
    s = symbol('s', '3 * int')
    ser = dd.from_pandas(pd.Series([1, 2, 3]), npartitions=2)

    flat_result = compute(summary(max=s.max(), min=s.min()), ser)
    assert flat_result == (3, 1)

    kept_result = compute(
        summary(max=s.max(), min=s.min(), keepdims=True),
        ser,
    )
    assert kept_result == [(3, 1)]
Example #6
0
def test_summary_on_series():
    """Compute max/min summaries over ``Series([1, 2, 3])``."""
    s = symbol('s', '3 * int')
    ser = Series([1, 2, 3])

    plain = summary(max=s.max(), min=s.min())
    assert compute(plain, ser) == (3, 1)

    with_dims = summary(max=s.max(), min=s.min(), keepdims=True)
    assert compute(with_dims, ser) == [(3, 1)]
Example #7
0
def test_summary_axis():
    """Verify the ``summary`` dshape for each reduction axis of a 5 x 3 symbol."""
    x = symbol('x', '5 * 3 * float32')

    over_axis0 = summary(a=x.min(), b=x.max(), axis=0).dshape
    assert over_axis0 == dshape('3 * {a: float32, b: float32}')

    over_axis1 = summary(a=x.min(), b=x.max(), axis=1).dshape
    assert over_axis1 == dshape('5 * {a: float32, b: float32}')

    over_axis1_kept = summary(a=x.min(), b=x.max(), axis=1, keepdims=True).dshape
    assert over_axis1_kept == dshape('5 * 1 * {a: float32, b: float32}')
Example #8
0
def test_summary_on_ndarray():
    """Check ``summary`` computed against ``ax``, with and without ``keepdims``."""
    scalar_result = compute(summary(total=a.sum(), min=a.min()), ax)
    assert scalar_result == (ax.min(), ax.sum())

    kept_expr = summary(total=a.sum(), min=a.min(), keepdims=True)
    result = compute(kept_expr, ax)
    fields = [('min', 'float32'), ('total', 'float64')]
    expected = np.array([(ax.min(), ax.sum())], dtype=fields)
    # With keepdims=True the result must have as many dimensions as the input.
    assert result.ndim == ax.ndim
    assert eq(expected, result)
Example #9
0
def test_summary_on_ndarray():
    """Compute total/min summaries over ``ax`` and compare with NumPy results."""
    plain = summary(total=a.sum(), min=a.min())
    assert compute(plain, ax) == (ax.min(), ax.sum())

    result = compute(
        summary(total=a.sum(), min=a.min(), keepdims=True),
        ax,
    )
    expected = np.array(
        [(ax.min(), ax.sum())],
        dtype=[('min', 'float32'), ('total', 'float64')],
    )
    # The keepdims result is compared both by rank and by value.
    assert result.ndim == ax.ndim
    assert eq(expected, result)
Example #10
0
def test_summary_by():
    """Check grouped ``summary`` output by comparing ``str`` of DataFrames."""
    columns = ['name', 'count', 'sum']

    plain = by(t.name, summary(count=t.id.count(), sum=t.amount.sum()))
    plain_text = str(compute(plain, df))
    expected_plain = DataFrame([['Alice', 2, 150], ['Bob', 1, 200]],
                               columns=columns)
    assert plain_text == str(expected_plain)

    # Same grouping, but 1 is added to each amount before it is summed.
    shifted = by(t.name, summary(count=t.id.count(), sum=(t.amount + 1).sum()))
    shifted_text = str(compute(shifted, df))
    expected_shifted = DataFrame([['Alice', 2, 152], ['Bob', 1, 201]],
                                 columns=columns)
    assert shifted_text == str(expected_shifted)
Example #11
0
def test_summary_by():
    """Check grouped ``summary`` results against expected DataFrames.

    Two groupings by ``t.name`` are computed against ``df``: one summing
    ``t.amount`` directly and one summing ``t.amount + 1``. Each result is
    compared with its own expected frame.
    """
    expr = by(t.name, summary(count=t.id.count(), sum=t.amount.sum()))
    result = compute(expr, df)
    expected = DataFrame([['Alice', 2, 150],
                          ['Bob', 1, 200]], columns=['name', 'count', 'sum'])
    # This first case used to be computed and then overwritten unchecked.
    tm.assert_frame_equal(result, expected)

    expr = by(t.name, summary(count=t.id.count(), sum=(t.amount + 1).sum()))
    result = compute(expr, df)
    expected = DataFrame([['Alice', 2, 152],
                          ['Bob', 1, 201]], columns=['name', 'count', 'sum'])
    tm.assert_frame_equal(result, expected)
Example #12
0
def test_summary_by():
    """Check grouped ``summary`` results against expected DataFrames.

    Two groupings by ``t.name`` are computed against ``df``: one summing
    ``t.amount`` directly and one summing ``t.amount + 1``. Each result is
    compared with its own expected frame.
    """
    expr = by(t.name, summary(count=t.id.count(), sum=t.amount.sum()))
    result = compute(expr, df)
    expected = DataFrame([['Alice', 2, 150], ['Bob', 1, 200]],
                         columns=['name', 'count', 'sum'])
    # This first case used to be computed and then overwritten unchecked.
    tm.assert_frame_equal(result, expected)

    expr = by(t.name, summary(count=t.id.count(), sum=(t.amount + 1).sum()))
    result = compute(expr, df)
    expected = DataFrame([['Alice', 2, 152], ['Bob', 1, 201]],
                         columns=['name', 'count', 'sum'])
    tm.assert_frame_equal(result, expected)
Example #13
0
def test_summary_by():
    """Check grouped ``summary`` results as sets of ``(name, count, sum)`` rows."""
    plain = by(t.name, summary(count=t.id.count(), sum=t.amount.sum()))
    plain_rows = set(compute(plain, data))
    assert plain_rows == {('Alice', 2, 150), ('Bob', 1, 200)}

    # 1 is added to every amount before the sum.
    shifted = by(t.name, summary(count=t.id.count(), sum=(t.amount + 1).sum()))
    shifted_rows = set(compute(shifted, data))
    assert shifted_rows == {('Alice', 2, 152), ('Bob', 1, 201)}

    # 1 is added once, after the sum.
    bumped = by(t.name, summary(count=t.id.count(), sum=t.amount.sum() + 1))
    bumped_rows = set(compute(bumped, data))
    assert bumped_rows == {('Alice', 2, 151), ('Bob', 1, 201)}
Example #14
0
def test_summary_by():
    """Group by ``t.name`` and compare three summary variants as row sets."""
    counts = t.id.count()

    direct_sum = by(t.name, summary(count=counts, sum=t.amount.sum()))
    assert set(compute(direct_sum, data)) == {
        ('Alice', 2, 150),
        ('Bob', 1, 200),
    }

    # The increment happens per element, inside the reduction.
    inner_plus = by(t.name, summary(count=t.id.count(),
                                    sum=(t.amount + 1).sum()))
    assert set(compute(inner_plus, data)) == {
        ('Alice', 2, 152),
        ('Bob', 1, 201),
    }

    # The increment happens once, on the reduced value.
    outer_plus = by(t.name, summary(count=t.id.count(),
                                    sum=t.amount.sum() + 1))
    assert set(compute(outer_plus, data)) == {
        ('Alice', 2, 151),
        ('Bob', 1, 201),
    }
Example #15
0
def test_summary():
    """Check dshape, hashing, string form and ``_child`` of ``summary``."""
    # NOTE: ``t`` must keep this name; ``eval(str(s))`` below resolves names
    # from this function's scope.
    t = TableSymbol("t", "{id: int32, name: string, amount: int32}")
    s = summary(total=t.amount.sum(), num=t.id.count())

    assert s.dshape == dshape("{num: int32, total: int64}")
    assert hash(s)
    assert eval(str(s)).isidentical(s)

    rendered = str(s)
    assert "summary(" in rendered
    assert "total=" in rendered
    assert "num=" in rendered
    assert str(t.amount.sum()) in rendered

    bare_child = summary(total=t.amount.sum())._child
    assert not bare_child.isidentical(t.amount.sum())
    arith_child = summary(total=t.amount.sum() + 1)._child
    assert iscollection(arith_child.dshape)
Example #16
0
def test_summary():
    """Check dshape, hashing, string form and ``_child`` of ``summary``."""
    # NOTE: ``t`` must keep this name; ``eval(str(s))`` below resolves names
    # from this function's scope.
    t = symbol('t', 'var * {id: int32, name: string, amount: int32}')
    s = summary(total=t.amount.sum(), num=t.id.count())

    assert s.dshape == dshape('{num: int32, total: int64}')
    assert hash(s)
    assert eval(str(s)).isidentical(s)

    rendered = str(s)
    assert 'summary(' in rendered
    assert 'total=' in rendered
    assert 'num=' in rendered
    assert str(t.amount.sum()) in rendered

    bare_child = summary(total=t.amount.sum())._child
    assert not bare_child.isidentical(t.amount.sum())
    arith_child = summary(total=t.amount.sum() + 1)._child
    assert iscollection(arith_child.dshape)
Example #17
0
def test_summary_with_mean():
    """Check how ``split`` breaks up a ``summary`` that contains a mean."""
    original = summary(a=t.amount.count(), b=t.id.mean() + 1)
    (chunk, chunk_expr), (agg, agg_expr) = split(t, original)

    assert chunk.schema == t.schema
    # The mean is expected to appear as separate total and count reductions.
    expected_chunk = summary(a=chunk.amount.count(),
                             b_total=chunk.id.sum(),
                             b_count=chunk.id.count(),
                             keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    # assert not agg.schema == dshape('{a: int32, b: int32}')
    mean_of_ids = agg.b_total.sum() / agg.b_count.sum()
    expected = summary(a=agg.a.sum(), b=mean_of_ids + 1)
    assert agg_expr.isidentical(expected)
Example #18
0
def test_summary():
    """Check dshape, hashing, string form and ``child`` of ``summary``."""
    # NOTE: ``t`` must keep this name; ``eval(str(s))`` below resolves names
    # from this function's scope.
    t = TableSymbol('t', '{id: int32, name: string, amount: int32}')
    s = summary(total=t.amount.sum(), num=t.id.count())

    assert s.dshape == dshape('{num: int32, total: int32}')
    assert hash(s)
    assert eval(str(s)).isidentical(s)

    rendered = str(s)
    assert 'summary(' in rendered
    assert 'total=' in rendered
    assert 'num=' in rendered
    assert str(t.amount.sum()) in rendered

    bare_child = summary(total=t.amount.sum()).child
    assert not bare_child.isidentical(t.amount.sum())
    arith_child = summary(total=t.amount.sum() + 1).child
    assert isinstance(arith_child, TableExpr)
Example #19
0
def test_summary_with_mean():
    """Verify the chunk and aggregate expressions ``split`` yields for a mean."""
    chunk_part, agg_part = split(
        t, summary(a=t.amount.count(), b=t.id.mean() + 1))
    chunk, chunk_expr = chunk_part
    agg, agg_expr = agg_part

    assert chunk.schema == t.schema
    per_chunk = summary(
        a=chunk.amount.count(),
        b_total=chunk.id.sum(),
        b_count=chunk.id.count(),
        keepdims=True,
    )
    assert chunk_expr.isidentical(per_chunk)

    # assert not agg.schema == dshape('{a: int32, b: int32}')
    combined_mean = agg.b_total.sum() / agg.b_count.sum()
    expected = summary(a=agg.a.sum(), b=combined_mean + 1)
    assert agg_expr.isidentical(expected)
Example #20
0
def test_elemwise_with_multiple_paths():
    """Check ``split`` on a quotient of two reductions over the same symbol."""
    s = symbol('s', 'var * {x: int, y: int, z: int}')
    ratio = s.x.sum() / s.y.sum()

    (chunk, chunk_expr), (agg, agg_expr) = split(s, ratio)

    expected_chunk = summary(x=chunk.x.sum(), y=chunk.y.sum())
    assert chunk_expr.isidentical(expected_chunk)
    expected_agg = agg.x / agg.y
    assert agg_expr.isidentical(expected_agg)
Example #21
0
def test_elemwise_with_multiple_paths():
    """Split ``s.x.sum() / s.y.sum()`` and verify both resulting stages."""
    s = symbol('s', 'var * {x: int, y: int, z: int}')

    chunk_part, agg_part = split(s, s.x.sum() / s.y.sum())
    chunk, chunk_expr = chunk_part
    agg, agg_expr = agg_part

    # Both reductions are expected to land in a single chunk-level summary.
    assert chunk_expr.isidentical(
        summary(x=chunk.x.sum(), y=chunk.y.sum()))
    assert agg_expr.isidentical(agg.x / agg.y)
Example #22
0
def test_lean_by_with_summary():
    """Check ``lean_projection`` on ``by`` expressions with summary reductions."""
    leaned = lean_projection(by(t.x, total=t.y.sum()))
    assert leaned._child.isidentical(t[["x", "y"]])

    tt = t[["x", "y"]]
    result = lean_projection(by(t.x, a=t.y.sum(), b=t.z.sum())[["x", "a"]])
    # Only the ``a`` reduction is expected; ``b`` is not selected.
    grouped = By(Field(tt, "x"), summary(a=sum(Field(tt, "y"))))
    expected = Projection(grouped, ("x", "a"))
    assert result.isidentical(expected)
Example #23
0
def test_summary_clean():
    """Check the SQL rendered for a ``summary`` over a filtered table."""
    positive = t[t.amount > 0]
    expr = summary(a=positive.amount.sum(), b=positive.id.count())
    rendered = normalize(str(compute(expr, s)))

    expected = normalize("""
    SELECT sum(accounts.amount) as a, count(accounts.id) as b
    FROM accounts
    WHERE accounts.amount > :amount_1""")
    assert rendered == expected
Example #24
0
def test_summary_by():
    """Check that the SQL for a grouped ``summary`` has the expected clauses."""
    expr = by(t.name, summary(a=t.amount.sum(), b=t.id.count()))
    lowered = str(compute(expr, s)).lower()

    # Matching is done on the lower-cased query text.
    assert 'sum(accounts.amount) as a' in lowered
    assert 'count(accounts.id) as b' in lowered
    assert 'group by accounts.name' in lowered
Example #25
0
def test_summary_clean():
    """Render a filtered ``summary`` and compare it with the expected query."""
    t2 = t[t.amount > 0]
    query_text = str(compute(summary(a=t2.amount.sum(), b=t2.id.count()), s))

    expected_text = """
    SELECT sum(accounts.amount) as a, count(accounts.id) as b
    FROM accounts
    WHERE accounts.amount > :amount_1"""
    assert normalize(query_text) == normalize(expected_text)
Example #26
0
def test_summary_by():
    """Look for the select and group-by fragments in a grouped summary query."""
    grouped = by(t.name, summary(a=t.amount.sum(), b=t.id.count()))
    query = str(compute(grouped, s)).lower()

    select_a = 'sum(accounts.amount) as a'
    select_b = 'count(accounts.id) as b'
    assert select_a in query
    assert select_b in query

    grouping = 'group by accounts.name'
    assert grouping in query
Example #27
0
def test_mean():
    """Check how ``split`` decomposes ``t.amount.mean()``."""
    chunk_part, agg_part = split(t, t.amount.mean())
    chunk, chunk_expr = chunk_part
    agg, agg_expr = agg_part

    assert chunk.schema == t.schema
    expected_chunk = summary(
        total=chunk.amount.sum(),
        count=chunk.amount.count(),
        keepdims=True,
    )
    assert chunk_expr.isidentical(expected_chunk)

    assert isrecord(agg.dshape.measure)
    # The 'count' field is reached by item access here.
    expected_agg = agg.total.sum() / agg['count'].sum()
    assert agg_expr.isidentical(expected_agg)
Example #28
0
def test_complex_summaries():
    """Check ``split`` on a ``summary`` mixing mean, std and sum reductions."""
    t = symbol('t', '100 * {a: int, b: int}')
    original = summary(q=t.a.mean(), w=t.a.std(), e=t.a.sum())
    (chunk, chunk_expr), (agg, agg_expr) = split(t, original)

    # Intermediate fields carry the name of the reduction they belong to.
    expected_chunk = summary(e=chunk.a.sum(),
                             q_count=chunk.a.count(),
                             q_total=chunk.a.sum(),
                             w_n=chunk.a.count(),
                             w_x=chunk.a.sum(),
                             w_x2=(chunk.a ** 2).sum(),
                             keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    mean_of_squares = agg.w_x2.sum() / agg.w_n.sum()
    square_of_mean = (agg.w_x.sum() / agg.w_n.sum()) ** 2
    expected = summary(e=agg.e.sum(),
                       q=agg.q_total.sum() / agg.q_count.sum(),
                       w=sqrt(mean_of_squares - square_of_mean))
    assert agg_expr.isidentical(expected)
Example #29
0
def test_complex_summaries():
    """Split a mean/std/sum ``summary`` and verify both resulting stages."""
    t = symbol('t', '100 * {a: int, b: int}')
    chunk_part, agg_part = split(
        t, summary(q=t.a.mean(), w=t.a.std(), e=t.a.sum()))
    chunk, chunk_expr = chunk_part
    agg, agg_expr = agg_part

    per_chunk = summary(
        e=chunk.a.sum(),
        q_count=chunk.a.count(),
        q_total=chunk.a.sum(),
        w_n=chunk.a.count(),
        w_x=chunk.a.sum(),
        w_x2=(chunk.a**2).sum(),
        keepdims=True,
    )
    assert chunk_expr.isidentical(per_chunk)

    q_expected = agg.q_total.sum() / agg.q_count.sum()
    # Expected std: sqrt(mean of squares - square of mean).
    w_expected = sqrt((agg.w_x2.sum() / agg.w_n.sum()) -
                      (agg.w_x.sum() / agg.w_n.sum())**2)
    expected = summary(e=agg.e.sum(), q=q_expected, w=w_expected)
    assert agg_expr.isidentical(expected)
Example #30
0
def test_mean():
    """Check how ``split`` decomposes ``t.amount.mean()``.

    The chunk stage is expected to be a ``keepdims`` summary holding the
    per-chunk ``total`` and ``count``. The aggregate stage is expected to
    divide the summed totals by the summed counts.
    """
    (chunk, chunk_expr), (agg, agg_expr) = split(t, t.amount.mean())

    assert chunk.schema == t.schema
    assert chunk_expr.isidentical(summary(total=chunk.amount.sum(),
                                          count=chunk.amount.count(),
                                          keepdims=True))

    assert isrecord(agg.dshape.measure)
    # Use item access for the field: ``agg.count`` can be confused with the
    # ``count`` reduction method, so ``agg['count']`` is unambiguous.
    assert agg_expr.isidentical(agg.total.sum() / agg['count'].sum())
Example #31
0
def test_lean_by_with_summary():
    """Check ``lean_projection`` on ``by`` expressions with summary reductions."""
    tt = t[['x', 'y']]

    leaned_child = lean_projection(by(t.x, total=t.y.sum()))._child
    assert leaned_child.isidentical(t[['x', 'y']])

    projected = by(t.x, a=t.y.sum(), b=t.z.sum())[['x', 'a']]
    result = lean_projection(projected)
    # Only the ``a`` reduction is expected; ``b`` is not selected.
    grouping = By(Field(tt, 'x'), summary(a=sum(Field(tt, 'y'))))
    expected = Projection(grouping, ('x', 'a'))
    assert result.isidentical(expected)
Example #32
0
def test_summary_on_ndarray_with_axis():
    """Check ``summary`` against NumPy reductions for several ``axis`` values."""
    axes = [0, 1, (1, 0)]
    for axis in axes:
        expr = summary(total=a.sum(), min=a.min(), axis=axis)
        result = compute(expr, ax)

        # Build the expected record array from the expression's own dshape.
        shape, dtype = to_numpy(expr.dshape)
        expected = np.empty(shape=shape, dtype=dtype)
        expected['total'] = ax.sum(axis=axis)
        expected['min'] = ax.min(axis=axis)

        assert eq(result, expected)
Example #33
0
def test_summary_on_ndarray_with_axis():
    """Compare ``summary`` along each tested axis with the NumPy equivalents."""
    for axis in (0, 1, (1, 0)):
        expr = summary(total=a.sum(), min=a.min(), axis=axis)
        result = compute(expr, ax)

        expected_shape, expected_dtype = to_numpy(expr.dshape)
        expected = np.empty(shape=expected_shape, dtype=expected_dtype)
        # Fill each record field with the matching NumPy reduction.
        expected['total'] = ax.sum(axis=axis)
        expected['min'] = ax.min(axis=axis)

        assert eq(result, expected)
Example #34
0
def test_std():
    """Check how ``split`` decomposes ``t.amount.std()``."""
    chunk_part, agg_part = split(t, t.amount.std())
    chunk, chunk_expr = chunk_part
    agg, agg_expr = agg_part

    assert chunk.schema == t.schema
    expected_chunk = summary(x=chunk.amount.sum(),
                             x2=(chunk.amount ** 2).sum(),
                             n=chunk.amount.count(),
                             keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    assert isrecord(agg.dshape.measure)
    # Expected std: sqrt(mean of squares - square of mean).
    mean_of_squares = agg.x2.sum() / agg.n.sum()
    square_of_mean = (agg.x.sum() / agg.n.sum()) ** 2
    assert agg_expr.isidentical(sqrt(mean_of_squares - square_of_mean))
Example #35
0
def test_var():
    """Check how ``split`` decomposes ``t.amount.var()``."""
    chunk_part, agg_part = split(t, t.amount.var())
    chunk, chunk_expr = chunk_part
    agg, agg_expr = agg_part

    assert chunk.schema == t.schema
    expected_chunk = summary(
        x=chunk.amount.sum(),
        x2=(chunk.amount**2).sum(),
        n=chunk.amount.count(),
        keepdims=True,
    )
    assert chunk_expr.isidentical(expected_chunk)

    assert isrecord(agg.dshape.measure)
    # Expected variance: mean of squares - square of mean.
    mean_of_squares = agg.x2.sum() / agg.n.sum()
    square_of_mean = (agg.x.sum() / agg.n.sum())**2
    assert agg_expr.isidentical(mean_of_squares - square_of_mean)
Example #36
0
def test_path_split():
    """Check the sub-expression ``path_split`` returns for several inputs."""
    # Arithmetic on top of a reduction.
    plus_one = t.amount.sum() + 1
    assert path_split(t, plus_one).isidentical(t.amount.sum())

    # Sort on top of distinct.
    sorted_distinct = t.amount.distinct().sort()
    assert path_split(t, sorted_distinct).isidentical(t.amount.distinct())

    # Field access and arithmetic on top of a grouped reduction.
    t2 = transform(t, id=t.id * 2)
    grouped_plus_one = by(t2.id, amount=t2.amount.sum()).amount + 1
    expected_grouped = by(t2.id, amount=t2.amount.sum())
    assert path_split(t, grouped_plus_one).isidentical(expected_grouped)

    # Count on top of distinct.
    distinct_count = count(t.amount.distinct())
    assert path_split(t, distinct_count).isidentical(t.amount.distinct())

    # A summary is expected to come back unchanged.
    whole = summary(total=t.amount.sum())
    assert path_split(t, whole).isidentical(whole)
Example #37
0
def test_path_split():
    """Exercise ``path_split`` on reductions, distinct, grouping and summary."""
    found = path_split(t, t.amount.sum() + 1)
    assert found.isidentical(t.amount.sum())

    found = path_split(t, t.amount.distinct().sort())
    assert found.isidentical(t.amount.distinct())

    t2 = transform(t, id=t.id * 2)
    found = path_split(t, by(t2.id, amount=t2.amount.sum()).amount + 1)
    assert found.isidentical(by(t2.id, amount=t2.amount.sum()))

    found = path_split(t, count(t.amount.distinct()))
    assert found.isidentical(t.amount.distinct())

    # For a summary the expected result is the summary itself.
    total_summary = summary(total=t.amount.sum())
    found = path_split(t, total_summary)
    assert found.isidentical(total_summary)
Example #38
0
def test_join_on_same_table():
    """Check the SQL generated when a table is joined with itself on ``a``.

    Three expressions are rendered and compared after ``normalize``: the
    plain join, a sum over ``b_left`` of the join, and a ``summary`` over
    columns of the join.
    """
    T = sa.Table(
        'tab',
        sa.MetaData(),
        sa.Column('a', sa.Integer),
        sa.Column('b', sa.Integer),
    )
    t = symbol('tab', discover(T))

    # 1. The plain self-join.
    joined = join(t, t, 'a')
    joined_sql = normalize(str(compute(joined, {t: T})))
    assert joined_sql == normalize("""
    SELECT tab_left.a, tab_left.b, tab_right.b
    FROM tab AS tab_left JOIN tab AS tab_right
    ON tab_left.a = tab_right.a
    """)

    # 2. A reduction over one column of the join.
    left_sum = join(t, t, 'a').b_left.sum()
    left_sum_sql = normalize(str(compute(left_sum, {t: T})))
    assert left_sum_sql == normalize("""
   with alias as
    (select tab_left.b as b
     from tab as tab_left
        join tab as tab_right
        on tab_left.a = tab_right.a)
    select sum(alias.b) as b_left_sum from alias""")

    # 3. A summary over columns of the join.
    rejoined = join(t, t, 'a')
    overview = summary(total=rejoined.a.sum(), smallest=rejoined.b_right.min())
    overview_sql = normalize(str(compute(overview, {t: T})))
    assert overview_sql == normalize("""
    SELECT min(tab_right.b) as smallest, sum(tab_left.a) as total
    FROM tab AS tab_left JOIN tab AS tab_right
    ON tab_left.a = tab_right.a
    """)
Example #39
0
def test_join_on_same_table():
    """Render three expressions over a self-join and compare the SQL text.

    Covers the plain join on ``a``, a sum of ``b_left`` over the join, and
    a ``summary`` over columns of the join.
    """
    metadata = sa.MetaData()
    columns = (sa.Column('a', sa.Integer), sa.Column('b', sa.Integer))
    T = sa.Table('tab', metadata, *columns)

    t = symbol('tab', discover(T))

    result = compute(join(t, t, 'a'), {t: T})
    expected_join = """
    SELECT tab_left.a, tab_left.b, tab_right.b
    FROM tab AS tab_left JOIN tab AS tab_right
    ON tab_left.a = tab_right.a
    """
    assert normalize(str(result)) == normalize(expected_join)

    result = compute(join(t, t, 'a').b_left.sum(), {t: T})
    expected_sum = """
   with alias as
    (select tab_left.b as b
     from tab as tab_left
        join tab as tab_right
        on tab_left.a = tab_right.a)
    select sum(alias.b) as b_left_sum from alias"""
    assert normalize(str(result)) == normalize(expected_sum)

    both = join(t, t, 'a')
    result = compute(
        summary(total=both.a.sum(), smallest=both.b_right.min()),
        {t: T},
    )
    expected_summary = """
    SELECT min(tab_right.b) as smallest, sum(tab_left.a) as total
    FROM tab AS tab_left JOIN tab AS tab_right
    ON tab_left.a = tab_right.a
    """
    assert normalize(str(result)) == normalize(expected_summary)
Example #40
0
def test_summary():
    """Check how ``split`` decomposes two ``summary`` expressions."""
    two_fields = summary(a=t.amount.count(), b=t.id.sum() + 1)
    (chunk, chunk_expr), (agg, agg_expr) = split(t, two_fields)

    assert chunk.schema == t.schema
    expected_chunk = summary(a=chunk.amount.count(), b=chunk.id.sum(),
                             keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    # assert not agg.schema == dshape('{a: int32, b: int32}')
    # The ``+ 1`` is expected only on the aggregate side.
    expected_agg = summary(a=agg.a.sum(), b=agg.b.sum() + 1)
    assert agg_expr.isidentical(expected_agg)

    one_field = summary(total=t.amount.sum())
    (chunk, chunk_expr), (agg, agg_expr) = split(t, one_field)

    expected_chunk = summary(total=chunk.amount.sum(), keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)
    expected_agg = summary(total=agg.total.sum())
    assert agg_expr.isidentical(expected_agg)
Example #41
0
def test_summary():
    """Split two ``summary`` expressions and verify chunk and aggregate stages."""
    chunk_part, agg_part = split(
        t, summary(a=t.amount.count(), b=t.id.sum() + 1))
    chunk, chunk_expr = chunk_part
    agg, agg_expr = agg_part

    assert chunk.schema == t.schema
    assert chunk_expr.isidentical(
        summary(a=chunk.amount.count(), b=chunk.id.sum(), keepdims=True))

    # assert not agg.schema == dshape('{a: int32, b: int32}')
    assert agg_expr.isidentical(
        summary(a=agg.a.sum(), b=agg.b.sum() + 1))

    # Second case: a summary holding a single reduction.
    chunk_part, agg_part = split(t, summary(total=t.amount.sum()))
    chunk, chunk_expr = chunk_part
    agg, agg_expr = agg_part

    assert chunk_expr.isidentical(
        summary(total=chunk.amount.sum(), keepdims=True))
    assert agg_expr.isidentical(summary(total=agg.total.sum()))
Example #42
0
def test_summary_keepdims():
    """Check the dshape of a full ``summary`` with and without ``keepdims``."""
    x = symbol('x', '5 * 3 * float32')

    collapsed = summary(a=x.min(), b=x.max())
    assert collapsed.dshape == dshape('{a: float32, b: float32}')

    # With keepdims=True each reduced dimension is expected to stay as 1.
    kept = summary(a=x.min(), b=x.max(), keepdims=True)
    assert kept.dshape == dshape('1 * 1 * {a: float32, b: float32}')
Example #43
0
def test_summary_with_multiple_children():
    """Check ``_child`` of a ``summary`` whose value combines two reductions."""
    t = symbol('t', 'var * {x: int, y: int, z: int}')

    combined = summary(a=t.x.sum() + t.y.sum())
    assert combined._child.isidentical(t)
Example #44
0
def test_summary():
    """Check a count/sum ``summary`` on ``df`` via the ``str`` of a Series."""
    expr = summary(count=t.id.count(), sum=t.amount.sum())
    actual_text = str(compute(expr, df))
    expected_text = str(Series({'count': 3, 'sum': 350}))
    assert actual_text == expected_text
Example #45
0
def test_by_summary():
    """Check that ``by`` with keyword reductions matches ``by`` with ``summary``."""
    t = symbol('t', 'var * {name: string, amount: int32, id: int32}')

    keyword_form = by(t['name'], sum=sum(t['amount']))
    summary_form = by(t['name'], summary(sum=sum(t['amount'])))

    assert keyword_form.isidentical(summary_form)
Example #46
0
def test_summary():
    """Check a count/sum ``summary`` on ``df`` against an expected Series."""
    expr = summary(count=t.id.count(), sum=t.amount.sum())
    result = compute(expr, df)
    expected = Series({'count': 3, 'sum': 350})
    assert_series_equal(result, expected)
Example #47
0
def test_summary():
    """Compute a count/sum ``summary`` and compare it with a literal Series."""
    counts_and_sums = summary(count=t.id.count(), sum=t.amount.sum())
    assert_series_equal(
        compute(counts_and_sums, df),
        Series({'count': 3, 'sum': 350}),
    )
Example #48
0
def test_summary_keepdims():
    """Check that a ``keepdims`` summary on ``df`` yields a one-row DataFrame."""
    expr = summary(count=t.id.count(), sum=t.amount.sum(), keepdims=True)
    expected = DataFrame([[3, 350]], columns=['count', 'sum'])
    result = compute(expr, df)
    tm.assert_frame_equal(result, expected)
Example #49
0
def test_summary():
    """Check a count/sum ``summary`` on ``data`` against a plain tuple."""
    expr = summary(count=t.id.count(), sum=t.amount.sum())
    result = compute(expr, data)
    assert result == (3, 350)
Example #50
0
def test_summary_by_reduction_arithmetic():
    """Check a grouped ``summary`` whose sum has 1 added after the reduction.

    ``t.amount.sum() + 1`` adds 1 once per group to the reduced total, not
    once per row, so each expected ``sum`` is the group total plus one.
    """
    expr = by(t.name, summary(count=t.id.count(), sum=t.amount.sum() + 1))
    # Bob's single-row group total is 200, so the expected value is 201.
    # The earlier 202 disagreed with the sibling tests of this expression.
    assert str(compute(expr, df)) == \
            str(DataFrame([['Alice', 2, 151],
                           ['Bob', 1, 201]], columns=['name', 'count', 'sum']))
Example #51
0
def test_summary_by_reduction_arithmetic():
    """Check a grouped ``summary`` whose sum has 1 added after the reduction."""
    bumped_sum = t.amount.sum() + 1
    expr = by(t.name, summary(count=t.id.count(), sum=bumped_sum))
    result = compute(expr, df)

    rows = [['Alice', 2, 151], ['Bob', 1, 201]]
    expected = DataFrame(rows, columns=['name', 'count', 'sum'])
    tm.assert_frame_equal(result, expected)
Example #52
0
def test_summary():
    """Check a count/sum ``summary`` on ``ddf`` against an expected Series."""
    expr = summary(count=t.id.count(), sum=t.amount.sum())
    # Assert on the comparison so its result is not silently discarded; this
    # matches the ``assert eq(...)`` form used by the other tests.
    # NOTE(review): assumes ``eq`` returns a truthy value on success — confirm.
    assert eq(compute(expr, ddf), pd.Series({'count': 3, 'sum': 350}))
Example #53
0
def test_summary_keepdims():
    """Compute a ``keepdims`` count/sum summary and compare with a DataFrame."""
    kept = summary(count=t.id.count(), sum=t.amount.sum(), keepdims=True)
    single_row = DataFrame([[3, 350]], columns=['count', 'sum'])
    tm.assert_frame_equal(compute(kept, df), single_row)
Example #54
0
def test_summary_by_reduction_arithmetic():
    """Group by ``t.name`` with ``sum() + 1`` and compare with a DataFrame."""
    reductions = summary(count=t.id.count(), sum=t.amount.sum() + 1)
    result = compute(by(t.name, reductions), df)

    # 1 is added once per group, after the sum.
    expected = DataFrame(
        [['Alice', 2, 151], ['Bob', 1, 201]],
        columns=['name', 'count', 'sum'],
    )
    tm.assert_frame_equal(result, expected)
Example #55
0
def test_by_summary():
    """Check that ``by`` with keyword reductions matches ``by`` with ``summary``."""
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')

    keyword_form = by(t['name'], sum=sum(t['amount']))
    summary_form = by(t['name'], summary(sum=sum(t['amount'])))

    assert keyword_form.isidentical(summary_form)
Example #56
0
def test_summary_str():
    """Check that ``keepdims`` is absent from ``str`` of a default ``summary``."""
    x = symbol('x', '5 * 3 * float32')
    rendered = str(summary(a=x.min(), b=x.max()))
    assert 'keepdims' not in rendered