Example #1
0
def test_by_columns():
    """Grouping on one column yields grouper + aggregate (2 fields);
    grouping on the whole table keeps all 3 columns plus the count (4 fields)."""
    t = TableSymbol("t", "{name: string, amount: int32, id: int32}")

    assert len(by(t["id"], total=t["amount"].sum()).fields) == 2
    assert len(by(t["id"], count=t["id"].count()).fields) == 2
    print(by(t, count=t.count()).fields)
    assert len(by(t, count=t.count()).fields) == 4
def test_by_raises_informative_error_on_old_syntax():
    """Old positional-apply syntax must raise ValueError whose message
    contains the word 'please' (pointing users to the keyword form)."""
    s = symbol('t', 'var * {name: string, amount: int}')
    try:
        by(s.name, s.amount.sum())
        assert False  # should be unreachable: by() must have raised
    except ValueError as e:
        assert 'please' in str(e).lower()
Example #3
0
def test_by_count():
    """split() of a grouped count: chunks count locally, the aggregate
    step sums the per-chunk counts."""
    (chunk, chunk_expr), (agg, agg_expr) = \
        split(t, by(t.name, total=t.amount.count()))

    assert chunk_expr.isidentical(by(chunk.name, total=chunk.amount.count()))

    assert agg_expr.isidentical(by(agg.name, total=agg.total.sum()))
Example #4
0
def test_by_columns():
    """Grouping on one column yields grouper + aggregate (2 fields);
    grouping on the whole table keeps all 3 columns plus the count (4 fields)."""
    t = symbol('t', 'var * {name: string, amount: int32, id: int32}')

    assert len(by(t['id'], total=t['amount'].sum()).fields) == 2
    assert len(by(t['id'], count=t['id'].count()).fields) == 2
    print(by(t, count=t.count()).fields)
    assert len(by(t, count=t.count()).fields) == 4
Example #5
0
def test_lean_by_with_summary():
    """lean_projection prunes unused columns: a by() over x/y only needs
    t[['x', 'y']], and an unused summary term (b) is dropped entirely."""
    assert lean_projection(by(t.x, total=t.y.sum()))._child.isidentical(t[["x", "y"]])

    tt = t[["x", "y"]]
    result = lean_projection(by(t.x, a=t.y.sum(), b=t.z.sum())[["x", "a"]])
    expected = Projection(By(Field(tt, "x"), summary(a=sum(Field(tt, "y")))), ("x", "a"))
    assert result.isidentical(expected)
Example #6
0
def test_by_columns():
    """Field counts of grouped expressions: one grouper column plus one
    aggregate gives 2 fields; a whole-table grouper keeps all columns."""
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')

    totals = by(t['id'], t['amount'].sum())
    assert len(totals.fields) == 2

    counts = by(t['id'], t['id'].count())
    assert len(counts.fields) == 2

    whole = by(t, t.count())
    print(whole.fields)
    assert len(whole.fields) == 4
Example #7
0
def test_count_values():
    """count_values is sugar for by(col, count=col.count()); sort=True
    appends a descending sort on the count column."""
    t = symbol('t', 'var * {name: string, amount: int, city: string}')
    assert t.name.count_values(sort=False).isidentical(
        by(t.name, count=t.name.count()),
    )
    assert t.name.count_values(sort=True).isidentical(
        by(t.name, count=t.name.count()).sort('count', ascending=False),
    )
Example #8
0
def test_by():
    """Grouped sum and grouped count over backend `c` produce the
    expected (name, value) pairs."""
    summed = compute(by(t.name, sum=t.amount.sum()), c)
    assert set(summed) == {('Alice', -200), ('Bob', 200),
                           ('Charlie', 400), ('Edith', 200)}

    counted = compute(by(t.name, count=t.amount.count()), c)
    assert set(counted) == {('Alice', 2), ('Bob', 1),
                            ('Charlie', 1), ('Edith', 1)}
Example #9
0
def test_by(bank):
    """Grouped sum/min/max/count against the `bank` fixture (keyword apply)."""
    assert set(compute(by(t.name, total=t.amount.sum()), bank)) == \
        set([('Alice', 300), ('Bob', 600)])
    assert set(compute(by(t.name, min=t.amount.min()), bank)) == \
        set([('Alice', 100), ('Bob', 100)])
    assert set(compute(by(t.name, max=t.amount.max()), bank)) == \
        set([('Alice', 200), ('Bob', 300)])
    assert set(compute(by(t.name, count=t.name.count()), bank)) == \
        set([('Alice', 2), ('Bob', 3)])
Example #10
0
def test_by(bank):
    """Grouped sum/min/max/count against the `bank` fixture (positional apply)."""
    assert set(compute(by(t.name, t.amount.sum()), bank)) == \
            set([('Alice', 300), ('Bob', 600)])
    assert set(compute(by(t.name, t.amount.min()), bank)) == \
            set([('Alice', 100), ('Bob', 100)])
    assert set(compute(by(t.name, t.amount.max()), bank)) == \
            set([('Alice', 200), ('Bob', 300)])
    assert set(compute(by(t.name, t.name.count()), bank)) == \
            set([('Alice', 2), ('Bob', 3)])
Example #11
0
def test_by_sum():
    """split() of a grouped sum: the chunk schema matches the input,
    chunks sum locally, and the aggregate sums the per-chunk totals."""
    (chunk, chunk_expr), (agg, agg_expr) = \
        split(t, by(t.name, total=t.amount.sum()))

    assert chunk.schema == t.schema
    assert chunk_expr.isidentical(by(chunk.name, total=chunk.amount.sum()))

    # the aggregate input is tabular (record measure), not scalar
    assert not isscalar(agg.dshape.measure)
    assert agg_expr.isidentical(by(agg.name, total=agg.total.sum()))
Example #12
0
def test_by_with_single_field_child():
    """split() on a 1-d symbol: aggregate groups on its first field and
    relabels back to the original name 'x'."""
    x = symbol('x', 'var * int')
    (chunk, chunk_expr), (agg, agg_expr) = split(x, by(x, total=x.sum()))

    assert chunk_expr.isidentical(by(chunk, total=chunk.sum()))

    assert (agg_expr.isidentical(
        by(agg[agg.fields[0]],
           total=agg.total.sum()).relabel({agg.fields[0]: 'x'})))
Example #13
0
def test_by_sum():
    """split() of a grouped sum: the chunk schema matches the input,
    chunks sum locally, and the aggregate sums the per-chunk totals."""
    (chunk, chunk_expr), (agg, agg_expr) = \
        split(t, by(t.name, total=t.amount.sum()))

    assert chunk.schema == t.schema
    assert chunk_expr.isidentical(by(chunk.name, total=chunk.amount.sum()))

    # the aggregate input is tabular (record measure), not scalar
    assert not isscalar(agg.dshape.measure)
    assert agg_expr.isidentical(by(agg.name, total=agg.total.sum()))
Example #14
0
def test_by_with_single_field_child():
    """split() on a 1-d symbol: aggregate groups on its first field and
    relabels back to the original name 'x'."""
    x = symbol('x', 'var * int')
    (chunk, chunk_expr), (agg, agg_expr) = split(x, by(x, total=x.sum()))

    assert chunk_expr.isidentical(by(chunk, total=chunk.sum()))

    assert (agg_expr.isidentical(by(agg[agg.fields[0]],
                                    total=agg.total.sum())
            .relabel({agg.fields[0]: 'x'})))
Example #15
0
def test_summary_by():
    """Grouped summary over a DataFrame; compared via str() of the frames."""
    expr = by(t.name, summary(count=t.id.count(), sum=t.amount.sum()))
    assert str(compute(expr, df)) == \
            str(DataFrame([['Alice', 2, 150],
                           ['Bob', 1, 200]], columns=['name', 'count', 'sum']))

    # same, but the summand is shifted by 1 before summing
    expr = by(t.name, summary(count=t.id.count(), sum=(t.amount + 1).sum()))
    assert str(compute(expr, df)) == \
            str(DataFrame([['Alice', 2, 152],
                           ['Bob', 1, 201]], columns=['name', 'count', 'sum']))
Example #16
0
def test_by():
    """Grouped reductions against a live collection (three-argument by form)."""
    with collection(bank) as coll:
        assert set(compute(by(t, t.name, t.amount.sum()), coll)) == \
                set([('Alice', 300), ('Bob', 600)])
        assert set(compute(by(t, t.name, t.amount.min()), coll)) == \
                set([('Alice', 100), ('Bob', 100)])
        assert set(compute(by(t, t.name, t.amount.max()), coll)) == \
                set([('Alice', 200), ('Bob', 300)])
        assert set(compute(by(t, t.name, t.name.count()), coll)) == \
                set([('Alice', 2), ('Bob', 3)])
Example #17
0
def test_by_mean():
    """split() of a grouped mean: chunks emit (sum, count) pairs and the
    aggregate divides summed totals by summed counts."""
    (chunk, chunk_expr), (agg, agg_expr) = \
        split(t, by(t.name, avg=t.amount.mean()))

    assert chunk_expr.isidentical(by(chunk.name,
                                     avg_total=chunk.amount.sum(),
                                     avg_count=chunk.amount.count()))

    assert agg_expr.isidentical(by(agg.name,
                                   avg=(agg.avg_total.sum() / agg.avg_count.sum())))
Example #18
0
def test_summary_by():
    """Grouped summary over a DataFrame, checked with assert_frame_equal.

    Two cases: a plain grouped sum, and a sum over a shifted column
    (t.amount + 1).
    """
    expr = by(t.name, summary(count=t.id.count(), sum=t.amount.sum()))
    result = compute(expr, df)
    expected = DataFrame([['Alice', 2, 150], ['Bob', 1, 200]],
                         columns=['name', 'count', 'sum'])
    # BUG FIX: this first result/expected pair was computed but never
    # compared, so the first case was silently untested.
    tm.assert_frame_equal(result, expected)

    expr = by(t.name, summary(count=t.id.count(), sum=(t.amount + 1).sum()))
    result = compute(expr, df)
    expected = DataFrame([['Alice', 2, 152], ['Bob', 1, 201]],
                         columns=['name', 'count', 'sum'])
    tm.assert_frame_equal(result, expected)
Example #19
0
def test_summary_by():
    """Grouped summary over a DataFrame, checked with assert_frame_equal.

    Two cases: a plain grouped sum, and a sum over a shifted column
    (t.amount + 1).
    """
    expr = by(t.name, summary(count=t.id.count(), sum=t.amount.sum()))
    result = compute(expr, df)
    expected = DataFrame([['Alice', 2, 150],
                          ['Bob', 1, 200]], columns=['name', 'count', 'sum'])
    # BUG FIX: this first result/expected pair was computed but never
    # compared, so the first case was silently untested.
    tm.assert_frame_equal(result, expected)

    expr = by(t.name, summary(count=t.id.count(), sum=(t.amount + 1).sum()))
    result = compute(expr, df)
    expected = DataFrame([['Alice', 2, 152],
                          ['Bob', 1, 201]], columns=['name', 'count', 'sum'])
    tm.assert_frame_equal(result, expected)
Example #20
0
def test_by_mean():
    """split() of a grouped mean: chunks emit (sum, count) pairs and the
    aggregate divides summed totals by summed counts."""
    (chunk, chunk_expr), (agg, agg_expr) = \
        split(t, by(t.name, avg=t.amount.mean()))

    assert chunk_expr.isidentical(
        by(chunk.name,
           avg_total=chunk.amount.sum(),
           avg_count=chunk.amount.count()))

    assert agg_expr.isidentical(
        by(agg.name, avg=(agg.avg_total.sum() / agg.avg_count.sum())))
Example #21
0
def test_lean_by_with_summary():
    """lean_projection prunes unused columns: a by() over x/y only needs
    t[['x', 'y']], and an unused summary term (b) is dropped entirely."""
    assert lean_projection(by(t.x, total=t.y.sum()))._child.isidentical(
        t[['x', 'y']], )

    tt = t[['x', 'y']]
    result = lean_projection(by(t.x, a=t.y.sum(), b=t.z.sum())[['x', 'a']])
    expected = Projection(
        By(Field(tt, 'x'), summary(a=sum(Field(tt, 'y')))),
        ('x', 'a'),
    )
    assert result.isidentical(expected)
Example #22
0
def test_summary_by():
    """Grouped summary over raw data: plain sum, sum of a shifted column,
    and a sum shifted after reduction all give distinct expected totals."""
    expr = by(t.name, summary(count=t.id.count(), sum=t.amount.sum()))
    assert set(compute(expr, data)) == set([('Alice', 2, 150),
                                            ('Bob', 1, 200)])

    # shift each element before summing: total grows by the group size
    expr = by(t.name, summary(count=t.id.count(), sum=(t.amount + 1).sum()))
    assert set(compute(expr, data)) == set([('Alice', 2, 152),
                                            ('Bob', 1, 201)])

    # shift once after summing: total grows by exactly 1
    expr = by(t.name, summary(count=t.id.count(), sum=t.amount.sum() + 1))
    assert set(compute(expr, data)) == set([('Alice', 2, 151),
                                            ('Bob', 1, 201)])
Example #23
0
def test_summary_by():
    """Grouped summary over raw data: plain sum, sum of a shifted column,
    and a sum shifted after reduction all give distinct expected totals."""
    expr = by(t.name, summary(count=t.id.count(), sum=t.amount.sum()))
    assert set(compute(expr, data)) == set([('Alice', 2, 150),
                                            ('Bob', 1, 200)])

    # shift each element before summing: total grows by the group size
    expr = by(t.name, summary(count=t.id.count(), sum=(t.amount + 1).sum()))
    assert set(compute(expr, data)) == set([('Alice', 2, 152),
                                            ('Bob', 1, 201)])

    # shift once after summing: total grows by exactly 1
    expr = by(t.name, summary(count=t.id.count(), sum=t.amount.sum() + 1))
    assert set(compute(expr, data)) == set([('Alice', 2, 151),
                                            ('Bob', 1, 201)])
Example #24
0
def test_aliased_views_with_two_group_bys():
    """A group-by over the result of another group-by compiles to a
    subquery aliased as `alias`, grouped again on the outer key."""
    expr = by(bank.name, total=bank.amount.sum())
    expr2 = by(expr.total, count=expr.name.count())

    result = compute(expr2, {bank: sql_bank, cities: sql_cities})

    assert normalize(str(result)) == normalize("""
    SELECT alias.total, count(alias.name) as count
    FROM (SELECT bank.name AS name, sum(bank.amount) AS total
          FROM bank
          GROUP BY bank.name) as alias
    GROUP BY alias.total
    """)
Example #25
0
def test_aliased_views_with_two_group_bys():
    """A group-by over the result of another group-by compiles to a
    subquery aliased as `alias`, grouped again on the outer key."""
    expr = by(bank.name, total=bank.amount.sum())
    expr2 = by(expr.total, count=expr.name.count())

    result = compute(expr2, {bank: sql_bank, cities: sql_cities})

    assert normalize(str(result)) == normalize("""
    SELECT alias.total, count(alias.name) as count
    FROM (SELECT bank.name AS name, sum(bank.amount) AS total
          FROM bank
          GROUP BY bank.name) as alias
    GROUP BY alias.total
    """)
Example #26
0
def test_aliased_views_more():
    """Join of a grouped subquery with a table, then a group-by over that
    join: the generated SQL must match one of two equivalent aliasings."""
    metadata = sa.MetaData()
    lhs = sa.Table('aaa', metadata,
                   sa.Column('x', sa.Integer),
                   sa.Column('y', sa.Integer),
                   sa.Column('z', sa.Integer))

    rhs = sa.Table('bbb', metadata,
                   sa.Column('w', sa.Integer),
                   sa.Column('x', sa.Integer),
                   sa.Column('y', sa.Integer))

    L = symbol('L', 'var * {x: int, y: int, z: int}')
    R = symbol('R', 'var * {w: int, x: int, y: int}')

    # join the grouped lhs with rhs on their shared column x
    expr = join(by(L.x, y_total=L.y.sum()),
                R)

    result = compute(expr, {L: lhs, R: rhs})

    assert normalize(str(result)) == normalize("""
        SELECT alias.x, alias.y_total, bbb.w, bbb.y
        FROM (SELECT aaa.x as x, sum(aaa.y) as y_total
              FROM aaa
              GROUP BY aaa.x) AS alias
        JOIN bbb ON alias.x = bbb.x """)

    # a second group-by layered on top of the join
    expr2 = by(expr.w, count=expr.x.count(), total2=expr.y_total.sum())

    result2 = compute(expr2, {L: lhs, R: rhs})

    # either the whole join is wrapped in alias_2, or the group-by is
    # applied directly to the join — both are acceptable compilations
    assert (
        normalize(str(result2)) == normalize("""
            SELECT alias_2.w, count(alias_2.x) as count, sum(alias_2.y_total) as total2
            FROM (SELECT alias.x, alias.y_total, bbb.w, bbb.y
                  FROM (SELECT aaa.x as x, sum(aaa.y) as y_total
                        FROM aaa
                        GROUP BY aaa.x) AS alias
                  JOIN bbb ON alias.x = bbb.x) AS alias_2
            GROUP BY alias_2.w""")

        or

        normalize(str(result2)) == normalize("""
            SELECT bbb.w, count(alias.x) as count, sum(alias.y_total) as total2
            FROM (SELECT aaa.x as x, sum(aaa.y) as y_total
                  FROM aaa
                  GROUP BY aaa.x) as alias
              JOIN bbb ON alias.x = bbb.x
            GROUP BY bbb.w"""))
Example #27
0
def test_aliased_views_with_join():
    """Group-by over a join, then a second group-by with nunique: the
    join+group-by becomes an aliased subquery with COUNT(DISTINCT)."""
    joined = join(bank, cities)
    expr = by(joined.city, total=joined.amount.sum())
    expr2 = by(expr.total, count=expr.city.nunique())

    result = compute(expr2, {bank: sql_bank, cities: sql_cities})

    assert normalize(str(result)) == normalize("""
    SELECT alias.total, count(DISTINCT alias.city) AS count
    FROM (SELECT cities.city AS city, sum(bank.amount) AS total
          FROM bank
          JOIN cities ON bank.name = cities.name
          GROUP BY cities.city) as alias
    GROUP BY alias.total
    """)
Example #28
0
def test_aliased_views_with_join():
    """Group-by over a join, then a second group-by with nunique: the
    join+group-by becomes an aliased subquery with COUNT(DISTINCT)."""
    joined = join(bank, cities)
    expr = by(joined.city, total=joined.amount.sum())
    expr2 = by(expr.total, count=expr.city.nunique())

    result = compute(expr2, {bank: sql_bank, cities: sql_cities})

    assert normalize(str(result)) == normalize("""
    SELECT alias.total, count(DISTINCT alias.city) AS count
    FROM (SELECT cities.city AS city, sum(bank.amount) AS total
          FROM bank
          JOIN cities ON bank.name = cities.name
          GROUP BY cities.city) as alias
    GROUP BY alias.total
    """)
Example #29
0
def test_by_multi_column_grouper():
    """Grouping on a two-column projection counts rows per (x, y) pair."""
    t = TableSymbol('t', '{x: int, y: int, z: int}')
    expr = by(t[['x', 'y']], t['z'].count())
    data = [(1, 2, 0), (1, 2, 0), (1, 1, 0)]

    result = set(compute(expr, data))
    print(result)
    assert result == {(1, 2, 2), (1, 1, 1)}
Example #30
0
def compute_up(t, rdd, **kwargs):
    """Compute a By expression over a Spark RDD via combineByKey.

    Only supported when the apply is a binop-reducible Reduction (or a
    Summary made entirely of such reductions); otherwise raises
    NotImplementedError.
    """
    # optimize grouper/apply against the data before rebuilding the By
    grouper = optimize(t.grouper, rdd)
    apply = optimize(t.apply, rdd)
    t = by(grouper, apply)
    if ((isinstance(t.apply, Reduction) and type(t.apply) in binops) or
        (isinstance(t.apply, Summary)
         and builtins.all(type(val) in binops for val in t.apply.values))):
        grouper, binop, combiner, initial = reduce_by_funcs(t)

        # wrap scalar keys/values in 1-tuples so unpack can concatenate
        if isscalar(t.grouper.dshape.measure):
            keyfunc = lambda x: (x, )
        else:
            keyfunc = identity
        if isscalar(t.apply.dshape.measure):
            valfunc = lambda x: (x, )
        else:
            valfunc = identity
        # flatten (key, value) back into a single result tuple
        unpack = lambda kv: keyfunc(kv[0]) + valfunc(kv[1])

        # seed each key's accumulator from the initial value
        create = lambda v: binop(initial, v)

        return (rdd.keyBy(grouper).combineByKey(create, binop,
                                                combiner).map(unpack))
    else:
        raise NotImplementedError("By only implemented for common reductions."
                                  "\nGot %s" % type(t.apply))
Example #31
0
def test_join_by_arcs():
    """Group-by over a join result (positional apply): counts node_ids per
    name and checks against the pandas merge+groupby equivalent."""
    df_idx = DataFrame([['A', 1],
                        ['B', 2],
                        ['C', 3]],
                      columns=['name', 'node_id'])

    df_arc = DataFrame([[1, 3],
                        [2, 3],
                        [3, 1]],
                       columns=['node_out', 'node_id'])

    t_idx = Symbol('t_idx', 'var * {name: string, node_id: int32}')

    t_arc = Symbol('t_arc', 'var * {node_out: int32, node_id: int32}')

    joined = join(t_arc, t_idx, "node_id")

    want = by(joined['name'], joined['node_id'].count())

    result = compute(want, {t_arc: df_arc, t_idx:df_idx})

    result_pandas = pd.merge(df_arc, df_idx, on='node_id')

    expected = result_pandas.groupby('name')['node_id'].count().reset_index()
    assert str(result.values) == str(expected.values)
    # positional apply auto-names the aggregate column 'node_id_count'
    assert list(result.columns) == ['name', 'node_id_count']
Example #32
0
def test_by_multi_column_grouper():
    """Grouping on a two-column projection counts rows per (x, y) pair."""
    t = Symbol('t', 'var * {x: int, y: int, z: int}')
    expr = by(t[['x', 'y']], t['z'].count())
    data = [(1, 2, 0), (1, 2, 0), (1, 1, 0)]

    print(set(compute(expr, data)))
    assert set(compute(expr, data)) == set([(1, 2, 2), (1, 1, 1)])
Example #33
0
def test_by_two():
    """Two-column grouper with a positional summed amount over databig."""
    expr = by(tbig[['name', 'sex']], tbig['amount'].sum())
    result = compute(expr, databig)

    expected = {('Alice', 'F', 200), ('Drew', 'F', 100), ('Drew', 'M', 300)}

    print(set(result))
    assert set(result) == expected
Example #34
0
def test_join_by_arcs():
    """Group-by over a join result (positional apply): counts node_ids per
    name and checks against the pandas merge+groupby equivalent."""
    df_idx = DataFrame([['A', 1],
                        ['B', 2],
                        ['C', 3]],
                      columns=['name', 'node_id'])

    df_arc = DataFrame([[1, 3],
                        [2, 3],
                        [3, 1]],
                       columns=['node_out', 'node_id'])

    t_idx = TableSymbol('t_idx', '{name: string, node_id: int32}')

    t_arc = TableSymbol('t_arc', '{node_out: int32, node_id: int32}')

    joined = join(t_arc, t_idx, "node_id")

    want = by(joined['name'], joined['node_id'].count())

    result = compute(want, {t_arc: df_arc, t_idx:df_idx})

    result_pandas = pd.merge(df_arc, df_idx, on='node_id')

    expected = result_pandas.groupby('name')['node_id'].count().reset_index()
    assert str(result.values) == str(expected.values)
    # positional apply auto-names the aggregate column 'node_id_count'
    assert list(result.columns) == ['name', 'node_id_count']
Example #35
0
def test_by():
    """by() yields a Record schema whose grouper field keeps its type."""
    t = symbol('t', 'var * {name: string, amount: int32, id: int32}')
    grouped = by(t['name'], total=sum(t['amount']))

    print(grouped.schema)
    measure = grouped.schema[0]
    assert isinstance(measure, Record)
    assert str(measure['name']) == 'string'
Example #36
0
def test_by():
    """by() yields a Record schema whose grouper field keeps its type."""
    t = TableSymbol("t", "{name: string, amount: int32, id: int32}")
    r = by(t["name"], total=sum(t["amount"]))

    print(r.schema)
    assert isinstance(r.schema[0], Record)
    assert str(r.schema[0]["name"]) == "string"
Example #37
0
def test_path_split():
    """path_split finds the largest subexpression of t inside expr that
    can be computed independently of the trailing operations."""
    expr = t.amount.sum() + 1
    assert path_split(t, expr).isidentical(t.amount.sum())

    expr = t.amount.distinct().sort()
    assert path_split(t, expr).isidentical(t.amount.distinct())

    # splitting sees through transform()-derived tables
    t2 = transform(t, id=t.id * 2)
    expr = by(t2.id, amount=t2.amount.sum()).amount + 1
    assert path_split(t, expr).isidentical(by(t2.id, amount=t2.amount.sum()))

    expr = count(t.amount.distinct())
    assert path_split(t, expr).isidentical(t.amount.distinct())

    # a summary is already a terminal aggregate: split returns it unchanged
    expr = summary(total=t.amount.sum())
    assert path_split(t, expr).isidentical(expr)
Example #38
0
def test_by():
    """by() yields a Record schema whose grouper field keeps its type
    (positional-apply form)."""
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')
    r = by(t['name'], sum(t['amount']))

    print(r.schema)
    assert isinstance(r.schema[0], Record)
    assert str(r.schema[0]['name']) == 'string'
Example #39
0
def test_join_by_arcs():
    """Group-by over a join result (keyword apply): counts node_ids per
    name; the aggregate column takes the keyword name 'count'."""
    df_idx = DataFrame([['A', 1],
                        ['B', 2],
                        ['C', 3]],
                       columns=['name', 'node_id'])

    df_arc = DataFrame([[1, 3],
                        [2, 3],
                        [3, 1]],
                       columns=['node_out', 'node_id'])

    t_idx = symbol('t_idx', 'var * {name: string, node_id: int32}')

    t_arc = symbol('t_arc', 'var * {node_out: int32, node_id: int32}')

    joined = join(t_arc, t_idx, "node_id")

    want = by(joined['name'], count=joined['node_id'].count())

    result = compute(want, {t_arc: df_arc, t_idx: df_idx})

    result_pandas = pd.merge(df_arc, df_idx, on='node_id')

    gb = result_pandas.groupby('name')
    expected = gb.node_id.count().reset_index().rename(columns={
                                                       'node_id': 'count'
                                                       })

    tm.assert_frame_equal(result, expected)
    assert list(result.columns) == ['name', 'count']
Example #40
0
def test_path_split():
    """path_split finds the largest subexpression of t inside expr that
    can be computed independently of the trailing operations."""
    expr = t.amount.sum() + 1
    assert path_split(t, expr).isidentical(t.amount.sum())

    expr = t.amount.distinct().sort()
    assert path_split(t, expr).isidentical(t.amount.distinct())

    # splitting sees through transform()-derived tables
    t2 = transform(t, id=t.id * 2)
    expr = by(t2.id, amount=t2.amount.sum()).amount + 1
    assert path_split(t, expr).isidentical(by(t2.id, amount=t2.amount.sum()))

    expr = count(t.amount.distinct())
    assert path_split(t, expr).isidentical(t.amount.distinct())

    # a summary is already a terminal aggregate: split returns it unchanged
    expr = summary(total=t.amount.sum())
    assert path_split(t, expr).isidentical(expr)
Example #41
0
def test_complex_group_by():
    """Grouping on computed (merged) columns matches pandas groupby on
    the same derived series."""
    expr = by(merge(tbig.amount // 10, tbig.id % 2), count=tbig.name.count())
    result = compute(expr, dfbig)  # can we do this? yes we can!
    expected = dfbig.groupby([dfbig.amount // 10,
                              dfbig.id % 2])['name'].count().reset_index()
    expected = expected.rename(columns={'name': 'count'})
    tm.assert_frame_equal(result, expected)
Example #42
0
def test_join_by_arcs():
    """Group-by over a join result (keyword apply): counts node_ids per
    name; the aggregate column takes the keyword name 'count'."""
    df_idx = DataFrame([['A', 1], ['B', 2], ['C', 3]],
                       columns=['name', 'node_id'])

    df_arc = DataFrame([[1, 3], [2, 3], [3, 1]],
                       columns=['node_out', 'node_id'])

    t_idx = symbol('t_idx', 'var * {name: string, node_id: int32}')

    t_arc = symbol('t_arc', 'var * {node_out: int32, node_id: int32}')

    joined = join(t_arc, t_idx, "node_id")

    want = by(joined['name'], count=joined['node_id'].count())

    result = compute(want, {t_arc: df_arc, t_idx: df_idx})

    result_pandas = pd.merge(df_arc, df_idx, on='node_id')

    gb = result_pandas.groupby('name')
    expected = gb.node_id.count().reset_index().rename(
        columns={'node_id': 'count'})

    tm.assert_frame_equal(result, expected)
    assert list(result.columns) == ['name', 'count']
Example #43
0
def test_agg_shape_in_tabular_case_with_explicit_chunk():
    """With an explicit fixed-size chunk, split's aggregate input is a
    var-length table of (name, total) records."""
    t = symbol('t', '1000 * {name: string, amount: int, id: int}')
    c = symbol('chunk', 100 * t.schema)

    expr = by(t.name, total=t.amount.sum())
    (chunk, chunk_expr), (agg, agg_expr) = split(t, expr, chunk=c)

    assert agg.dshape == dshape('var * {name: string, total: int64}')
Example #44
0
def test_complex_group_by():
    """Grouping on computed (merged) columns matches pandas groupby on
    the same derived series."""
    expr = by(merge(tbig.amount // 10, tbig.id % 2),
              count=tbig.name.count())
    result = compute(expr, dfbig)  # can we do this? yes we can!
    expected = dfbig.groupby([dfbig.amount // 10,
                              dfbig.id % 2])['name'].count().reset_index()
    expected = expected.rename(columns={'name': 'count'})
    tm.assert_frame_equal(result, expected)
Example #45
0
def test_agg_shape_in_tabular_case_with_explicit_chunk():
    """With an explicit fixed-size chunk, split's aggregate input is a
    var-length table of (name, total) records."""
    t = symbol('t', '1000 * {name: string, amount: int, id: int}')
    c = symbol('chunk', 100 * t.schema)

    expr = by(t.name, total=t.amount.sum())
    (chunk, chunk_expr), (agg, agg_expr) = split(t, expr, chunk=c)

    assert agg.dshape == dshape('var * {name: string, total: int64}')
Example #46
0
def test_by_four():
    """Max amount per sex over dfbig, with the keyword-named 'max' column."""
    projected = tbig[['sex', 'amount']]
    expr = by(projected['sex'], max=projected['amount'].max())
    result = compute(expr, dfbig)

    expected = DataFrame([['F', 100], ['M', 200]], columns=['sex', 'max'])

    tm.assert_frame_equal(result, expected)
Example #47
0
def test_aliased_views_with_computation():
    """Cross-backend agreement: the same layered group-by/join expressions
    computed on pandas frames and on SQLite tables give equal result sets."""
    engine = sa.create_engine('sqlite:///:memory:')

    df_aaa = DataFrame({
        'x': [1, 2, 3, 2, 3],
        'y': [2, 1, 2, 3, 1],
        'z': [3, 3, 3, 1, 2]
    })
    df_bbb = DataFrame({
        'w': [1, 2, 3, 2, 3],
        'x': [2, 1, 2, 3, 1],
        'y': [3, 3, 3, 1, 2]
    })

    df_aaa.to_sql('aaa', engine)
    df_bbb.to_sql('bbb', engine)

    metadata = sa.MetaData(engine)
    metadata.reflect()

    sql_aaa = metadata.tables['aaa']
    sql_bbb = metadata.tables['bbb']

    L = symbol('aaa', discover(df_aaa))
    R = symbol('bbb', discover(df_bbb))

    # group-by joined with the other table
    expr = join(by(L.x, y_total=L.y.sum()), R)
    a = compute(expr, {L: df_aaa, R: df_bbb})
    b = compute(expr, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    # group-by layered over the join
    expr2 = by(expr.w, count=expr.x.count(), total2=expr.y_total.sum())
    a = compute(expr2, {L: df_aaa, R: df_bbb})
    b = compute(expr2, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr3 = by(expr.x, count=expr.y_total.count())
    a = compute(expr3, {L: df_aaa, R: df_bbb})
    b = compute(expr3, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr4 = join(expr2, R)
    a = compute(expr4, {L: df_aaa, R: df_bbb})
    b = compute(expr4, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)
    # NOTE(review): the trailing triple quote below opens a string that is
    # not closed within this view — presumably disabling a slow section;
    # confirm against the full file before changing it.
    """ # Takes a while
Example #48
0
def test_by():
    """Grouped sum compiles to a SELECT with sum(...).label('total')
    grouped by name."""
    expr = by(t['name'], total=t['amount'].sum())
    result = compute(expr, s)
    expected = sa.select([s.c.name,
                          sa.sql.functions.sum(s.c.amount).label('total')]
                         ).group_by(s.c.name)

    assert str(result) == str(expected)
Example #49
0
def test_by():
    """Grouped sum compiles to a SELECT with sum(...).label('total')
    grouped by name."""
    expr = by(t['name'], total=t['amount'].sum())
    result = compute(expr, s)
    expected = sa.select(
        [s.c.name,
         sa.sql.functions.sum(s.c.amount).label('total')]).group_by(s.c.name)

    assert str(result) == str(expected)
Example #50
0
def test_by_four():
    """Max amount per sex (positional apply): aggregate column is
    auto-named 'amount_max'; frames compared via str()."""
    t = tbig[['sex', 'amount']]
    expr = by(t['sex'], t['amount'].max())
    result = compute(expr, dfbig)

    expected = DataFrame([['F', 100],
                          ['M', 200]], columns=['sex', 'amount_max'])

    assert str(result) == str(expected)
Example #51
0
def test_by_on_same_column():
    """Grouping and counting the same column ('id') must not conflict."""
    df = pd.DataFrame([[1, 2], [1, 4], [2, 9]], columns=['id', 'value'])
    t = symbol('data', 'var * {id: int, value: int}')

    gby = by(t['id'], count=t['id'].count())

    expected = DataFrame([[1, 2], [2, 1]], columns=['id', 'count'])
    result = compute(gby, {t: df})
    tm.assert_frame_equal(result, expected)
Example #52
0
def test_by_three():
    """Sum of a computed column (id + amount) grouped by (name, sex)."""
    expr = by(tbig[['name', 'sex']], (tbig['id'] + tbig['amount']).sum())
    result = compute(expr, databig)

    expected = {('Alice', 'F', 204), ('Drew', 'F', 104), ('Drew', 'M', 310)}

    print(result)
    assert set(result) == expected
Example #53
0
def test_by_four():
    """Max amount per sex over dfbig, with the keyword-named 'max' column."""
    t = tbig[['sex', 'amount']]
    expr = by(t['sex'], max=t['amount'].max())
    result = compute(expr, dfbig)

    expected = DataFrame([['F', 100],
                          ['M', 200]], columns=['sex', 'max'])

    tm.assert_frame_equal(result, expected)
Example #54
0
def test_by_on_count():
    """Whole-table count grouped by name compiles to count(accounts.id)."""
    expr = by(t.name, count=t.count())
    result = compute(expr, s)

    assert normalize(str(result)) == normalize("""
    SELECT accounts.name, count(accounts.id) AS count
    FROM accounts
    GROUP BY accounts.name
    """)
Example #55
0
def test_by_two():
    """Two-column grouper with a keyword-named summed amount over dfbig."""
    result = compute(by(tbig[['name', 'sex']], total=sum(tbig['amount'])),
                     dfbig)

    expected = DataFrame(
        [['Alice', 'F', 200], ['Drew', 'F', 100], ['Drew', 'M', 300]],
        columns=['name', 'sex', 'total'])

    tm.assert_frame_equal(result, expected)
Example #56
0
def test_summary_by():
    """Grouped summary compiles to labelled sum/count aggregates with a
    GROUP BY on the grouper column (checked case-insensitively)."""
    expr = by(t.name, summary(a=t.amount.sum(), b=t.id.count()))

    result = str(compute(expr, s))

    assert 'sum(accounts.amount) as a' in result.lower()
    assert 'count(accounts.id) as b' in result.lower()

    assert 'group by accounts.name' in result.lower()
Example #57
0
def test_by_two():
    """Two-column grouper compiles to a SELECT grouped by both columns."""
    expr = by(tbig[['name', 'sex']], total=tbig['amount'].sum())
    result = compute(expr, sbig)
    expected = (sa.select([
        sbig.c.name, sbig.c.sex,
        sa.sql.functions.sum(sbig.c.amount).label('total')
    ]).group_by(sbig.c.name, sbig.c.sex))

    assert str(result) == str(expected)
Example #58
0
def test_reduce_does_not_compose():
    """A reduction over a grouped result compiles to a CTE (WITH alias)
    rather than composing into the inner aggregate."""
    expr = by(t.name, counts=t.count()).counts.max()
    result = str(compute(expr, s))
    expected = """WITH alias AS
(SELECT count(accounts.id) AS counts
FROM accounts GROUP BY accounts.name)
 SELECT max(alias.counts) AS counts_max
FROM alias"""
    assert normalize(result) == normalize(expected)
Example #59
0
def test_by_on_same_column():
    """Grouping and counting the same column ('id') must not conflict."""
    df = pd.DataFrame([[1, 2], [1, 4], [2, 9]], columns=['id', 'value'])
    t = symbol('data', 'var * {id: int, value: int}')

    grouped = by(t['id'], count=t['id'].count())
    result = compute(grouped, {t: df})

    expected = DataFrame([[1, 2], [2, 1]], columns=['id', 'count'])
    tm.assert_frame_equal(result, expected)