Beispiel #1
0
def test_outer_join():
    left = [(1, 'Alice', 100), (2, 'Bob', 200), (4, 'Dennis', 400)]
    right = [('NYC', 1), ('Boston', 1), ('LA', 3), ('Moscow', 4)]

    L = Symbol('L', 'var * {id: int, name: string, amount: real}')
    R = Symbol('R', 'var * {city: string, id: int}')

    assert set(compute(join(L, R), {
        L: left,
        R: right
    })) == set([(1, 'Alice', 100, 'NYC'), (1, 'Alice', 100, 'Boston'),
                (4, 'Dennis', 400, 'Moscow')])

    assert set(compute(join(L, R, how='left'), {
        L: left,
        R: right
    })) == set([(1, 'Alice', 100, 'NYC'), (1, 'Alice', 100, 'Boston'),
                (2, 'Bob', 200, None), (4, 'Dennis', 400, 'Moscow')])

    assert set(compute(join(L, R, how='right'), {
        L: left,
        R: right
    })) == set([(1, 'Alice', 100, 'NYC'), (1, 'Alice', 100, 'Boston'),
                (3, None, None, 'LA'), (4, 'Dennis', 400, 'Moscow')])

    assert set(compute(join(L, R, how='outer'), {
        L: left,
        R: right
    })) == set([(1, 'Alice', 100, 'NYC'), (1, 'Alice', 100, 'Boston'),
                (2, 'Bob', 200, None), (3, None, None, 'LA'),
                (4, 'Dennis', 400, 'Moscow')])
Beispiel #2
0
def test_outer_join():
    left = [(1, 'Alice', 100),
            (2, 'Bob', 200),
            (4, 'Dennis', 400)]
    right = [('NYC', 1),
             ('Boston', 1),
             ('LA', 3),
             ('Moscow', 4)]

    L = TableSymbol('L', '{id: int, name: string, amount: real}')
    R = TableSymbol('R', '{city: string, id: int}')

    assert set(compute(join(L, R), {L: left, R: right})) == set(
            [(1, 'Alice', 100, 'NYC'),
             (1, 'Alice', 100, 'Boston'),
             (4, 'Dennis', 400, 'Moscow')])

    assert set(compute(join(L, R, how='left'), {L: left, R: right})) == set(
            [(1, 'Alice', 100, 'NYC'),
             (1, 'Alice', 100, 'Boston'),
             (2, 'Bob', 200, None),
             (4, 'Dennis', 400, 'Moscow')])

    assert set(compute(join(L, R, how='right'), {L: left, R: right})) == set(
            [(1, 'Alice', 100, 'NYC'),
             (1, 'Alice', 100, 'Boston'),
             (3, None, None, 'LA'),
             (4, 'Dennis', 400, 'Moscow')])

    assert set(compute(join(L, R, how='outer'), {L: left, R: right})) == set(
            [(1, 'Alice', 100, 'NYC'),
             (1, 'Alice', 100, 'Boston'),
             (2, 'Bob', 200, None),
             (3, None, None, 'LA'),
             (4, 'Dennis', 400, 'Moscow')])
Beispiel #3
0
def test_graph_double_join():
    idx = [['A', 1], ['B', 2], ['C', 3], ['D', 4], ['E', 5], ['F', 6]]

    arc = [[1, 3], [2, 3], [4, 3], [5, 3], [3, 1], [2, 1], [5, 1], [1, 6],
           [2, 6], [4, 6]]

    wanted = [['A'], ['F']]

    t_idx = symbol('t_idx', 'var * {name: string, b: int32}')
    t_arc = symbol('t_arc', 'var * {a: int32, b: int32}')
    t_wanted = symbol('t_wanted', 'var * {name: string}')

    # >>> compute(join(t_idx, t_arc, 'b'), {t_idx: idx, t_arc: arc})
    # [[1, A, 3],
    #  [1, A, 2],
    #  [1, A, 5],
    #  [3, C, 1],
    #  [3, C, 2],
    #  [3, C, 4],
    #  [3, C, 5],
    #  [6, F, 1],
    #  [6, F, 2],
    #  [6, F, 4]]

    j = join(join(t_idx, t_arc, 'b'), t_wanted, 'name')[['name', 'b', 'a']]

    result = compute(j, {t_idx: idx, t_arc: arc, t_wanted: wanted})
    result = sorted(map(tuple, result))
    expected = sorted([('A', 1, 3), ('A', 1, 2), ('A', 1, 5), ('F', 6, 1),
                       ('F', 6, 2), ('F', 6, 4)])

    assert result == expected
Beispiel #4
0
def test_join():
    t = TableSymbol('t', '{name: string, amount: int}')
    s = TableSymbol('t', '{name: string, id: int}')
    j = join(t, s, 'name', 'name')

    assert j.schema == dshape('{name: string, amount: int, id: int}')

    assert join(t, s, 'name') == join(t, s, 'name')
Beispiel #5
0
def test_leaves():
    t = TableSymbol("t", "{id: int32, name: string}")
    v = TableSymbol("v", "{id: int32, city: string}")
    x = symbol("x", "int32")

    assert t._leaves() == [t]
    assert t.id._leaves() == [t]
    assert by(t.name, count=t.id.nunique())._leaves() == [t]
    assert join(t, v)._leaves() == [t, v]
    assert join(v, t)._leaves() == [v, t]

    assert (x + 1)._leaves() == [x]
Beispiel #6
0
def test_leaves():
    t = TableSymbol('t', '{id: int32, name: string}')
    v = TableSymbol('v', '{id: int32, city: string}')
    x = Symbol('x', 'int32')

    assert t._leaves() == [t]
    assert t.id._leaves() == [t]
    assert by(t.name, t.id.nunique())._leaves() == [t]
    assert join(t, v)._leaves() == [t, v]
    assert join(v, t)._leaves() == [v, t]

    assert (x + 1)._leaves() == [x]
Beispiel #7
0
def test_leaves():
    t = symbol('t', 'var * {id: int32, name: string}')
    v = symbol('v', 'var * {id: int32, city: string}')
    x = symbol('x', 'int32')

    assert t._leaves() == [t]
    assert t.id._leaves() == [t]
    assert by(t.name, count=t.id.nunique())._leaves() == [t]
    assert join(t, v)._leaves() == [t, v]
    assert join(v, t)._leaves() == [v, t]

    assert (x + 1)._leaves() == [x]
Beispiel #8
0
def test_join_on_single_column():
    expr = join(cities[['name']], bank)
    result = compute(expr, {bank: sql_bank, cities: sql_cities})

    assert normalize(str(result)) == """
    SELECT bank.id, bank.name, bank.amount
    FROM bank join cities ON bank.name = cities.name"""

    expr = join(bank, cities.name)
    result = compute(expr, {bank: sql_bank, cities: sql_cities})

    assert normalize(str(result)) == """
Beispiel #9
0
def test_join_on_single_column():
    expr = join(cities[['name']], bank)
    result = compute(expr, {bank: sql_bank, cities: sql_cities})

    assert normalize(str(result)) == """
    SELECT bank.id, bank.name, bank.amount
    FROM bank join cities ON bank.name = cities.name"""

    expr = join(bank, cities.name)
    result = compute(expr, {bank: sql_bank, cities: sql_cities})

    assert normalize(str(result)) == """
Beispiel #10
0
def test_clean_join():
    metadata = sa.MetaData()
    name = sa.Table(
        'name',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    city = sa.Table(
        'place',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )
    friends = sa.Table(
        'friends',
        metadata,
        sa.Column('a', sa.Integer),
        sa.Column('b', sa.Integer),
    )

    tcity = symbol('city', discover(city))
    tfriends = symbol('friends', discover(friends))
    tname = symbol('name', discover(name))

    ns = {tname: name, tfriends: friends, tcity: city}

    expr = join(tfriends, tname, 'a', 'id')
    assert normalize(str(compute(expr, ns))) == normalize("""
    SELECT friends.a, friends.b, name.name
    FROM friends JOIN name on friends.a = name.id""")

    expr = join(join(tfriends, tname, 'a', 'id'), tcity, 'a', 'id')

    result = compute(expr, ns)

    expected1 = """
    SELECT friends.a, friends.b, name.name, place.city, place.country
    FROM friends
        JOIN name ON friends.a = name.id
        JOIN place ON friends.a = place.id
        """

    expected2 = """
    SELECT alias.a, alias.b, alias.name, place.city, place.country
    FROM (SELECT friends.a AS a, friends.b AS b, name.name AS name
          FROM friends JOIN name ON friends.a = name.id) AS alias
    JOIN place ON alias.a = place.id
    """

    assert (normalize(str(result)) == normalize(expected1)
            or normalize(str(result)) == normalize(expected2))
Beispiel #11
0
def test_path():
    from blaze.expr import TableSymbol, join
    t = TableSymbol('t', '{name: string, amount: int, id: int}')
    v = TableSymbol('v', '{city: string, id: int}')
    expr = t['amount'].sum()

    assert list(path(expr, t)) == [t.amount.sum(), t.amount, t]
    assert list(path(expr, t.amount)) == [t.amount.sum(), t.amount]
    assert list(path(expr, t.amount)) == [t.amount.sum(), t.amount]

    expr = join(t, v).amount
    assert list(path(expr, t)) == [join(t, v).amount, join(t, v), t]
    assert list(path(expr, v)) == [join(t, v).amount, join(t, v), v]
Beispiel #12
0
def test_path():
    from blaze.expr import TableSymbol, join
    t = TableSymbol('t', '{name: string, amount: int, id: int}')
    v = TableSymbol('v', '{city: string, id: int}')
    expr = t['amount'].sum()

    assert list(path(expr, t)) == [t.amount.sum(), t.amount, t]
    assert list(path(expr, t.amount)) == [t.amount.sum(), t.amount]
    assert list(path(expr, t.amount)) == [t.amount.sum(), t.amount]

    expr = join(t, v).amount
    assert list(path(expr, t)) == [join(t, v).amount, join(t, v), t]
    assert list(path(expr, v)) == [join(t, v).amount, join(t, v), v]
Beispiel #13
0
def test_outer_join():
    left = [(1, 'Alice', 100),
            (2, 'Bob', 200),
            (4, 'Dennis', 400)]
    left = DataFrame(left, columns=['id', 'name', 'amount'])

    right = [('NYC', 1),
             ('Boston', 1),
             ('LA', 3),
             ('Moscow', 4)]
    right = DataFrame(right, columns=['city', 'id'])

    lsym = symbol('lsym', 'var * {id: int, name: string, amount: real}')
    rsym = symbol('rsym', 'var * {city: string, id: int}')

    convert = lambda df: set(df.to_records(index=False).tolist())

    assert (convert(compute(join(lsym, rsym), {lsym: left, rsym: right})) ==
            set([(1, 'Alice', 100, 'NYC'),
                 (1, 'Alice', 100, 'Boston'),
                 (4, 'Dennis', 400, 'Moscow')]))

    assert (convert(compute(join(lsym, rsym, how='left'),
                            {lsym: left, rsym: right})) ==
            set([(1, 'Alice', 100, 'NYC'),
                 (1, 'Alice', 100, 'Boston'),
                 (2, 'Bob', 200, np.nan),
                 (4, 'Dennis', 400, 'Moscow')]))

    df = compute(join(lsym, rsym, how='right'), {lsym: left, rsym: right})
    expected = DataFrame([(1., 'Alice', 100., 'NYC'),
                          (1., 'Alice', 100., 'Boston'),
                          (3., np.nan, np.nan, 'lsymA'),
                          (4., 'Dennis', 400., 'Moscow')],
                         columns=['id', 'name', 'amount', 'city'])

    result = pdsort(df, 'id').to_records(index=False)
    expected = pdsort(expected, 'id').to_records(index=False)
    np.array_equal(result, expected)

    df = compute(join(lsym, rsym, how='outer'), {lsym: left, rsym: right})
    expected = DataFrame([(1., 'Alice', 100., 'NYC'),
                          (1., 'Alice', 100., 'Boston'),
                          (2., 'Bob', 200., np.nan),
                          (3., np.nan, np.nan, 'LA'),
                          (4., 'Dennis', 400., 'Moscow')],
                         columns=['id', 'name', 'amount', 'city'])

    result = pdsort(df, 'id').to_records(index=False)
    expected = pdsort(expected, 'id').to_records(index=False)
    np.array_equal(result, expected)
def test_outer_join():
    left = [(1, 'Alice', 100),
            (2, 'Bob', 200),
            (4, 'Dennis', 400)]
    left = DataFrame(left, columns=['id', 'name', 'amount'])

    right = [('NYC', 1),
             ('Boston', 1),
             ('LA', 3),
             ('Moscow', 4)]
    right = DataFrame(right, columns=['city', 'id'])

    lsym = symbol('lsym', 'var * {id: int, name: string, amount: real}')
    rsym = symbol('rsym', 'var * {city: string, id: int}')

    convert = lambda df: set(df.to_records(index=False).tolist())

    assert (convert(compute(join(lsym, rsym), {lsym: left, rsym: right})) ==
            set([(1, 'Alice', 100, 'NYC'),
                 (1, 'Alice', 100, 'Boston'),
                 (4, 'Dennis', 400, 'Moscow')]))

    assert (convert(compute(join(lsym, rsym, how='left'),
                            {lsym: left, rsym: right})) ==
            set([(1, 'Alice', 100, 'NYC'),
                 (1, 'Alice', 100, 'Boston'),
                 (2, 'Bob', 200, np.nan),
                 (4, 'Dennis', 400, 'Moscow')]))

    df = compute(join(lsym, rsym, how='right'), {lsym: left, rsym: right})
    expected = DataFrame([(1., 'Alice', 100., 'NYC'),
                          (1., 'Alice', 100., 'Boston'),
                          (3., np.nan, np.nan, 'lsymA'),
                          (4., 'Dennis', 400., 'Moscow')],
                         columns=['id', 'name', 'amount', 'city'])

    result = df.sort('id').to_records(index=False)
    expected = expected.sort('id').to_records(index=False)
    np.array_equal(result, expected)

    df = compute(join(lsym, rsym, how='outer'), {lsym: left, rsym: right})
    expected = DataFrame([(1., 'Alice', 100., 'NYC'),
                          (1., 'Alice', 100., 'Boston'),
                          (2., 'Bob', 200., np.nan),
                          (3., np.nan, np.nan, 'LA'),
                          (4., 'Dennis', 400., 'Moscow')],
                         columns=['id', 'name', 'amount', 'city'])

    result = df.sort('id').to_records(index=False)
    expected = expected.sort('id').to_records(index=False)
    np.array_equal(result, expected)
Beispiel #15
0
def test_outer_join():
    left = [(1, 'Alice', 100),
            (2, 'Bob', 200),
            (4, 'Dennis', 400)]
    left = DataFrame(left, columns=['id', 'name', 'amount'])

    right = [('NYC', 1),
             ('Boston', 1),
             ('LA', 3),
             ('Moscow', 4)]
    right = DataFrame(right, columns=['city', 'id'])

    L = symbol('L', 'var * {id: int, name: string, amount: real}')
    R = symbol('R', 'var * {city: string, id: int}')

    convert = lambda df: set(df.to_records(index=False).tolist())

    assert convert(compute(join(L, R), {L: left, R: right})) == set(
            [(1, 'Alice', 100, 'NYC'),
             (1, 'Alice', 100, 'Boston'),
             (4, 'Dennis', 400, 'Moscow')])

    assert convert(compute(join(L, R, how='left'), {L: left, R: right})) == set(
            [(1, 'Alice', 100, 'NYC'),
             (1, 'Alice', 100, 'Boston'),
             (2, 'Bob', 200, np.nan),
             (4, 'Dennis', 400, 'Moscow')])

    df = compute(join(L, R, how='right'), {L: left, R: right})
    expected = DataFrame(
            [(1., 'Alice', 100., 'NYC'),
             (1., 'Alice', 100., 'Boston'),
             (3., np.nan, np.nan, 'LA'),
             (4., 'Dennis', 400., 'Moscow')],
            columns=['id', 'name', 'amount', 'city'])

    assert str(df.sort('id').to_records(index=False)) ==\
            str(expected.sort('id').to_records(index=False))

    df = compute(join(L, R, how='outer'), {L: left, R: right})
    expected = DataFrame(
            [(1., 'Alice', 100., 'NYC'),
             (1., 'Alice', 100., 'Boston'),
             (2., 'Bob', 200., np.nan),
             (3., np.nan, np.nan, 'LA'),
             (4., 'Dennis', 400., 'Moscow')],
            columns=['id', 'name', 'amount', 'city'])

    assert str(df.sort('id').to_records(index=False)) ==\
            str(expected.sort('id').to_records(index=False))
Beispiel #16
0
def test_graph_double_join():
    idx = [['A', 1],
           ['B', 2],
           ['C', 3],
           ['D', 4],
           ['E', 5],
           ['F', 6]]

    arc = [[1, 3],
           [2, 3],
           [4, 3],
           [5, 3],
           [3, 1],
           [2, 1],
           [5, 1],
           [1, 6],
           [2, 6],
           [4, 6]]

    wanted = [['A'],
              ['F']]

    t_idx = symbol('t_idx', 'var * {name: string, b: int32}')
    t_arc = symbol('t_arc', 'var * {a: int32, b: int32}')
    t_wanted = symbol('t_wanted', 'var * {name: string}')

    # >>> compute(join(t_idx, t_arc, 'b'), {t_idx: idx, t_arc: arc})
    # [[1, A, 3],
    #  [1, A, 2],
    #  [1, A, 5],
    #  [3, C, 1],
    #  [3, C, 2],
    #  [3, C, 4],
    #  [3, C, 5],
    #  [6, F, 1],
    #  [6, F, 2],
    #  [6, F, 4]]

    j = join(join(t_idx, t_arc, 'b'), t_wanted, 'name')[['name', 'b', 'a']]

    result = compute(j, {t_idx: idx, t_arc: arc, t_wanted: wanted})
    result = sorted(map(tuple, result))
    expected = sorted([('A', 1, 3),
                       ('A', 1, 2),
                       ('A', 1, 5),
                       ('F', 6, 1),
                       ('F', 6, 2),
                       ('F', 6, 4)])

    assert result == expected
def test_clean_join():
    metadata = sa.MetaData()
    name = sa.Table('name', metadata,
             sa.Column('id', sa.Integer),
             sa.Column('name', sa.String),
             )
    city = sa.Table('place', metadata,
             sa.Column('id', sa.Integer),
             sa.Column('city', sa.String),
             sa.Column('country', sa.String),
             )
    friends = sa.Table('friends', metadata,
             sa.Column('a', sa.Integer),
             sa.Column('b', sa.Integer),
             )

    tcity = symbol('city', discover(city))
    tfriends = symbol('friends', discover(friends))
    tname = symbol('name', discover(name))

    ns = {tname: name, tfriends: friends, tcity: city}

    expr = join(tfriends, tname, 'a', 'id')
    assert normalize(str(compute(expr, ns))) == normalize("""
    SELECT friends.a, friends.b, name.name
    FROM friends JOIN name on friends.a = name.id""")


    expr = join(join(tfriends, tname, 'a', 'id'), tcity, 'a', 'id')

    result = compute(expr, ns)

    expected1 = """
    SELECT friends.a, friends.b, name.name, place.city, place.country
    FROM friends
        JOIN name ON friends.a = name.id
        JOIN place ON friends.a = place.id
        """

    expected2 = """
    SELECT alias.a, alias.b, alias.name, place.city, place.country
    FROM (SELECT friends.a AS a, friends.b AS b, name.name AS name
          FROM friends JOIN name ON friends.a = name.id) AS alias
    JOIN place ON alias.a = place.id
    """

    assert (normalize(str(result)) == normalize(expected1) or
            normalize(str(result)) == normalize(expected2))
def test_multi_column_join():
    left = [(1, 2, 3),
            (2, 3, 4),
            (1, 3, 5)]
    left = DataFrame(left, columns=['x', 'y', 'z'])
    right = [(1, 2, 30),
             (1, 3, 50),
             (1, 3, 150)]
    right = DataFrame(right, columns=['x', 'y', 'w'])

    lsym = symbol('lsym', 'var * {x: int, y: int, z: int}')
    rsym = symbol('rsym', 'var * {x: int, y: int, w: int}')

    j = join(lsym, rsym, ['x', 'y'])

    expected = [(1, 2, 3, 30),
                (1, 3, 5, 50),
                (1, 3, 5, 150)]
    expected = DataFrame(expected, columns=['x', 'y', 'z', 'w'])

    result = compute(j, {lsym: left, rsym: right})

    print(result)

    tm.assert_frame_equal(result, expected)
    assert list(result.columns) == list(j.fields)
Beispiel #19
0
def test_multi_column_join():
    left = [(1, 2, 3),
            (2, 3, 4),
            (1, 3, 5)]
    left = DataFrame(left, columns=['x', 'y', 'z'])
    right = [(1, 2, 30),
             (1, 3, 50),
             (1, 3, 150)]
    right = DataFrame(right, columns=['x', 'y', 'w'])

    lsym = symbol('lsym', 'var * {x: int, y: int, z: int}')
    rsym = symbol('rsym', 'var * {x: int, y: int, w: int}')

    j = join(lsym, rsym, ['x', 'y'])

    expected = [(1, 2, 3, 30),
                (1, 3, 5, 50),
                (1, 3, 5, 150)]
    expected = DataFrame(expected, columns=['x', 'y', 'z', 'w'])

    result = compute(j, {lsym: left, rsym: right})

    print(result)

    tm.assert_frame_equal(result, expected)
    assert list(result.columns) == list(j.fields)
def test_join_by_arcs():
    df_idx = DataFrame([['A', 1],
                        ['B', 2],
                        ['C', 3]],
                       columns=['name', 'node_id'])

    df_arc = DataFrame([[1, 3],
                        [2, 3],
                        [3, 1]],
                       columns=['node_out', 'node_id'])

    t_idx = symbol('t_idx', 'var * {name: string, node_id: int32}')

    t_arc = symbol('t_arc', 'var * {node_out: int32, node_id: int32}')

    joined = join(t_arc, t_idx, "node_id")

    want = by(joined['name'], count=joined['node_id'].count())

    result = compute(want, {t_arc: df_arc, t_idx: df_idx})

    result_pandas = pd.merge(df_arc, df_idx, on='node_id')

    gb = result_pandas.groupby('name')
    expected = gb.node_id.count().reset_index().rename(columns={
                                                       'node_id': 'count'
                                                       })

    tm.assert_frame_equal(result, expected)
    assert list(result.columns) == ['name', 'count']
Beispiel #21
0
def test_join_complex_clean():
    metadata = sa.MetaData()
    name = sa.Table('name', metadata,
                    sa.Column('id', sa.Integer),
                    sa.Column('name', sa.String),
                    )
    city = sa.Table('place', metadata,
                    sa.Column('id', sa.Integer),
                    sa.Column('city', sa.String),
                    sa.Column('country', sa.String),
                    )

    tname = symbol('name', discover(name))
    tcity = symbol('city', discover(city))

    ns = {tname: name, tcity: city}

    expr = join(tname[tname.id > 0], tcity, 'id')
    result = compute(expr, ns)

    expected1 = """
        SELECT name.id, name.name, place.city, place.country
        FROM name JOIN place ON name.id = place.id
        WHERE name.id > :id_1"""

    expected2 = """
        SELECT alias.id, alias.name, place.city, place.country
        FROM (SELECT name.id as id, name.name AS name
              FROM name
              WHERE name.id > :id_1) AS alias
        JOIN place ON alias.id = place.id"""
    assert (normalize(str(result)) == normalize(expected1) or
            normalize(str(result)) == normalize(expected2))
Beispiel #22
0
def test_relabel_join():
    names = TableSymbol('names', '{first: string, last: string}')

    siblings = join(names.relabel({'last': 'left'}),
                    names.relabel({'last': 'right'}), 'first')

    assert siblings.fields == ['first', 'left', 'right']
Beispiel #23
0
def test_join_by_arcs():
    df_idx = DataFrame([['A', 1], ['B', 2], ['C', 3]],
                       columns=['name', 'node_id'])

    df_arc = DataFrame([[1, 3], [2, 3], [3, 1]],
                       columns=['node_out', 'node_id'])

    t_idx = symbol('t_idx', 'var * {name: string, node_id: int32}')

    t_arc = symbol('t_arc', 'var * {node_out: int32, node_id: int32}')

    joined = join(t_arc, t_idx, "node_id")

    want = by(joined['name'], count=joined['node_id'].count())

    result = compute(want, {t_arc: df_arc, t_idx: df_idx})

    result_pandas = pd.merge(df_arc, df_idx, on='node_id')

    gb = result_pandas.groupby('name')
    expected = gb.node_id.count().reset_index().rename(
        columns={'node_id': 'count'})

    tm.assert_frame_equal(result, expected)
    assert list(result.columns) == ['name', 'count']
Beispiel #24
0
def test_join_count():
    ds = datashape.dshape(
        '{t1: var * {x: int, y: int}, t2: var * {a: int, b: int}}')
    engine = resource('sqlite:///:memory:', dshape=ds)
    db = symbol('db', ds)

    expr = join(db.t1[db.t1.x > -1], db.t2, 'x', 'a').count()

    result = compute(expr, {db: engine}, post_compute=False)

    expected1 = """
    SELECT count(alias.x) as count
    FROM (SELECT t1.x AS x, t1.y AS y, t2.b AS b
          FROM t1 JOIN t2 ON t1.x = t2.a
          WHERE t1.x > ?) as alias
          """
    expected2 = """
    SELECT count(alias2.x) AS count
    FROM (SELECT alias1.x AS x, alias1.y AS y, t2.b AS b
          FROM (SELECT t1.x AS x, t1.y AS y
                FROM t1
                WHERE t1.x > ?) AS alias1
          JOIN t2 ON alias1.x = t2.a) AS alias2"""

    assert (normalize(str(result)) == normalize(expected1)
            or normalize(str(result)) == normalize(expected2))
Beispiel #25
0
def test_clean_complex_join():
    metadata = sa.MetaData()
    lhs = sa.Table('amounts', metadata,
                   sa.Column('name', sa.String),
                   sa.Column('amount', sa.Integer))

    rhs = sa.Table('ids', metadata,
                   sa.Column('name', sa.String),
                   sa.Column('id', sa.Integer))

    L = symbol('L', 'var * {name: string, amount: int}')
    R = symbol('R', 'var * {name: string, id: int}')

    joined = join(L[L.amount > 0], R, 'name')

    result = compute(joined, {L: lhs, R: rhs})

    expected1 = """
        SELECT amounts.name, amounts.amount, ids.id
        FROM amounts JOIN ids ON amounts.name = ids.name
        WHERE amounts.amount > :amount_1"""

    expected2 = """
        SELECT alias.name, alias.amount, ids.id
        FROM (SELECT amounts.name AS name, amounts.amount AS amount
              FROM amounts
              WHERE amounts.amount > :amount_1) AS alias
        JOIN ids ON alias.name = ids.name"""

    assert (normalize(str(result)) == normalize(expected1) or
            normalize(str(result)) == normalize(expected2))
Beispiel #26
0
def test_selection_of_join():
    metadata = sa.MetaData()
    name = sa.Table('name', metadata,
                    sa.Column('id', sa.Integer),
                    sa.Column('name', sa.String),
                    )
    city = sa.Table('place', metadata,
                    sa.Column('id', sa.Integer),
                    sa.Column('city', sa.String),
                    sa.Column('country', sa.String),
                    )

    tname = symbol('name', discover(name))
    tcity = symbol('city', discover(city))

    ns = {tname: name, tcity: city}

    j = join(tname, tcity, 'id')
    expr = j[j.city == 'NYC'].name
    result = compute(expr, ns)

    assert normalize(str(result)) == normalize("""
    SELECT name.name
    FROM name JOIN place ON name.id = place.id
    WHERE place.city = :city_1""")
Beispiel #27
0
def test_multi_column_join():
    metadata = sa.MetaData()
    lhs = sa.Table('aaa', metadata, sa.Column('x', sa.Integer),
                   sa.Column('y', sa.Integer), sa.Column('z', sa.Integer))

    rhs = sa.Table('bbb', metadata, sa.Column('w', sa.Integer),
                   sa.Column('x', sa.Integer), sa.Column('y', sa.Integer))

    L = symbol('L', 'var * {x: int, y: int, z: int}')
    R = symbol('R', 'var * {w: int, x: int, y: int}')
    joined = join(L, R, ['x', 'y'])

    expected = lhs.join(rhs, (lhs.c.x == rhs.c.x) & (lhs.c.y == rhs.c.y))
    expected = select(list(unique(expected.columns,
                                  key=lambda c: c.name))).select_from(expected)

    result = compute(joined, {L: lhs, R: rhs})

    assert str(result) == str(expected)

    assert str(select(result)) == str(select(expected))

    # Schemas match
    print(result.c.keys())
    print(joined.fields)
    assert list(result.c.keys()) == list(joined.fields)
Beispiel #28
0
def test_join_count():
    ds = datashape.dshape(
        '{t1: var * {x: int, y: int}, t2: var * {a: int, b: int}}')
    engine = resource('sqlite:///:memory:', dshape=ds)
    db = symbol('db', ds)

    expr = join(db.t1[db.t1.x > -1], db.t2, 'x', 'a').count()

    result = compute(expr, {db: engine}, post_compute=False)

    expected1 = """
    SELECT count(alias.x) as count
    FROM (SELECT t1.x AS x, t1.y AS y, t2.b AS b
          FROM t1 JOIN t2 ON t1.x = t2.a
          WHERE t1.x > ?) as alias
          """
    expected2 = """
    SELECT count(alias2.x) AS count
    FROM (SELECT alias1.x AS x, alias1.y AS y, t2.b AS b
          FROM (SELECT t1.x AS x, t1.y AS y
                FROM t1
                WHERE t1.x > ?) AS alias1
          JOIN t2 ON alias1.x = t2.a) AS alias2"""

    assert (normalize(str(result)) == normalize(expected1) or
            normalize(str(result)) == normalize(expected2))
Beispiel #29
0
def test_multi_column_join():
    left = [(1, 2, 3),
            (2, 3, 4),
            (1, 3, 5)]
    left = DataFrame(left, columns=['x', 'y', 'z'])
    right = [(1, 2, 30),
             (1, 3, 50),
             (1, 3, 150)]
    right = DataFrame(right, columns=['x', 'y', 'w'])

    L = symbol('L', 'var * {x: int, y: int, z: int}')
    R = symbol('R', 'var * {x: int, y: int, w: int}')

    j = join(L, R, ['x', 'y'])

    expected = [(1, 2, 3, 30),
                (1, 3, 5, 50),
                (1, 3, 5, 150)]
    expected = DataFrame(expected, columns=['x', 'y', 'z', 'w'])

    result = compute(j, {L: left, R: right})

    print(result)

    assert str(result) == str(expected)
    assert list(result.columns) == list(j.fields)
Beispiel #30
0
def test_relabel_join():
    names = symbol('names', 'var * {first: string, last: string}')

    siblings = join(names.relabel({'last': 'left'}),
                    names.relabel({'last': 'right'}), 'first')

    assert siblings.fields == ['first', 'left', 'right']
Beispiel #31
0
def test_selection_of_join():
    metadata = sa.MetaData()
    name = sa.Table(
        'name',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    city = sa.Table(
        'place',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )

    tname = symbol('name', discover(name))
    tcity = symbol('city', discover(city))

    ns = {tname: name, tcity: city}

    j = join(tname, tcity, 'id')
    expr = j[j.city == 'NYC'].name
    result = compute(expr, ns)

    assert normalize(str(result)) == normalize("""
    SELECT name.name
    FROM name JOIN place ON name.id = place.id
    WHERE place.city = :city_1""")
Beispiel #32
0
def test_clean_complex_join():
    metadata = sa.MetaData()
    lhs = sa.Table('amounts', metadata, sa.Column('name', sa.String),
                   sa.Column('amount', sa.Integer))

    rhs = sa.Table('ids', metadata, sa.Column('name', sa.String),
                   sa.Column('id', sa.Integer))

    L = symbol('L', 'var * {name: string, amount: int}')
    R = symbol('R', 'var * {name: string, id: int}')

    joined = join(L[L.amount > 0], R, 'name')

    result = compute(joined, {L: lhs, R: rhs})

    expected1 = """
        SELECT amounts.name, amounts.amount, ids.id
        FROM amounts JOIN ids ON amounts.name = ids.name
        WHERE amounts.amount > :amount_1"""

    expected2 = """
        SELECT alias.name, alias.amount, ids.id
        FROM (SELECT amounts.name AS name, amounts.amount AS amount
              FROM amounts
              WHERE amounts.amount > :amount_1) AS alias
        JOIN ids ON alias.name = ids.name"""

    assert (normalize(str(result)) == normalize(expected1)
            or normalize(str(result)) == normalize(expected2))
Beispiel #33
0
def test_multi_column_join():
    metadata = sa.MetaData()
    lhs = sa.Table('aaa', metadata,
                   sa.Column('x', sa.Integer),
                   sa.Column('y', sa.Integer),
                   sa.Column('z', sa.Integer))

    rhs = sa.Table('bbb', metadata,
                   sa.Column('w', sa.Integer),
                   sa.Column('x', sa.Integer),
                   sa.Column('y', sa.Integer))

    L = symbol('L', 'var * {x: int, y: int, z: int}')
    R = symbol('R', 'var * {w: int, x: int, y: int}')
    joined = join(L, R, ['x', 'y'])

    expected = lhs.join(rhs, (lhs.c.x == rhs.c.x)
                        & (lhs.c.y == rhs.c.y))
    expected = select(list(unique(expected.columns, key=lambda c:
                                  c.name))).select_from(expected)

    result = compute(joined, {L: lhs, R: rhs})

    assert str(result) == str(expected)

    assert str(select(result)) == str(select(expected))

    # Schemas match
    print(result.c.keys())
    print(joined.fields)
    assert list(result.c.keys()) == list(joined.fields)
Beispiel #34
0
def test_join_by_arcs():
    df_idx = DataFrame([['A', 1],
                        ['B', 2],
                        ['C', 3]],
                      columns=['name', 'node_id'])

    df_arc = DataFrame([[1, 3],
                        [2, 3],
                        [3, 1]],
                       columns=['node_out', 'node_id'])

    t_idx = TableSymbol('t_idx', '{name: string, node_id: int32}')

    t_arc = TableSymbol('t_arc', '{node_out: int32, node_id: int32}')

    joined = join(t_arc, t_idx, "node_id")

    want = by(joined['name'], joined['node_id'].count())

    result = compute(want, {t_arc: df_arc, t_idx:df_idx})

    result_pandas = pd.merge(df_arc, df_idx, on='node_id')

    expected = result_pandas.groupby('name')['node_id'].count().reset_index()
    assert str(result.values) == str(expected.values)
    assert list(result.columns) == ['name', 'node_id_count']
Beispiel #35
0
def test_join_by_arcs():
    df_idx = DataFrame([['A', 1],
                        ['B', 2],
                        ['C', 3]],
                      columns=['name', 'node_id'])

    df_arc = DataFrame([[1, 3],
                        [2, 3],
                        [3, 1]],
                       columns=['node_out', 'node_id'])

    t_idx = Symbol('t_idx', 'var * {name: string, node_id: int32}')

    t_arc = Symbol('t_arc', 'var * {node_out: int32, node_id: int32}')

    joined = join(t_arc, t_idx, "node_id")

    want = by(joined['name'], joined['node_id'].count())

    result = compute(want, {t_arc: df_arc, t_idx:df_idx})

    result_pandas = pd.merge(df_arc, df_idx, on='node_id')

    expected = result_pandas.groupby('name')['node_id'].count().reset_index()
    assert str(result.values) == str(expected.values)
    assert list(result.columns) == ['name', 'node_id_count']
Beispiel #36
0
def test_aliased_views_with_computation():
    engine = sa.create_engine('sqlite:///:memory:')

    df_aaa = DataFrame({
        'x': [1, 2, 3, 2, 3],
        'y': [2, 1, 2, 3, 1],
        'z': [3, 3, 3, 1, 2]
    })
    df_bbb = DataFrame({
        'w': [1, 2, 3, 2, 3],
        'x': [2, 1, 2, 3, 1],
        'y': [3, 3, 3, 1, 2]
    })

    df_aaa.to_sql('aaa', engine)
    df_bbb.to_sql('bbb', engine)

    metadata = sa.MetaData(engine)
    metadata.reflect()

    sql_aaa = metadata.tables['aaa']
    sql_bbb = metadata.tables['bbb']

    L = symbol('aaa', discover(df_aaa))
    R = symbol('bbb', discover(df_bbb))

    expr = join(by(L.x, y_total=L.y.sum()), R)
    a = compute(expr, {L: df_aaa, R: df_bbb})
    b = compute(expr, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr2 = by(expr.w, count=expr.x.count(), total2=expr.y_total.sum())
    a = compute(expr2, {L: df_aaa, R: df_bbb})
    b = compute(expr2, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr3 = by(expr.x, count=expr.y_total.count())
    a = compute(expr3, {L: df_aaa, R: df_bbb})
    b = compute(expr3, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr4 = join(expr2, R)
    a = compute(expr4, {L: df_aaa, R: df_bbb})
    b = compute(expr4, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)
    """ # Takes a while
Beispiel #37
0
def test_join():
    cities = np.array([("Alice", "NYC"), ("Alice", "LA"), ("Bob", "Chicago")], dtype=[("name", "S7"), ("city", "O")])

    c = symbol("cities", discover(cities))

    expr = join(t, c, "name")
    result = compute(expr, {t: x, c: cities})
    assert (b"Alice", 1, 100, "LA") in into(list, result)
def test_join():
    cities = np.array([('Alice', 'NYC'), ('Alice', 'LA'), ('Bob', 'Chicago')],
                      dtype=[('name', 'S7'), ('city', 'O')])

    c = symbol('cities', discover(cities))

    expr = join(t, c, 'name')
    result = compute(expr, {t: x, c: cities})
    assert (b'Alice', 1, 100, 'LA') in into(list, result)
Beispiel #39
0
def test_aliased_views_with_computation():
    engine = sa.create_engine('sqlite:///:memory:')

    df_aaa = DataFrame({'x': [1, 2, 3, 2, 3],
                        'y': [2, 1, 2, 3, 1],
                        'z': [3, 3, 3, 1, 2]})
    df_bbb = DataFrame({'w': [1, 2, 3, 2, 3],
                        'x': [2, 1, 2, 3, 1],
                        'y': [3, 3, 3, 1, 2]})

    df_aaa.to_sql('aaa', engine)
    df_bbb.to_sql('bbb', engine)

    metadata = sa.MetaData(engine)
    metadata.reflect()

    sql_aaa = metadata.tables['aaa']
    sql_bbb = metadata.tables['bbb']

    L = symbol('aaa', discover(df_aaa))
    R = symbol('bbb', discover(df_bbb))

    expr = join(by(L.x, y_total=L.y.sum()),
                R)
    a = compute(expr, {L: df_aaa, R: df_bbb})
    b = compute(expr, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr2 = by(expr.w, count=expr.x.count(), total2=expr.y_total.sum())
    a = compute(expr2, {L: df_aaa, R: df_bbb})
    b = compute(expr2, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr3 = by(expr.x, count=expr.y_total.count())
    a = compute(expr3, {L: df_aaa, R: df_bbb})
    b = compute(expr3, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr4 = join(expr2, R)
    a = compute(expr4, {L: df_aaa, R: df_bbb})
    b = compute(expr4, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    """ # Takes a while
Beispiel #40
0
def test_join_on_same_table():
    metadata = sa.MetaData()
    T = sa.Table(
        'tab',
        metadata,
        sa.Column('a', sa.Integer),
        sa.Column('b', sa.Integer),
    )

    t = symbol('tab', discover(T))
    expr = join(t, t, 'a')

    result = compute(expr, {t: T})

    assert normalize(str(result)) == normalize("""
    SELECT tab_left.a, tab_left.b, tab_right.b
    FROM tab AS tab_left JOIN tab AS tab_right
    ON tab_left.a = tab_right.a
    """)

    expr = join(t, t, 'a').b_left.sum()

    result = compute(expr, {t: T})

    assert normalize(str(result)) == normalize("""
   with alias as
    (select tab_left.b as b
     from tab as tab_left
        join tab as tab_right
        on tab_left.a = tab_right.a)
    select sum(alias.b) as b_left_sum from alias""")

    expr = join(t, t, 'a')
    expr = summary(total=expr.a.sum(), smallest=expr.b_right.min())

    result = compute(expr, {t: T})

    assert normalize(str(result)) == normalize("""
    SELECT min(tab_right.b) as smallest, sum(tab_left.a) as total
    FROM tab AS tab_left JOIN tab AS tab_right
    ON tab_left.a = tab_right.a
    """)
Beispiel #41
0
def test_multi_column_join():
    a = TableSymbol("a", "{x: int, y: int, z: int}")
    b = TableSymbol("b", "{w: int, x: int, y: int}")
    j = join(a, b, ["x", "y"])

    assert set(j.fields) == set("wxyz")

    assert j.on_left == j.on_right == ["x", "y"]
    assert hash(j)

    assert j.fields == ["x", "y", "z", "w"]
Beispiel #42
0
def test_join():
    t = TableSymbol("t", "{name: string, amount: int}")
    s = TableSymbol("t", "{name: string, id: int}")
    r = TableSymbol("r", "{name: string, amount: int}")
    q = TableSymbol("q", "{name: int}")

    j = join(t, s, "name", "name")

    assert j.schema == dshape("{name: string, amount: int, id: int}")

    assert join(t, s, "name") == join(t, s, "name")

    assert join(t, s, "name").on_left == "name"
    assert join(t, s, "name").on_right == "name"

    assert join(t, r, ("name", "amount")).on_left == ["name", "amount"]
    with pytest.raises(TypeError):
        join(t, q, "name")
    with pytest.raises(ValueError):
        join(t, s, how="upside_down")
Beispiel #43
0
def test_multi_column_join():
    a = TableSymbol('a', '{x: int, y: int, z: int}')
    b = TableSymbol('b', '{w: int, x: int, y: int}')
    j = join(a, b, ['x', 'y'])

    assert set(j.fields) == set('wxyz')

    assert j.on_left == j.on_right == ['x', 'y']
    assert hash(j)

    assert j.fields == ['x', 'y', 'z', 'w']
Beispiel #44
0
def test_join():
    cities = np.array([('Alice', 'NYC'),
                       ('Alice', 'LA'),
                       ('Bob', 'Chicago')],
                      dtype=[('name', 'S7'), ('city', 'O')])

    c = symbol('cities', discover(cities))

    expr = join(t, c, 'name')
    result = compute(expr, {t: x, c: cities})
    assert (b'Alice', 1, 100, 'LA') in into(list, result)
Beispiel #45
0
def test_multi_column_join():
    a = symbol('a', 'var * {x: int, y: int, z: int}')
    b = symbol('b', 'var * {w: int, x: int, y: int}')
    j = join(a, b, ['x', 'y'])

    assert set(j.fields) == set('wxyz')

    assert j.on_left == j.on_right == ['x', 'y']
    assert hash(j)

    assert j.fields == ['x', 'y', 'z', 'w']
Beispiel #46
0
def test_join():
    t = TableSymbol('t', '{name: string, amount: int}')
    s = TableSymbol('t', '{name: string, id: int}')
    r = TableSymbol('r', '{name: string, amount: int}')
    q = TableSymbol('q', '{name: int}')

    j = join(t, s, 'name', 'name')

    assert j.schema == dshape('{name: string, amount: int, id: int}')

    assert join(t, s, 'name') == join(t, s, 'name')

    assert join(t, s, 'name').on_left == 'name'
    assert join(t, s, 'name').on_right == 'name'

    assert join(t, r, ('name', 'amount')).on_left == ['name', 'amount']
    with pytest.raises(TypeError):
        join(t, q, 'name')
    with pytest.raises(ValueError):
        join(t, s, how='upside_down')
Beispiel #47
0
def test_join():
    t = symbol('t', 'var * {name: string, amount: int}')
    s = symbol('t', 'var * {name: string, id: int}')
    r = symbol('r', 'var * {name: string, amount: int}')
    q = symbol('q', 'var * {name: int}')

    j = join(t, s, 'name', 'name')

    assert j.schema == dshape('{name: string, amount: int, id: int}')

    assert join(t, s, 'name') == join(t, s, 'name')

    assert join(t, s, 'name').on_left == 'name'
    assert join(t, s, 'name').on_right == 'name'

    assert join(t, r, ('name', 'amount')).on_left == ['name', 'amount']
    with pytest.raises(TypeError):
        join(t, q, 'name')
    with pytest.raises(ValueError):
        join(t, s, how='upside_down')
Beispiel #48
0
def test_serializable():
    t = TableSymbol('t', '{id: int, name: string, amount: int}')
    import pickle
    t2 = pickle.loads(pickle.dumps(t))

    assert t.isidentical(t2)

    s = TableSymbol('t', '{id: int, city: string}')
    expr = join(t[t.amount < 0], s).sort('id').city.head()
    expr2 = pickle.loads(pickle.dumps(expr))

    assert expr.isidentical(expr2)
Beispiel #49
0
def test_relabel_join():
    names = Symbol('names', 'var * {first: string, last: string}')

    siblings = join(names.relabel({'first': 'left'}),
                    names.relabel({'first': 'right'}),
                    'last')[['left', 'right']]

    data = [('Alice', 'Smith'), ('Bob', 'Jones'), ('Charlie', 'Smith')]

    print(set(compute(siblings, {names: data})))
    assert ('Alice', 'Charlie') in set(compute(siblings, {names: data}))
    assert ('Alice', 'Bob') not in set(compute(siblings, {names: data}))
def test_join_promotion():
    a_data = pd.DataFrame([[0.0, 1.5], [1.0, 2.5]], columns=list('ab'))
    b_data = pd.DataFrame([[0, 1], [1, 2]], columns=list('ac'))
    a = symbol('a', discover(a_data))
    b = symbol('b', discover(b_data))

    joined = join(a, b, 'a')
    assert joined.dshape == dshape('var * {a: float64, b: float64, c: int64}')

    expected = pd.merge(a_data, b_data, on='a')
    result = compute(joined, {a: a_data, b: b_data})
    tm.assert_frame_equal(result, expected)
Beispiel #51
0
def test_join_suffixes():
    df = pd.DataFrame(
        list(dict((k, n) for k in ascii_lowercase[:5]) for n in range(5)), )
    a = symbol('a', discover(df))
    b = symbol('b', discover(df))

    suffixes = '_x', '_y'
    joined = join(a, b, 'a', suffixes=suffixes)

    expected = pd.merge(df, df, on='a', suffixes=suffixes)
    result = compute(joined, {a: df, b: df})
    tm.assert_frame_equal(result, expected)
Beispiel #52
0
def test_join_promotion():
    a_data = pd.DataFrame([[0.0, 1.5], [1.0, 2.5]], columns=list('ab'))
    b_data = pd.DataFrame([[0, 1], [1, 2]], columns=list('ac'))
    a = symbol('a', discover(a_data))
    b = symbol('b', discover(b_data))

    joined = join(a, b, 'a')
    assert joined.dshape == dshape('var * {a: float64, b: float64, c: int64}')

    expected = pd.merge(a_data, b_data, on='a')
    result = compute(joined, {a: a_data, b: b_data})
    tm.assert_frame_equal(result, expected)
Beispiel #53
0
def test_serializable():
    t = symbol('t', 'var * {id: int, name: string, amount: int}')
    import pickle
    t2 = pickle.loads(pickle.dumps(t, protocol=pickle.HIGHEST_PROTOCOL))

    assert t.isidentical(t2)

    s = symbol('t', 'var * {id: int, city: string}')
    expr = join(t[t.amount < 0], s).sort('id').city.head()
    expr2 = pickle.loads(pickle.dumps(expr, protocol=pickle.HIGHEST_PROTOCOL))

    assert expr.isidentical(expr2)
Beispiel #54
0
def test_serializable():
    t = TableSymbol('t', '{id: int, name: string, amount: int}')
    import pickle
    t2 = pickle.loads(pickle.dumps(t))

    assert t.isidentical(t2)

    s = TableSymbol('t', '{id: int, city: string}')
    expr = join(t[t.amount < 0], s).sort('id').city.head()
    expr2 = pickle.loads(pickle.dumps(expr))

    assert expr.isidentical(expr2)
Beispiel #55
0
def test_join():
    cities = Symbol('cities', dshape='var * {id: int, city: string}')
    j = join(t, cities, 'id')

    city_data = [[1, 'NYC'], [1, 'Chicago'], [5, 'Paris']]

    assert set(concat(compute(j[['name', 'city']],
                              {t: c, cities: city_data}))) == \
            set((('Alice', 'NYC'), ('Alice', 'Chicago'), ('Edith', 'Paris')))

    assert set(concat(compute(j[['name', 'city']],
                              {t: c, cities: city_data}))) == \
            set((('Alice', 'NYC'), ('Alice', 'Chicago'), ('Edith', 'Paris')))
Beispiel #56
0
def test_multi_column_join():
    left = [(1, 2, 3), (2, 3, 4), (1, 3, 5)]
    right = [(1, 2, 30), (1, 3, 50), (1, 3, 150)]

    L = Symbol('L', 'var * {x: int, y: int, z: int}')
    R = Symbol('R', 'var * {x: int, y: int, w: int}')

    j = join(L, R, ['x', 'y'])

    print(list(compute(j, {L: left, R: right})))
    assert list(compute(j, {
        L: left,
        R: right
    })) == [(1, 2, 3, 30), (1, 3, 5, 50), (1, 3, 5, 150)]
Beispiel #57
0
def test_aliased_views_with_join():
    joined = join(bank, cities)
    expr = by(joined.city, total=joined.amount.sum())
    expr2 = by(expr.total, count=expr.city.nunique())

    result = compute(expr2, {bank: sql_bank, cities: sql_cities})

    assert normalize(str(result)) == normalize("""
    SELECT alias.total, count(DISTINCT alias.city) AS count
    FROM (SELECT cities.city AS city, sum(bank.amount) AS total
          FROM bank
          JOIN cities ON bank.name = cities.name
          GROUP BY cities.city) as alias
    GROUP BY alias.total
    """)