Beispiel #1
0
def test_split_reasons_correctly_about_aggregate_shape():
    """Check the aggregate's shape when ``a`` is split into 100 x 100 chunks.

    Exercises a full reduction and a reduction along axis 0.  ``a`` is not
    defined in this test; it has to come from the enclosing module.
    """
    # Full reduction of the ``x`` field.
    template = symbol('chunk', '100 * 100 * {x: float32, y: float32}')
    chunk_pair, agg_pair = split(a, a.x.sum(), chunk=template)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert agg.shape == (10, 20)

    # Reduction along the first axis only.
    template = symbol('chunk', '100 * 100 * {x: float32, y: float32}')
    chunk_pair, agg_pair = split(a, a.x.sum(axis=0), chunk=template)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert agg.shape == (10, 2000)
Beispiel #2
0
def test_split_reasons_correctly_about_uneven_aggregate_shape():
    """Check aggregate shapes when the chunk does not evenly tile the data."""
    # 10 x 10 data, 3 x 3 chunks, reduced along axis 0.
    x = symbol('chunk', '10 * 10 * int')
    template = symbol('chunk', '3 * 3 * int')
    chunk_pair, agg_pair = split(x, x.sum(axis=0), chunk=template)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair
    assert agg.shape == (4, 10)

    # 1643 x 60 data, 40 x 60 chunks, fully reduced.
    x = symbol('leaf', '1643 * 60 * int')
    template = symbol('chunk', '40 * 60 * int')
    chunk_pair, agg_pair = split(x, x.sum(), chunk=template)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair
    assert agg.shape == (42, 1)
Beispiel #3
0
def test_split_reasons_correctly_about_aggregate_shape():
    """Check the aggregate's shape when ``a`` is split into 100 x 100 chunks.

    Exercises a full reduction and a reduction along axis 0.  ``a`` is not
    defined in this test; it has to come from the enclosing module.
    """
    # Full reduction of the ``x`` field.
    template = Symbol('chunk', '100 * 100 * {x: float32, y: float32}')
    chunk_pair, agg_pair = split(a, a.x.sum(), chunk=template)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert agg.shape == (10, 20)

    # Reduction along the first axis only.
    template = Symbol('chunk', '100 * 100 * {x: float32, y: float32}')
    chunk_pair, agg_pair = split(a, a.x.sum(axis=0), chunk=template)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert agg.shape == (10, 2000)
Beispiel #4
0
def test_split_reasons_correctly_about_uneven_aggregate_shape():
    """Check aggregate shapes when the chunk does not evenly tile the data."""
    # First case: 10 x 10 data, 3 x 3 chunks, sum along axis 0.
    x = symbol('chunk', '10 * 10 * int')
    piece = symbol('chunk', '3 * 3 * int')
    chunk_side, agg_side = split(x, x.sum(axis=0), chunk=piece)
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side
    assert agg.shape == (4, 10)

    # Second case: 1643 x 60 data, 40 x 60 chunks, full sum.
    x = symbol('leaf', '1643 * 60 * int')
    piece = symbol('chunk', '40 * 60 * int')
    chunk_side, agg_side = split(x, x.sum(), chunk=piece)
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side
    assert agg.shape == (42, 1)
Beispiel #5
0
def test_by_count():
    """A grouped count splits into per-chunk counts summed in the aggregate."""
    chunk_pair, agg_pair = split(t, by(t.name, total=t.amount.count()))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    expected_chunk = by(chunk.name, total=chunk.amount.count())
    assert chunk_expr.isidentical(expected_chunk)

    expected_agg = by(agg.name, total=agg.total.sum())
    assert agg_expr.isidentical(expected_agg)
Beispiel #6
0
def test_by_count():
    """Grouped ``count`` is counted per chunk; the aggregate sums the counts."""
    grouped_count = by(t.name, total=t.amount.count())
    chunk_side, agg_side = split(t, grouped_count)
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side

    per_chunk = by(chunk.name, total=chunk.amount.count())
    assert chunk_expr.isidentical(per_chunk)

    combined = by(agg.name, total=agg.total.sum())
    assert agg_expr.isidentical(combined)
Beispiel #7
0
def test_distinct():
    """Counting distinct values: distinct per chunk, distinct-count overall."""
    chunk_pair, agg_pair = split(t, count(t.amount.distinct()))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    # The chunk symbol carries the same schema as the full table.
    assert chunk.schema == t.schema
    expected_chunk = chunk.amount.distinct()
    assert chunk_expr.isidentical(expected_chunk)

    assert isscalar(agg.dshape.measure)
    expected_agg = count(agg.distinct())
    assert agg_expr.isidentical(expected_agg)
Beispiel #8
0
def test_summary():
    """``summary`` splits into a keepdims chunk summary and a summed aggregate."""
    # Two-field summary, one field with arithmetic applied after reducing.
    chunk_pair, agg_pair = split(
        t, summary(a=t.amount.count(), b=t.id.sum() + 1))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = summary(
        a=chunk.amount.count(), b=chunk.id.sum(), keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    # assert not agg.schema == dshape('{a: int32, b: int32}')
    expected_agg = summary(a=agg.a.sum(), b=agg.b.sum() + 1)
    assert agg_expr.isidentical(expected_agg)

    # Single-field summary.
    chunk_pair, agg_pair = split(t, summary(total=t.amount.sum()))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    expected_chunk = summary(total=chunk.amount.sum(), keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)
    expected_agg = summary(total=agg.total.sum())
    assert agg_expr.isidentical(expected_agg)
Beispiel #9
0
def test_sum():
    """A column sum splits into a per-chunk sum and a sum of the aggregate."""
    chunk_pair, agg_pair = split(t, t.amount.sum())
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = chunk.amount.sum()
    assert chunk_expr.isidentical(expected_chunk)

    assert isscalar(agg.dshape.measure)
    expected_agg = sum(agg)
    assert agg_expr.isidentical(expected_agg)
Beispiel #10
0
def test_agg_shape_in_tabular_case_with_explicit_chunk():
    """With an explicit 100-row chunk, a grouped sum's aggregate is ``var``."""
    t = Symbol('t', '1000 * {name: string, amount: int, id: int}')
    c = Symbol('chunk', 100 * t.schema)

    expr = by(t.name, total=t.amount.sum())
    chunk_pair, agg_pair = split(t, expr, chunk=c)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    expected_dshape = dshape('var * {name: string, total: int}')
    assert agg.dshape == expected_dshape
Beispiel #11
0
def test_distinct():
    """Counting distinct values: distinct per chunk, distinct-count overall."""
    chunk_pair, agg_pair = split(t, count(t.amount.distinct()))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    # The chunk symbol carries the same schema as the full table.
    assert chunk.schema == t.schema
    expected_chunk = chunk.amount.distinct()
    assert chunk_expr.isidentical(expected_chunk)

    assert agg.iscolumn
    expected_agg = count(agg.distinct())
    assert agg_expr.isidentical(expected_agg)
Beispiel #12
0
def test_sum():
    """A column sum splits into a keepdims chunk sum and an aggregate sum."""
    chunk_pair, agg_pair = split(t, t.amount.sum())
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = chunk.amount.sum(keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    assert isscalar(agg.dshape.measure)
    expected_agg = sum(agg)
    assert agg_expr.isidentical(expected_agg)
Beispiel #13
0
def test_distinct():
    """``count(distinct)`` keeps ``distinct`` per chunk and counts afterwards."""
    distinct_count = count(t.amount.distinct())
    chunk_side, agg_side = split(t, distinct_count)
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side

    assert chunk.schema == t.schema
    per_chunk = chunk.amount.distinct()
    assert chunk_expr.isidentical(per_chunk)

    assert isscalar(agg.dshape.measure)
    combined = count(agg.distinct())
    assert agg_expr.isidentical(combined)
Beispiel #14
0
def test_summary():
    """``summary`` splits into a chunk summary and a summed aggregate."""
    # Two-field summary, one field with arithmetic applied after reducing.
    chunk_pair, agg_pair = split(
        t, summary(a=t.amount.count(), b=t.id.sum() + 1))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = summary(a=chunk.amount.count(), b=chunk.id.sum())
    assert chunk_expr.isidentical(expected_chunk)

    assert not agg.schema == dshape('{a: int32, b: int32}')
    expected_agg = summary(a=agg.a.sum(), b=agg.b.sum() + 1)
    assert agg_expr.isidentical(expected_agg)

    # Single-field summary.
    chunk_pair, agg_pair = split(t, summary(total=t.amount.sum()))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    expected_chunk = summary(total=chunk.amount.sum())
    assert chunk_expr.isidentical(expected_chunk)
    expected_agg = summary(total=agg.total.sum())
    assert agg_expr.isidentical(expected_agg)
Beispiel #15
0
def test_by_with_single_field_child():
    """Group a single-column symbol by itself and split the grouped sum."""
    x = symbol('x', 'var * int')
    chunk_pair, agg_pair = split(x, by(x, total=x.sum()))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    expected_chunk = by(chunk, total=chunk.sum())
    assert chunk_expr.isidentical(expected_chunk)

    # The aggregate groups on its first field and relabels that field to 'x'.
    regrouped = by(agg[agg.fields[0]], total=agg.total.sum())
    expected_agg = regrouped.relabel({agg.fields[0]: 'x'})
    assert agg_expr.isidentical(expected_agg)
Beispiel #16
0
def test_agg_shape_in_tabular_case_with_explicit_chunk():
    """With an explicit 100-row chunk, a grouped sum's aggregate is ``var``."""
    t = symbol('t', '1000 * {name: string, amount: int, id: int}')
    c = symbol('chunk', 100 * t.schema)

    expr = by(t.name, total=t.amount.sum())
    chunk_pair, agg_pair = split(t, expr, chunk=c)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    expected_dshape = dshape('var * {name: string, total: int}')
    assert agg.dshape == expected_dshape
Beispiel #17
0
def test_sum():
    """A column sum splits into a per-chunk sum and a sum of the aggregate."""
    chunk_pair, agg_pair = split(t, t.amount.sum())
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = chunk.amount.sum()
    assert chunk_expr.isidentical(expected_chunk)

    assert agg.iscolumn
    expected_agg = sum(agg)
    assert agg_expr.isidentical(expected_agg)
Beispiel #18
0
def test_sum_with_keepdims():
    """``keepdims=True`` is kept on both the chunk and aggregate sums."""
    chunk_pair, agg_pair = split(t, t.amount.sum(keepdims=True))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = chunk.amount.sum(keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    assert isscalar(agg.dshape.measure)
    expected_agg = sum(agg, keepdims=True)
    assert agg_expr.isidentical(expected_agg)
Beispiel #19
0
def test_sum_with_axis_argument():
    """Splitting an axis-0 sum keeps ``axis=0`` in both resulting expressions."""
    template = symbol('chunk', '100 * 100 * {x: float32, y: float32}')
    chunk_pair, agg_pair = split(a, a.x.sum(axis=0), chunk=template)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == a.schema
    # The aggregate expression has the same dshape as the unsplit reduction.
    assert agg_expr.dshape == a.x.sum(axis=0).dshape

    expected_chunk = chunk.x.sum(axis=0, keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)
    expected_agg = agg.sum(axis=0)
    assert agg_expr.isidentical(expected_agg)
Beispiel #20
0
def test_by_sum():
    """A grouped sum is summed per chunk, then the totals are summed again."""
    chunk_pair, agg_pair = split(t, by(t.name, total=t.amount.sum()))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = by(chunk.name, total=chunk.amount.sum())
    assert chunk_expr.isidentical(expected_chunk)

    assert not isscalar(agg.dshape.measure)
    expected_agg = by(agg.name, total=agg.total.sum())
    assert agg_expr.isidentical(expected_agg)
Beispiel #21
0
def test_reductions():
    """``nunique`` splits into per-chunk ``distinct`` plus a distinct count."""
    # Plain nunique.
    chunk_pair, agg_pair = split(t, t.amount.nunique())
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = chunk.amount.distinct()
    assert chunk_expr.isidentical(expected_chunk)

    assert isscalar(agg.dshape.measure)
    expected_agg = agg.distinct().count()
    assert agg_expr.isidentical(expected_agg)

    # nunique with keepdims=True: only the aggregate count carries keepdims.
    chunk_pair, agg_pair = split(t, t.amount.nunique(keepdims=True))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = chunk.amount.distinct()
    assert chunk_expr.isidentical(expected_chunk)

    assert isscalar(agg.dshape.measure)
    expected_agg = agg.distinct().count(keepdims=True)
    assert agg_expr.isidentical(expected_agg)
Beispiel #22
0
def test_nd_chunk_axis_args():
    """Axis-0 reductions over ``x`` with a 4 x 4 chunk keep expected shapes.

    ``x`` is not defined in this test; it has to come from the enclosing
    module.
    """
    c = symbol('c', '4 * 4 * int32')

    chunk_pair, agg_pair = split(x, x.sum(axis=0), chunk=c)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.shape == (4, 4)
    assert chunk_expr.shape == (1, 4)
    expected_chunk = chunk.sum(keepdims=True, axis=0)
    assert chunk_expr.isidentical(expected_chunk)

    assert agg.shape == (6, 16)
    expected_agg = agg.sum(axis=0)
    assert agg_expr.isidentical(expected_agg)

    # The same shapes are expected for the other axis-aware reductions.
    for reduction in (var, std, mean):
        chunk_pair, agg_pair = split(x, reduction(x, axis=0), chunk=c)
        chunk, chunk_expr = chunk_pair
        agg, agg_expr = agg_pair

        assert chunk.shape == (4, 4)
        assert chunk_expr.shape == (1, 4)
        assert agg.shape == (6, 16)
Beispiel #23
0
def test_by_sum():
    """Grouped ``sum`` splits into per-chunk sums re-summed in the aggregate."""
    grouped_sum = by(t.name, total=t.amount.sum())
    chunk_side, agg_side = split(t, grouped_sum)
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side

    assert chunk.schema == t.schema
    per_chunk = by(chunk.name, total=chunk.amount.sum())
    assert chunk_expr.isidentical(per_chunk)

    assert not isscalar(agg.dshape.measure)
    combined = by(agg.name, total=agg.total.sum())
    assert agg_expr.isidentical(combined)
Beispiel #24
0
def test_reductions():
    """``nunique`` becomes ``distinct`` per chunk and ``distinct().count()``."""
    # Without keepdims.
    chunk_side, agg_side = split(t, t.amount.nunique())
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side

    assert chunk.schema == t.schema
    per_chunk = chunk.amount.distinct()
    assert chunk_expr.isidentical(per_chunk)

    assert isscalar(agg.dshape.measure)
    combined = agg.distinct().count()
    assert agg_expr.isidentical(combined)

    # With keepdims=True, which shows up only on the aggregate count.
    chunk_side, agg_side = split(t, t.amount.nunique(keepdims=True))
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side

    assert chunk.schema == t.schema
    per_chunk = chunk.amount.distinct()
    assert chunk_expr.isidentical(per_chunk)

    assert isscalar(agg.dshape.measure)
    combined = agg.distinct().count(keepdims=True)
    assert agg_expr.isidentical(combined)
Beispiel #25
0
def test_by_with_single_field_child():
    """Split ``by(x, total=x.sum())`` where ``x`` is a single int column."""
    x = symbol('x', 'var * int')
    chunk_side, agg_side = split(x, by(x, total=x.sum()))
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side

    per_chunk = by(chunk, total=chunk.sum())
    assert chunk_expr.isidentical(per_chunk)

    # Group on the aggregate's first field, then relabel that field to 'x'.
    grouped = by(agg[agg.fields[0]], total=agg.total.sum())
    combined = grouped.relabel({agg.fields[0]: 'x'})
    assert agg_expr.isidentical(combined)
Beispiel #26
0
def test_by():
    """A grouped sum is summed per chunk, then the totals are summed again."""
    chunk_pair, agg_pair = split(t, by(t.name, total=t.amount.sum()))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = by(chunk.name, total=chunk.amount.sum())
    assert chunk_expr.isidentical(expected_chunk)

    assert not agg.iscolumn
    expected_agg = by(agg.name, total=agg.total.sum())
    assert agg_expr.isidentical(expected_agg)
Beispiel #27
0
def test_by_mean():
    """A grouped mean splits into per-chunk sum/count and a ratio of sums."""
    chunk_pair, agg_pair = split(t, by(t.name, avg=t.amount.mean()))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    # Each chunk reports a total and a count, prefixed with the field name.
    expected_chunk = by(
        chunk.name,
        avg_total=chunk.amount.sum(),
        avg_count=chunk.amount.count())
    assert chunk_expr.isidentical(expected_chunk)

    # The leading 1.0 factor is part of the expected expression tree.
    ratio = 1.0 * agg.avg_total.sum() / agg.avg_count.sum()
    expected_agg = by(agg.name, avg=(ratio))
    assert agg_expr.isidentical(expected_agg)
Beispiel #28
0
def test_nd_chunk():
    """A full sum over ``x`` with a 4 x 4 chunk gives a (6, 4) aggregate.

    ``x`` is not defined in this test; it has to come from the enclosing
    module.
    """
    c = Symbol('c', '4 * 4 * int32')

    chunk_pair, agg_pair = split(x, x.sum(), chunk=c)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.shape == (4, 4)
    expected_chunk = chunk.sum(keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    assert agg.shape == (6, 4)
    expected_agg = agg.sum()
    assert agg_expr.isidentical(expected_agg)
Beispiel #29
0
def test_splittable_apply():
    """A splittable ``apply`` runs per chunk; the aggregate is a pass-through."""
    def f(x):
        pass

    applied = t.amount.apply(f, 'var * int', splittable=True)
    chunk_pair, agg_pair = split(t, applied)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    expected_chunk = chunk.amount.apply(f, 'var * int', splittable=True)
    assert chunk_expr.isidentical(expected_chunk)

    assert agg_expr.isidentical(agg)
Beispiel #30
0
def test_mean():
    """``mean`` splits into a total/count chunk summary and a ratio of sums."""
    chunk_pair, agg_pair = split(t, t.amount.mean())
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = summary(
        total=chunk.amount.sum(),
        count=chunk.amount.count(),
        keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    assert isrecord(agg.dshape.measure)
    # The leading 1.0 factor is part of the expected expression tree.
    scaled_total = 1.0 * agg.total.sum()
    expected_agg = scaled_total / agg.count.sum()
    assert agg_expr.isidentical(expected_agg)
Beispiel #31
0
def test_nd_chunk():
    """A full sum over ``x`` with a 4 x 4 chunk gives a (6, 4) aggregate.

    ``x`` is not defined in this test; it has to come from the enclosing
    module.
    """
    c = symbol('c', '4 * 4 * int32')

    chunk_pair, agg_pair = split(x, x.sum(), chunk=c)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.shape == (4, 4)
    expected_chunk = chunk.sum(keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    assert agg.shape == (6, 4)
    expected_agg = agg.sum()
    assert agg_expr.isidentical(expected_agg)
Beispiel #32
0
def test_splittable_apply():
    """``apply(..., splittable=True)`` is pushed to the chunk unchanged."""
    def f(x):
        pass

    chunk_side, agg_side = split(
        t, t.amount.apply(f, 'var * int', splittable=True))
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side

    per_chunk = chunk.amount.apply(f, 'var * int', splittable=True)
    assert chunk_expr.isidentical(per_chunk)

    # Nothing is left to do on the aggregate side.
    assert agg_expr.isidentical(agg)
Beispiel #33
0
def test_nd_chunk_axis_args():
    """An axis-0 sum over ``x`` with a 4 x 4 chunk keeps the expected shapes.

    ``x`` is not defined in this test; it has to come from the enclosing
    module.
    """
    c = Symbol('c', '4 * 4 * int32')

    chunk_pair, agg_pair = split(x, x.sum(axis=0), chunk=c)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.shape == (4, 4)
    assert chunk_expr.shape == (1, 4)
    expected_chunk = chunk.sum(keepdims=True, axis=0)
    assert chunk_expr.isidentical(expected_chunk)

    assert agg.shape == (6, 16)
    expected_agg = agg.sum(axis=0)
    assert agg_expr.isidentical(expected_agg)
Beispiel #34
0
def test_mean():
    """``mean`` splits into a total/count chunk summary and a ratio of sums."""
    chunk_pair, agg_pair = split(t, t.amount.mean())
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = summary(total=chunk.amount.sum(),
                             count=chunk.amount.count(),
                             keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    assert isrecord(agg.dshape.measure)
    expected_agg = agg.total.sum() / agg.count.sum()
    assert agg_expr.isidentical(expected_agg)
Beispiel #35
0
def test_sum_with_axis_argument():
    """Splitting an axis-0 sum keeps ``axis=0`` in both resulting expressions."""
    template = Symbol('chunk', '100 * 100 * {x: float32, y: float32}')
    chunk_pair, agg_pair = split(a, a.x.sum(axis=0), chunk=template)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == a.schema
    # The aggregate expression has the same dshape as the unsplit reduction.
    assert agg_expr.dshape == a.x.sum(axis=0).dshape

    expected_chunk = chunk.x.sum(axis=0, keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)
    expected_agg = agg.sum(axis=0)
    assert agg_expr.isidentical(expected_agg)
Beispiel #36
0
def test_by_mean():
    """A grouped mean splits into per-chunk sum/count and a ratio of sums."""
    chunk_pair, agg_pair = split(t, by(t.name, avg=t.amount.mean()))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    # Each chunk reports a total and a count, prefixed with the field name.
    expected_chunk = by(chunk.name,
                        avg_total=chunk.amount.sum(),
                        avg_count=chunk.amount.count())
    assert chunk_expr.isidentical(expected_chunk)

    ratio = agg.avg_total.sum() / agg.avg_count.sum()
    expected_agg = by(agg.name, avg=(ratio))
    assert agg_expr.isidentical(expected_agg)
Beispiel #37
0
def test_std():
    """``std`` splits into sum / sum-of-squares / count and a sqrt formula."""
    chunk_pair, agg_pair = split(t, t.amount.std())
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = summary(
        x=chunk.amount.sum(),
        x2=(chunk.amount**2).sum(),
        n=chunk.amount.count(),
        keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    assert isrecord(agg.dshape.measure)
    # sqrt(sum(x2) / n - (sum(x) / n) ** 2); the ``* 1.0`` factors are part
    # of the expected expression tree and must stay.
    mean_of_squares = agg.x2.sum() / (agg.n.sum() * 1.0)
    square_of_mean = (agg.x.sum() / (agg.n.sum() * 1.0))**2
    expected_agg = sqrt((mean_of_squares - square_of_mean))
    assert agg_expr.isidentical(expected_agg)
Beispiel #38
0
def test_summary_with_mean():
    """A ``summary`` containing a mean expands it into total/count fields."""
    chunk_pair, agg_pair = split(
        t, summary(a=t.amount.count(), b=t.id.mean() + 1))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = summary(
        a=chunk.amount.count(),
        b_total=chunk.id.sum(),
        b_count=chunk.id.count(),
        keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    # assert not agg.schema == dshape('{a: int32, b: int32}')
    a_field = agg.a.sum()
    b_field = (agg.b_total.sum() / agg.b_count.sum()) + 1
    expected_agg = summary(a=a_field, b=b_field)
    assert agg_expr.isidentical(expected_agg)
Beispiel #39
0
def test_var():
    """``var`` splits into sum / sum-of-squares / count and a variance formula."""
    chunk_pair, agg_pair = split(t, t.amount.var())
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert chunk.schema == t.schema
    expected_chunk = summary(x=chunk.amount.sum(),
                             x2=(chunk.amount**2).sum(),
                             n=chunk.amount.count(),
                             keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    assert isrecord(agg.dshape.measure)
    # sum(x2) / n - (sum(x) / n) ** 2
    mean_of_squares = agg.x2.sum() / (agg.n.sum())
    square_of_mean = (agg.x.sum() / (agg.n.sum()))**2
    expected_agg = (mean_of_squares - square_of_mean)
    assert agg_expr.isidentical(expected_agg)
Beispiel #40
0
def test_summary_with_mean():
    """A mean inside ``summary`` becomes ``b_total``/``b_count`` per chunk."""
    full_summary = summary(a=t.amount.count(), b=t.id.mean() + 1)
    chunk_side, agg_side = split(t, full_summary)
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side

    assert chunk.schema == t.schema
    per_chunk = summary(a=chunk.amount.count(),
                        b_total=chunk.id.sum(),
                        b_count=chunk.id.count(),
                        keepdims=True)
    assert chunk_expr.isidentical(per_chunk)

    # assert not agg.schema == dshape('{a: int32, b: int32}')
    count_total = agg.a.sum()
    mean_plus_one = (agg.b_total.sum() / agg.b_count.sum()) + 1
    assert agg_expr.isidentical(summary(a=count_total, b=mean_plus_one))
Beispiel #41
0
def test_complex_summaries():
    """A summary mixing mean, std and sum expands into prefixed chunk fields."""
    t = symbol('t', '100 * {a: int, b: int}')
    chunk_pair, agg_pair = split(
        t, summary(q=t.a.mean(), w=t.a.std(), e=t.a.sum()))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    # Mean contributes q_*, std contributes w_*, the plain sum stays as e.
    expected_chunk = summary(
        e=chunk.a.sum(),
        q_count=chunk.a.count(),
        q_total=chunk.a.sum(),
        w_n=chunk.a.count(),
        w_x=chunk.a.sum(),
        w_x2=(chunk.a**2).sum(),
        keepdims=True)
    assert chunk_expr.isidentical(expected_chunk)

    e_field = agg.e.sum()
    q_field = agg.q_total.sum() / agg.q_count.sum()
    w_mean_of_squares = (agg.w_x2.sum() / agg.w_n.sum())
    w_square_of_mean = (agg.w_x.sum() / agg.w_n.sum())**2
    w_field = sqrt(w_mean_of_squares - w_square_of_mean)
    expected = summary(e=e_field, q=q_field, w=w_field)
    assert agg_expr.isidentical(expected)
Beispiel #42
0
def test_complex_summaries():
    """Split a summary of mean, std and sum over the same column."""
    t = symbol('t', '100 * {a: int, b: int}')
    mixed = summary(q=t.a.mean(), w=t.a.std(), e=t.a.sum())
    chunk_side, agg_side = split(t, mixed)
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side

    # Per chunk: e stays a sum, q expands to count/total, w to n/x/x2.
    per_chunk = summary(e=chunk.a.sum(),
                        q_count=chunk.a.count(),
                        q_total=chunk.a.sum(),
                        w_n=chunk.a.count(),
                        w_x=chunk.a.sum(),
                        w_x2=(chunk.a**2).sum(),
                        keepdims=True)
    assert chunk_expr.isidentical(per_chunk)

    total = agg.e.sum()
    average = agg.q_total.sum() / agg.q_count.sum()
    mean_of_squares = (agg.w_x2.sum() / agg.w_n.sum())
    square_of_mean = (agg.w_x.sum() / agg.w_n.sum())**2
    deviation = sqrt(mean_of_squares - square_of_mean)
    expected = summary(e=total, q=average, w=deviation)
    assert agg_expr.isidentical(expected)
Beispiel #43
0
def test_embarassing_rowwise():
    """An element-wise expression runs per chunk; the aggregate is identity."""
    chunk_pair, agg_pair = split(t, t.amount + 1)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    expected_chunk = chunk.amount + 1
    assert chunk_expr.isidentical(expected_chunk)
    assert agg_expr.isidentical(agg)
Beispiel #44
0
def test_embarassing_like():
    """A ``like`` filter runs per chunk; the aggregate is identity."""
    chunk_pair, agg_pair = split(t, t.like(name='Alice*'))
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    expected_chunk = chunk.like(name='Alice*')
    assert chunk_expr.isidentical(expected_chunk)
    assert agg_expr.isidentical(agg)
Beispiel #45
0
def test_keepdims_equals_true_doesnt_mess_up_agg_shape():
    """The aggregate symbol stays a collection when ``keepdims`` is passed."""
    x = symbol('x', '10 * int')
    # NOTE(review): the test name says keepdims "true" but the call passes
    # keepdims=False -- confirm which one is intended.
    chunk_pair, agg_pair = split(x, x.sum(), keepdims=False)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    assert iscollection(agg.dshape)
Beispiel #46
0
def test_embarassing_selection():
    """A row selection runs per chunk; the aggregate is identity."""
    chunk_pair, agg_pair = split(t, t[t.amount > 0])
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    expected_chunk = chunk[chunk.amount > 0]
    assert chunk_expr.isidentical(expected_chunk)
    assert agg_expr.isidentical(agg)
Beispiel #47
0
def test_embarassing_like():
    """``like`` is applied chunk by chunk with nothing left to aggregate."""
    pattern_filter = t.like(name='Alice*')
    chunk_side, agg_side = split(t, pattern_filter)
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side

    per_chunk = chunk.like(name='Alice*')
    assert chunk_expr.isidentical(per_chunk)
    assert agg_expr.isidentical(agg)
Beispiel #48
0
def test_keepdims_equals_true_doesnt_mess_up_agg_shape():
    """Passing ``keepdims`` to ``split`` leaves the aggregate a collection."""
    x = symbol('x', '10 * int')
    # NOTE(review): the name mentions keepdims "true" while the argument
    # below is keepdims=False -- confirm the intent.
    chunk_side, agg_side = split(x, x.sum(), keepdims=False)
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side

    assert iscollection(agg.dshape)
Beispiel #49
0
def test_embarassing_rowwise():
    """Row-wise arithmetic is applied chunk by chunk with no aggregation."""
    incremented = t.amount + 1
    chunk_side, agg_side = split(t, incremented)
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side

    per_chunk = chunk.amount + 1
    assert chunk_expr.isidentical(per_chunk)
    assert agg_expr.isidentical(agg)
Beispiel #50
0
def test_embarassing_selection():
    """A boolean selection is applied chunk by chunk with no aggregation."""
    positive_rows = t[t.amount > 0]
    chunk_side, agg_side = split(t, positive_rows)
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side

    per_chunk = chunk[chunk.amount > 0]
    assert chunk_expr.isidentical(per_chunk)
    assert agg_expr.isidentical(agg)