예제 #1
0
def inc(x):
    """Return ``x + 1``.

    Defined with ``def`` rather than as a lambda bound to a name so the
    function carries a meaningful ``__name__`` in tracebacks and reprs.
    """
    return x + 1


# Expressions built from ``t`` that the reduction test below evaluates one by
# one; the order of the entries is kept as originally written.
reduction_exprs = [
    t['amount'].sum(), t['amount'].min(), t['amount'].max(),
    t['amount'].nunique(), t['name'].nunique(), t['amount'].count(),
    (t['amount'] > 150).any(), (t['amount'] > 150).all(),
    t['amount'].mean(), t['amount'].var(),
    summary(a=t.amount.sum(), b=t.id.count()),
    t['amount'].std(),
]


def test_spark_reductions(rdd):
    """Check every entry of ``reduction_exprs`` against ``rdd`` and ``data``.

    Each expression is computed once on ``rdd`` and once on ``data``. The two
    outcomes must compare equal; when they do not and the ``rdd`` outcome is
    a ``float``, an absolute difference below 0.001 is accepted instead.
    """
    for expression in reduction_exprs:
        actual = compute(expression, rdd)
        wanted = compute(expression, data)
        if actual == wanted:
            continue

        # Mismatch: echo both values before deciding whether it is tolerable.
        print(actual)
        print(wanted)
        if isinstance(actual, float):
            assert abs(actual - wanted) < 0.001
        else:
            assert actual == wanted
예제 #2
0
def test_multicols_projection(rdd):
    """Project ``amount`` and ``name`` out of ``t`` and compare the rows.

    The projection is computed against ``rdd``, collected, and required to
    equal a fixed list of ``(amount, name)`` tuples.
    """
    wanted_rows = [(100, 'Alice'), (200, 'Bob'), (50, 'Alice')]
    projection = t[['amount', 'name']]
    collected = compute(projection, rdd).collect()

    # Echo the computed rows first, then the expected ones.
    for rows in (collected, wanted_rows):
        print(rows)

    assert collected == wanted_rows


# Expressions built from ``t`` that ``test_reductions`` iterates over; one
# entry per line, in the original order.
reduction_exprs = [
    t['amount'].sum(),
    t['amount'].min(),
    t['amount'].max(),
    t['amount'].nunique(),
    t['name'].nunique(),
    t['amount'].count(),
    (t['amount'] > 150).any(),
    (t['amount'] > 150).all(),
    t['amount'].mean(),
    t['amount'].var(),
    summary(a=t.amount.sum(), b=t.id.count()),
    t['amount'].std(),
]


def test_reductions(rdd):
    """Compare each ``reduction_exprs`` entry computed on ``rdd`` and ``data``.

    The two computed values must be equal. If they are not and the value
    obtained from ``rdd`` is a ``float``, they only need to be within 0.001
    of each other (absolute difference).
    """
    for reduction in reduction_exprs:
        got = compute(reduction, rdd)
        want = compute(reduction, data)
        agrees = got == want
        if not agrees:
            # Show both sides so the captured output explains a failure.
            print(got)
            print(want)
            if not isinstance(got, float):
                assert got == want
            else:
                assert abs(got - want) < 0.001