inc = lambda x: x + 1

reduction_exprs = [
    t['amount'].sum(),
    t['amount'].min(),
    t['amount'].max(),
    t['amount'].nunique(),
    t['name'].nunique(),
    t['amount'].count(),
    (t['amount'] > 150).any(),
    (t['amount'] > 150).all(),
    t['amount'].mean(),
    t['amount'].var(),
    summary(a=t.amount.sum(), b=t.id.count()),
    t['amount'].std()]


def test_spark_reductions(rdd):
    # Evaluate each reduction on the Spark RDD and on the in-memory
    # reference data; the two backends must agree.
    for expr in reduction_exprs:
        result = compute(expr, rdd)
        expected = compute(expr, data)
        if not result == expected:
            print(result)
            print(expected)
        if isinstance(result, float):
            # Floating-point reductions (mean, var, std) are compared with a
            # small absolute tolerance.
            assert abs(result - expected) < 0.001
        else:
            assert result == expected
def test_multicols_projection(rdd):
    result = compute(t[['amount', 'name']], rdd).collect()
    expected = [(100, 'Alice'), (200, 'Bob'), (50, 'Alice')]
    print(result)
    print(expected)
    assert result == expected
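# A minimal sketch (an assumption, not part of the original test module) of the
# fixtures these tests rely on: `t`, `data`, and the `rdd` fixture are defined
# elsewhere in the file, and the rows and column order below are chosen only so
# that the projection above would yield
# [(100, 'Alice'), (200, 'Bob'), (50, 'Alice')].

from blaze import symbol, compute, summary

t = symbol('t', 'var * {name: string, amount: int, id: int}')
data = [('Alice', 100, 1), ('Bob', 200, 2), ('Alice', 50, 3)]

# `compute` dispatches on the backend type, so the same expression can be run
# against the plain Python tuples above or against a Spark RDD built from them
# (e.g. rdd = sc.parallelize(data) with a live SparkContext `sc`), which is
# exactly what the tests compare.
assert compute(t.amount.sum(), data) == 350
assert compute(t.name.nunique(), data) == 2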