def test_update_dict():
    """The result keeps the input key and gains the computed ``a`` and ``b``."""
    doubled = chained(op.itemgetter('i'), lambda x: 2 * x)
    tripled = chained(op.itemgetter('i'), lambda x: 3 * x)

    # one setter is passed inside a positional dict, the other as a keyword
    transform = itemsetter({'a': doubled}, b=tripled)

    result = transform({'i': 4})
    assert result == {'i': 4, 'a': 8, 'b': 12}
def test_checkpoints__rewrites():
    """
    Check that, after a pipeline is modified, only the parts affected by the
    change are executed again.
    """
    store = {}
    counts = {}

    def run(pipeline):
        # every run goes through the add_checkpoints rewrite
        return apply(pipeline, 5, rewrites=[add_checkpoints])

    pipeline = chained(
        _count(counts, 'step 1', lambda x: x * 2),
        checkpoint(target=store),
        _count(counts, 'step 2', lambda x: x - 3),
        checkpoint(target=store),
        _count(counts, 'step 3', lambda x: x),
    )

    # first run: every step is executed once
    assert run(pipeline) == 7
    assert counts == {'step 1': 1, 'step 2': 1, 'step 3': 1}

    # second run: only the step after the last checkpoint is counted again
    assert run(pipeline) == 7
    assert counts == {'step 1': 1, 'step 2': 1, 'step 3': 2}

    # same pipeline, but with the middle step swapped out
    pipeline = chained(
        _count(counts, 'step 1', lambda x: x * 2),
        checkpoint(target=store),
        _count(counts, 'alt-step 2', lambda x: x),
        checkpoint(target=store),
        _count(counts, 'step 3', lambda x: x),
    )

    # 'step 1' is not counted again, the replaced step and its successor are
    assert run(pipeline) == 10
    assert counts == {'step 1': 1, 'step 2': 1, 'alt-step 2': 1, 'step 3': 3}

    assert run(pipeline) == 10
    assert counts == {'step 1': 1, 'step 2': 1, 'alt-step 2': 1, 'step 3': 4}
def test_chained__example_iter():
    """A chain rebuilt from the unpacked steps of another chain gives 7 for 5."""
    inner = chained(
        lambda a: 2 * a,
        lambda a: a - 3,
    )

    # star-unpacking iterates over the inner chain
    transform = chained(*inner)

    assert transform(5) == 7
def test_flowly_tz_update_dict():
    """Apply an itemsetter to a dict holding a ``db.from_sequence`` collection."""
    source = {'l': db.from_sequence([1, 2, 3, 4], npartitions=3)}

    # a positional dict and keyword arguments can be mixed
    transform = itemsetter(
        {'max': chained(op.itemgetter('l'), max)},
        min=chained(op.itemgetter('l'), min),
        sum=chained(op.itemgetter('l'), sum),
    )

    actual = apply(transform, source).compute()

    expected = {'l': [1, 2, 3, 4], 'min': 1, 'max': 4, 'sum': 10}
    assert actual == expected
def test_pipeline_example():
    """The builtin-based computation and the chained pipeline give equal results."""
    from functools import reduce
    import operator as op

    data = range(100)

    # reference: builtin filter / map combined with functools.reduce
    evens = builtins.filter(lambda x: x % 2 == 0, data)
    squares = builtins.map(lambda x: x**2.0, evens)
    result1 = math.sqrt(reduce(op.add, squares))

    # from here on filter, map and reduce refer to the toolz.curried versions
    from toolz.curried import filter, map, reduce
    from flowly.tz import chained

    transform = chained(
        filter(lambda x: x % 2 == 0),
        map(lambda x: x**2.0),
        reduce(op.add),
        math.sqrt,
    )
    result2 = transform(data)

    assert result1 == result2
def test_pipeline_example():
    """Compare a step-by-step builtin computation with the chained equivalent."""
    from functools import reduce
    import operator as op

    data = range(100)

    # step-by-step version using the builtins and functools.reduce
    even_numbers = builtins.filter(lambda x: x % 2 == 0, data)
    squared = builtins.map(lambda x: x ** 2.0, even_numbers)
    total = reduce(op.add, squared)
    expected = math.sqrt(total)

    # the imports below rebind filter, map and reduce inside this function
    from toolz.curried import filter, map, reduce
    from flowly.tz import chained

    pipeline = chained(
        filter(lambda x: x % 2 == 0),
        map(lambda x: x ** 2.0),
        reduce(op.add),
        math.sqrt,
    )
    actual = pipeline(data)

    assert expected == actual
def test_dags(executor):
    """Run a two-stage graph built from apply_concat and compare the results."""
    # the scope maps names to collections; itemgetter selects each branch input
    scope = {
        'a': db.from_sequence(range(0, 10), npartitions=3),
        'b': db.from_sequence(range(10, 20), npartitions=3),
        'c': db.from_sequence(range(20, 30), npartitions=3),
    }

    # stage 1: one sum per scope entry
    sum_each = apply_concat([
        chained(op.itemgetter('a'), sum, seq),
        chained(op.itemgetter('b'), sum, seq),
        chained(op.itemgetter('c'), sum, seq),
    ])
    # stage 2: aggregate the values produced by stage 1
    summarize = apply_concat([
        chained(max, seq),
        chained(min, seq),
        chained(sum, seq),
    ])
    graph = chained(sum_each, summarize)

    actual = executor(graph, scope)

    expected = [
        sum(range(20, 30)),  # largest of the three sums
        sum(range(0, 10)),   # smallest of the three sums
        sum(range(0, 30)),   # total of the three sums
    ]
    assert sorted(actual) == sorted(expected)
def test_dags(executor):
    """Build a graph from two apply_concat stages and check its output."""
    # dags are built from dicts plus itemgetter
    scope = {
        'a': db.from_sequence(range(0, 10), npartitions=3),
        'b': db.from_sequence(range(10, 20), npartitions=3),
        'c': db.from_sequence(range(20, 30), npartitions=3),
    }

    per_key_sums = apply_concat([
        chained(op.itemgetter(key), sum, seq)
        for key in ('a', 'b', 'c')
    ])
    aggregates = apply_concat([
        chained(aggregate, seq)
        for aggregate in (max, min, sum)
    ])
    graph = chained(per_key_sums, aggregates)

    actual = executor(graph, scope)

    expected = [sum(range(20, 30)), sum(range(0, 10)), sum(range(0, 30))]
    assert sorted(actual) == sorted(expected)
def test_checkpoints__repr():
    """
    Smoke test: repr of a pipeline returned by add_checkpoints does not raise.
    """
    store = {}
    pipeline = chained(
        lambda x: x * 2,
        checkpoint(target=store),
        lambda x: x,
    )
    rewritten = add_checkpoints(pipeline)

    # only the absence of an exception is checked
    repr(rewritten)
def test_flowly_kv_transform__chained(executor):
    """A chained pair of maps wrapped in kv_transform scales values by ten."""
    scale = chained(
        map(lambda i: 2 * i),
        map(lambda i: 5 * i),
    )
    transform = kv_transform(scale)
    pairs = [(i % 2, i) for i in range(20)]

    actual = executor(transform, pairs, npartitions=10)

    # keys are unchanged, every value is multiplied by 2 * 5
    expected = [(i % 2, 10 * i) for i in range(20)]
    assert sorted(actual) == sorted(expected)
def test_flowly_kv_transform__collect(executor):
    """collect followed by kv_valmap(sorted) yields one sorted list per key."""
    transform = chained(collect, kv_valmap(sorted))
    pairs = [(i % 2, i) for i in [1, 2, 3, 4, 5, 6, 7]]

    actual = executor(transform, pairs, npartitions=3)

    expected = [
        (0, [2, 4, 6]),
        (1, [1, 3, 5, 7]),
    ]
    assert sorted(actual) == sorted(expected)
def test_checkpoints__repr():
    """
    Check that calling repr on the result of add_checkpoints does not raise.
    """
    targets = {}
    steps = chained(
        lambda x: x * 2,
        checkpoint(target=targets),
        lambda x: x,
    )
    transform = add_checkpoints(steps)

    # the returned string is not inspected
    repr(transform)
def test_checkpoints__single_no_rewrite():
    """Without the rewrite, both counted steps run again on every apply."""
    store = {}
    counts = {}
    pipeline = chained(
        _count(counts, 'step 1', lambda x: x * 2),
        checkpoint(target=store),
        _count(counts, 'step 2', lambda x: x - 3),
    )

    # both counters increase by one on every run
    for runs in (1, 2):
        assert apply(pipeline, 5) == 7
        assert counts == {'step 1': runs, 'step 2': runs}
def test_checkpoint__not_rewritten():
    """A checkpoint between two map steps leaves the apply_to_local result as is.

    The checkpoint is currently ignored here.
    """
    store = {}
    pipeline = chained(
        map(lambda x: x * 2),
        checkpoint(target=store),
        map(lambda x: x - 3),
    )
    values = [1, 2, 3, 4, 5]

    actual = apply_to_local(pipeline, values, npartitions=3)

    # each value is doubled and then reduced by three
    assert actual == [-1, 1, 3, 5, 7]
def test_checkpoints__single_no_rewrite():
    """Applying twice without rewrites counts every step twice."""
    targets = {}
    seen = {}

    first_step = _count(seen, 'step 1', lambda x: x * 2)
    marker = checkpoint(target=targets)
    second_step = _count(seen, 'step 2', lambda x: x - 3)
    transform = chained(first_step, marker, second_step)

    first_result = apply(transform, 5)
    assert first_result == 7
    assert seen == {'step 1': 1, 'step 2': 1}

    # no rewrite is requested, so nothing is skipped on the second run
    second_result = apply(transform, 5)
    assert second_result == 7
    assert seen == {'step 1': 2, 'step 2': 2}
def test_checkpoints__single():
    """The step before the checkpoint is counted once until the target is cleared."""
    store = {}
    counts = {}
    pipeline = chained(
        _count(counts, 'step 1', lambda x: x * 2),
        checkpoint(target=store),
        _count(counts, 'step 2', lambda x: x - 3),
    )

    def run():
        return apply(pipeline, 5, rewrites=[add_checkpoints])

    assert run() == 7
    assert counts == {'step 1': 1, 'step 2': 1}

    # repeated runs only count the step after the checkpoint
    assert run() == 7
    assert counts == {'step 1': 1, 'step 2': 2}

    assert run() == 7
    assert counts == {'step 1': 1, 'step 2': 3}

    # after emptying the checkpoint target, 'step 1' is counted again
    store.clear()

    assert run() == 7
    assert counts == {'step 1': 2, 'step 2': 4}
def test_checkpoints__single():
    """Check the step counters over repeated runs with a single checkpoint."""
    targets = {}
    seen = {}
    transform = chained(
        _count(seen, 'step 1', lambda x: x * 2),
        checkpoint(target=targets),
        _count(seen, 'step 2', lambda x: x - 3),
    )

    # 'step 1' stays at one while 'step 2' is counted on every run
    for runs in (1, 2, 3):
        assert apply(transform, 5, rewrites=[add_checkpoints]) == 7
        assert seen == {'step 1': 1, 'step 2': runs}

    # with an emptied target both steps are counted on the next run
    targets.clear()

    assert apply(transform, 5, rewrites=[add_checkpoints]) == 7
    assert seen == {'step 1': 2, 'step 2': 4}
def test_chained__composition():
    """Adding two chains gives a transform that maps 5 to 7."""
    doubler = chained(lambda a: a * 2)
    shifter = chained(lambda a: a - 3)

    transform = doubler + shifter

    assert transform(5) == 7
def test_checkpoints__no_checkpoints():
    """The add_checkpoints rewrite also works on a chain without checkpoints."""
    pipeline = chained(
        lambda x: x * 2,
        lambda x: x - 3,
    )

    result = apply(pipeline, 5, rewrites=[add_checkpoints])

    assert result == 7
def test_flowly_tz_chained(executor):
    """Flatten nested lists and sum them through a chained pipeline."""
    flatten_and_sum = chained(it.chain.from_iterable, sum)
    nested = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

    actual = executor(flatten_and_sum, nested, npartitions=3)

    assert actual == sum(range(1, 10))
def test_checkpoints__empty():
    """An empty chain applied with add_checkpoints returns its input."""
    empty = chained()

    result = apply(empty, 5, rewrites=[add_checkpoints])

    assert result == 5
def test_chained__repr():
    """Smoke test: repr of a chain of two lambdas does not raise."""
    pipeline = chained(
        lambda a: 2 * a,
        lambda a: a - 3,
    )

    # the returned string is not inspected
    repr(pipeline)
def test_chained__hash():
    """Two separately built but identical chains have the same functional_hash."""
    # the lambdas are deliberately written out twice
    first = chained(lambda a: 2 * a, lambda a: a - 3)
    left = functional_hash(first)

    second = chained(lambda a: 2 * a, lambda a: a - 3)
    right = functional_hash(second)

    assert left == right
def test_chained__example_iter():
    """Unpacking a chain into a new chain keeps the transform: 5 maps to 7."""
    source_chain = chained(lambda a: 2 * a, lambda a: a - 3)

    # the new chain receives the steps obtained by iterating the old one
    rebuilt = chained(*source_chain)

    result = rebuilt(5)
    assert result == 7
def test_chained__repr():
    """Check that repr can be called on a chain without raising."""
    transform = chained(lambda a: 2 * a, lambda a: a - 3)

    # only the absence of an exception matters
    repr(transform)