def test_spy():
    """Check that 'spy' observes the stream without altering it.

    'spy' performs an operation on the data streaming through the
    pipeline, without changing what is seen downstream. An obvious use
    is to insert a 'spy(print)' at some point in the pipeline to observe
    the data flowing through that point.
    """
    inputs = list(range(50, 60))

    received = []
    collector = df.sink(received.append)

    observed = []
    watcher = df.spy(observed.append)

    df.push(source=inputs, pipe=df.pipe(watcher, collector))

    # The spy and the sink must both have seen exactly the input data.
    assert observed == received == inputs
def test_filter():
    """Check that 'filter' eliminates the data rejected by its predicate."""

    def is_odd(value):
        # Truthy (1) for odd numbers, falsy (0) for even ones.
        return value % 2

    inputs = list(range(20, 30))
    keep_odd = df.filter(is_odd)

    survivors = []
    collector = df.sink(survivors.append)

    df.push(source=inputs, pipe=df.pipe(keep_odd, collector))

    # Compare against the same selection made without the pipeline.
    assert survivors == [value for value in inputs if is_odd(value)]
def test_push_futures_single():
    """Check 'push' when 'result' is a single future.

    The future is passed on its own (not wrapped in a tuple or mapping)
    and the value returned by 'push' is compared directly with the
    number of items in the source.
    """
    inputs = list(range(100))
    counter = df.count()

    n_counted = df.push(source=inputs,
                        pipe=df.pipe(counter.sink),
                        result=counter.future)

    assert n_counted == len(inputs)
def test_simplest_pipeline():
    """Check the simplest possible pipeline: one source feeding one sink."""
    # A concrete list (rather than a lazy source) lets us compare the
    # collected output with the input afterwards.
    inputs = list(range(20))

    # The sink simply accumulates everything it receives into a list.
    collected = []
    collector = df.sink(collected.append)

    # 'push' feeds the source into the pipe.
    df.push(source=inputs, pipe=collector)

    assert collected == inputs
def test_slice_close_all(close_all):
    """Check how 'slice' inside a branch affects the main pipeline.

    A slice limited to `n_elements` items is placed at the head of a
    branch. The branch must only ever receive the first `n_elements`
    items. The main pipeline receives the same truncated data when
    `close_all` is true, and the complete source otherwise.

    Parameters
    ----------
    close_all
        Flag forwarded to 'df.slice'. It is supplied from outside this
        function (presumably by a parametrization or fixture that is
        not visible here -- TODO confirm).
    """
    the_source = list(range(20))
    n_elements = 5
    # Named 'take_first' rather than 'slice' to avoid shadowing the builtin.
    take_first = df.slice(n_elements, close_all=close_all)

    branch_items = []
    branch_sink = df.sink(branch_items.append)

    main_items = []
    main_sink = df.sink(main_items.append)

    df.push(source=the_source,
            pipe=df.pipe(df.branch(take_first, branch_sink), main_sink))

    head = the_source[:n_elements]

    # The branch is truncated regardless of 'close_all'.
    assert branch_items == head
    # Only 'close_all' propagates the truncation to the main pipeline.
    assert main_items == (head if close_all else the_source)
def test_map():
    """Check that 'map' transforms every item passing through the pipe.

    Pipelines start to become interesting when the data are transformed
    in some way: 'map' applies the supplied operation to each item.
    """

    def squared(value):
        return value * value

    inputs = list(range(1, 11))
    square = df.map(squared)

    outputs = []
    collector = df.sink(outputs.append)

    # Basic syntax: the downstream component is passed as the argument
    # of the component that precedes it.
    df.push(source=inputs, pipe=square(collector))

    assert outputs == [squared(value) for value in inputs]
def test_fork_implicit_pipes():
    """Check that 'fork' accepts tuples as well as explicit pipes.

    Arguments to 'fork' can be pipes or tuples; tuples get implicitly
    converted into pipes. Both forms must produce the same output.
    """
    inputs = list(range(10, 20))
    increment = df.map(lambda x: 1 + x)

    from_tuple = []
    tuple_sink = df.sink(from_tuple.append)

    from_pipe = []
    pipe_sink = df.sink(from_pipe.append)

    network = df.fork((increment, tuple_sink),          # implicit pipe
                      df.pipe(increment, pipe_sink))    # explicit pipe
    df.push(source=inputs, pipe=network)

    expected = [1 + x for x in inputs]
    assert from_tuple == from_pipe == expected
def test_push_futures_tuple():
    """Check 'push' when 'result' is a tuple of futures.

    The returned tuple is expected to hold the futures' values in the
    same order in which the futures were given.
    """
    inputs = list(range(100))
    total_counter = df.count()
    odd_counter = df.count()

    # One fork arm counts everything, the other counts only odd numbers.
    odd_only = df.pipe(df.filter(lambda n: n % 2), odd_counter.sink)
    network = df.fork(total_counter.sink, odd_only)

    outcome = df.push(source=inputs,
                      pipe=network,
                      result=(odd_counter.future, total_counter.future))

    n_total = len(inputs)
    n_odd = n_total // 2
    assert outcome == (n_odd, n_total)
def test_pipe():
    """Check that 'pipe' builds a pipeline from a sequence of components.

    The basic syntax requires any element of a pipeline to be passed as
    argument to the one that precedes it. This looks strange to the
    human reader, especially when using parametrized components. 'pipe'
    allows construction of pipes from a sequence of components; this is
    'test_map' rewritten with 'pipe'.
    """

    def squared(value):
        return value * value

    inputs = list(range(1, 11))
    square = df.map(squared)

    outputs = []
    collector = df.sink(outputs.append)

    df.push(source=inputs, pipe=df.pipe(square, collector))

    assert outputs == [squared(value) for value in inputs]
def test_branch():
    """Check that 'branch' builds the same network as an explicit 'fork'.

    'branch', like 'spy', allows operations to be inserted on a copy of
    the stream at any point in a network. In contrast to 'spy' (which
    accepts a single plain operation), 'branch' accepts an arbitrary
    number of pipeline components, which it combines into a pipeline.
    It provides a more convenient way of constructing some graphs that
    would otherwise be constructed with 'fork'.
    """
    # Collectors for the side path (fork version, then branch version).
    side_via_fork = []
    side_via_fork_sink = df.sink(side_via_fork.append)
    side_via_branch = []
    side_via_branch_sink = df.sink(side_via_branch.append)

    # Collectors for the main path (fork version, then branch version).
    main_via_fork = []
    main_via_fork_sink = df.sink(main_via_fork.append)
    main_via_branch = []
    main_via_branch_sink = df.sink(main_via_branch.append)

    # Transformations shared by both networks.
    increment = df.map(lambda n: n + 1)
    double = df.map(lambda n: n * 2)
    triple = df.map(lambda n: n * 3)

    # Two equivalent networks: one constructed with 'fork', the other
    # with 'branch'.
    with_fork = df.pipe(increment,
                        df.fork(df.pipe(double, side_via_fork_sink),
                                df.pipe(triple, main_via_fork_sink)))
    with_branch = df.pipe(increment,
                          df.branch(double, side_via_branch_sink),
                          triple,
                          main_via_branch_sink)

    # Feed the same data into the two networks.
    inputs = list(range(10, 50, 4))
    df.push(source=inputs, pipe=with_fork)
    df.push(source=inputs, pipe=with_branch)

    # Both networks must produce the same results on each path.
    assert side_via_fork == side_via_branch
    assert main_via_fork == main_via_branch
def test_push_futures_mapping():
    """Check 'push' when 'result' is a mapping of futures.

    The futures are given under the keys 'odd' and 'all'; the returned
    object is expected to expose their values as attributes with those
    same names.
    """
    total_counter = df.count()
    odd_counter = df.count()
    inputs = list(range(100))

    # One fork arm counts everything, the other counts only odd numbers.
    odd_only = df.pipe(df.filter(lambda n: n % 2), odd_counter.sink)
    network = df.fork(total_counter.sink, odd_only)

    outcome = df.push(source=inputs,
                      pipe=network,
                      result={"odd": odd_counter.future,
                              "all": total_counter.future})

    n_total = len(inputs)
    assert outcome.odd == n_total // 2
    assert outcome.all == n_total
def test_stop_when():
    """Check that 'stop_when' stops all branches of the network at once.

    An endless source is pushed through a fork whose first arm stops
    the network when it sees `limit`; the second arm counts items. The
    count is expected to be `limit // step`.
    """
    import itertools

    count_future, count_sink = df.count()
    limit, step = 10, 2

    # Without 'stop_when' this source would never be exhausted.
    endless = itertools.count(start=0, step=step)
    network = df.fork(df.stop_when(lambda n: n == limit), count_sink)

    outcome = df.push(source=endless, pipe=network, result=(count_future,))

    assert outcome == (limit // step,)
def test_stateful_stop_when():
    """Check 'stop_when' with a stateful, coroutine-based condition.

    The condition is a coroutine wrapped with 'df.coroutine_send' that
    answers False `n` times and then True. The test expects exactly
    `limit` items to have been counted when the network stops.
    """
    import itertools

    @df.coroutine_send
    def n_items_seen(n):
        yield  # Execution pauses here on construction
        for _ in range(n):
            yield False
        yield True

    count_future, count_sink = df.count()
    limit, step = 10, 2

    endless = itertools.count(start=0, step=step)
    network = df.fork(df.stop_when(n_items_seen(limit)), count_sink)

    outcome = df.push(source=endless, pipe=network, result=(count_future,))

    assert outcome == (limit,)
def test_spy_count():
    """Check that 'spy_count' counts items in the middle of a pipeline.

    'count' is a component that can be needed in the middle of a
    pipeline. However, because it is a sink, it needs to be plugged
    into a spy. 'spy_count' provides a comfortable interface to access
    the future and the spy objects in a single line.
    """
    inputs = list(range(20))

    end_counter = df.count()
    mid_counter = df.spy_count()

    outcome = df.push(source=inputs,
                      pipe=df.pipe(mid_counter.spy, end_counter.sink),
                      result={"from_count": end_counter.future,
                              "from_spy_count": mid_counter.future})

    # Both counters sit on the same unfiltered stream, so they must agree.
    assert outcome.from_count == outcome.from_spy_count == len(inputs)
def test_push_futures():
    """Check the futures interface offered by 'push'.

    'push' provides a higher-level interface to using futures: it
    optionally accepts a tuple of futures, and returns a tuple of
    their results.
    """
    total_counter = df.count()
    odd_counter = df.count()
    inputs = list(range(100))

    # One fork arm counts everything, the other counts only odd numbers.
    odd_only = df.pipe(df.filter(lambda n: n % 2), odd_counter.sink)
    network = df.fork(total_counter.sink, odd_only)

    outcome = df.push(source=inputs,
                      pipe=network,
                      result=(odd_counter.future, total_counter.future))

    n_total = len(inputs)
    n_odd = n_total // 2
    assert outcome == (n_odd, n_total)
def test_count_filter():
    """Check that 'count_filter' filters and keeps track of the tallies.

    'count_filter' provides a future/filter pair. This is a simple
    interface to keep track of how many entries satisfy the predicate
    and how many are filtered out.
    """

    # A named function instead of a lambda bound to a name (PEP 8).
    def predicate(n):
        return n % 2

    the_source = list(range(21))
    odd = df.count_filter(predicate)

    filtered = []
    the_sink = df.sink(filtered.append)

    result = df.push(source=the_source,
                     pipe=df.pipe(odd.filter, the_sink),
                     result=odd.future)

    expected_result = list(filter(predicate, the_source))

    # The items reaching the sink are exactly those passing the predicate ...
    assert filtered == expected_result
    # ... and the future reports how many passed and how many did not.
    assert result.n_passed == len(expected_result)
    assert result.n_failed == len(the_source) - len(expected_result)
def test_pipes_must_end_in_a_sink():
    """Check that pushing into a pipe without a sink is rejected.

    A lone 'map' component has nothing downstream of it; 'push' is
    expected to raise 'df.IncompletePipe' for such a pipe.
    """
    inputs = range(10)
    unterminated = df.map(abs)

    with raises(df.IncompletePipe):
        df.push(source=inputs, pipe=unterminated)