Python pipeの例、dataflow.pipe Pythonの例

コード例 #1

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_chain_pipes():
    """Check that grouping components into sub-pipes changes nothing.

    Pipelines must end in sinks. A pipe whose last component is not a
    sink may still be used as a component of a bigger pipeline, but no
    data can be fed into it until it is connected to some other
    component which ends in a sink.
    """
    # Collectors and the sinks that fill them
    flat_out = []
    flat_sink = df.sink(flat_out.append)
    nested_out = []
    nested_sink = df.sink(nested_out.append)

    # Basic transformations shared by both networks
    increment = df.map(lambda n: n + 1)
    double = df.map(lambda n: n * 2)
    minus_three = df.map(lambda n: n - 3)

    # Equivalent networks: the first is flat, the second is assembled
    # from two sub-pipes
    flat_graph = df.pipe(increment, double, minus_three, flat_sink)
    nested_graph = df.pipe(df.pipe(increment, double),
                           df.pipe(minus_three, nested_sink))

    # Both networks receive exactly the same input
    data = list(range(40))
    for network in (flat_graph, nested_graph):
        df.push(source=data, pipe=network)

    # Both networks must have produced the same output
    assert flat_out == nested_out

コード例 #2

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_reuse_terminated_pipes():
    """Reuse a sink-terminated pipe twice within a single graph.

    Sink-terminated pipes are reusable, but when such a component
    appears more than once in the same graph its sink receives input
    from every branch that uses it: the branches share the sink and are
    therefore joined.
    """
    def shift_by(offset):
        return df.map(lambda x: x + offset)

    A, B, C = 1, 2, 3
    X, Y, Z = 4, 5, 6

    received = []
    shared_sink = df.sink(received.append)

    component = df.pipe(shift_by(X), shift_by(Y), shift_by(Z), shared_sink)

    graph = df.pipe(
        shift_by(A),
        df.branch(shift_by(B), component),
        shift_by(C),
        component,
    )

    data = list(range(10, 20))
    df.push(source=data, pipe=graph)

    # The test expects the items from the two routes to alternate in
    # the shared collector: branch route first, then main route.
    expected = []
    for n in data:
        expected.append(n + A + B + X + Y + Z)
        expected.append(n + A + C + X + Y + Z)

    assert received == expected

コード例 #3

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_reuse_unterminated_pipes():
    """Use one open-ended pipe at two points of the same network.

    Open-ended pipes must be connected to a sink before they can
    receive any input. They are reusable components: any such pipe can
    be used at different points in the same or in different networks,
    and the uses are completely independent.
    """
    def add(n):
        return df.map(lambda x: x + n)

    A, B, C, D, E = 1, 2, 3, 4, 5
    X, Y, Z = 6, 7, 8

    component = df.pipe(add(X), add(Y), add(Z))

    branch_out = []
    branch_sink = df.sink(branch_out.append)
    main_out = []
    main_sink = df.sink(main_out.append)

    # component is being reused twice in this network: once inside the
    # branch and once on the main line
    graph = df.pipe(
        add(A),
        df.branch(add(B), component, add(C), branch_sink),
        add(D),
        component,
        add(E),
        main_sink,
    )

    data = list(range(10, 20))
    df.push(source=data, pipe=graph)

    expected_branch = [n + A + B + X + Y + Z + C for n in data]
    expected_main = [n + A + D + X + Y + Z + E for n in data]
    assert branch_out == expected_branch
    assert main_out == expected_main

コード例 #4

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_reduce():
    """Build a summing future-sink with 'reduce' and use it twice.

    'reduce' provides a high-level way of creating future-sinks such as
    'count'. Here it is used to make a component that adds up
    everything it receives.
    """
    from operator import add

    # A factory of summing future-sinks, starting from zero
    make_total = df.reduce(add, initial=0)

    # Two independent instances, each applied to a different (forked)
    # sub-stream of the network
    everything = make_total()
    odd_only = make_total()

    N = 15
    data = list(range(N))

    odd_line = df.pipe(df.filter(lambda n: n % 2), odd_only.sink)
    network = df.fork(everything.sink, odd_line)

    result = df.push(source=data,
                     pipe=network,
                     result=(everything.future, odd_only.future))

    expected_all = sum(data)
    expected_odd = (N // 2)**2
    assert result == (expected_all, expected_odd)

コード例 #5

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_push_futures_single():
    """Pass a single future as 'result' and get its value from 'push'."""
    data = list(range(100))
    counter = df.count()

    n_seen = df.push(source=data,
                     pipe=df.pipe(counter.sink),
                     result=counter.future)

    assert n_seen == len(data)

コード例 #6

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_implicit_element_picking_in_pipe():
    """A bare string in 'pipe' picks that key from each incoming dict."""
    values = list(range(10))
    records = ({"x": i, "y": -i} for i in values)

    picked = []
    collector = df.sink(picked.append)

    network = df.pipe("x", collector)
    df.push(source=records, pipe=network)

    assert picked == values

コード例 #7

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_slice_downstream(spec):
    """Compare 'df.slice(*spec)' in a pipe with built-in list slicing.

    'spec' holds the positional arguments for a slice; it is supplied
    from outside this function (presumably by a parametrize decorator
    that is not visible here).
    """
    letters = list('abcdefghij')

    passed_through = []
    collector = df.sink(passed_through.append)

    network = df.pipe(df.slice(*spec), collector)
    df.push(source=letters, pipe=network)

    reference = slice(*spec)
    start, stop, step = reference.start, reference.stop, reference.step
    assert passed_through == letters[reference]
    assert passed_through == letters[start:stop:step]

コード例 #8

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_map_with_namespace_args_out():
    """'map' reads its input from 'args' and stores its output in 'out'.

    Each record carries a lowercase letter under "x"; the mapped value
    is read back from "upper_x" by the sink.
    """
    alphabet = string.ascii_lowercase
    records = ({"i": i, "x": x} for i, x in enumerate(alphabet))

    to_upper = df.map(str.upper, args="x", out="upper_x")

    collected = []
    collector = df.sink(collected.append, args="upper_x")

    network = df.pipe(to_upper, collector)
    df.push(source=records, pipe=network)

    assert collected == list(alphabet.upper())

コード例 #9

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_filter_with_namespace():
    """'filter' applies its predicate to the item named by 'args'."""
    vowels = "aeiou"
    alphabet = string.ascii_lowercase
    records = ({"i": i, "x": x} for i, x in enumerate(alphabet))

    keep_vowels = df.filter(lambda s: s in vowels, args="x")

    kept = []
    collector = df.sink(kept.append, args="x")

    network = df.pipe(keep_vowels, collector)
    df.push(source=records, pipe=network)

    assert kept == list(vowels)

コード例 #10

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_branch_closes_sideways():
    """After 'push' returns, the sink inside a branch accepts no data.

    Sending a further value to the branch sink is expected to raise
    StopIteration.
    """
    data = range(10)

    side_out = []
    side_sink = df.sink(side_out.append)
    main_out = []
    main_sink = df.sink(main_out.append)

    network = df.pipe(df.branch(side_sink), main_sink)
    df.push(source=data, pipe=network)

    with raises(StopIteration):
        side_sink.send(99)

コード例 #11

0

ファイルを表示

ファイル: dataflow_exhaustive_test.py プロジェクト: jmalbos/dataflow

def test_string_to_pick():
    """'_string_to_pick' makes a component that picks a namespace item.

    The component built from "y" pushes the value stored under "y" in
    each incoming record down the pipe.
    """
    values = list(range(10))
    records = ({"x": i**2, "y": i} for i in values)

    picked = []
    collector = df.sink(picked.append)

    pick_y = df._string_to_pick("y")
    df.push(source=records, pipe=df.pipe(pick_y, collector))

    assert picked == values

コード例 #12

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_implicit_element_picking_in_branch():
    """A bare string in 'branch' picks that key for the branch only.

    The branch sink receives the "x" values, while the main sink still
    receives the complete records.
    """
    values = list(range(10))
    records = ({"x": i, "y": -i} for i in values)

    side_out = []
    side_sink = df.sink(side_out.append)
    main_out = []
    main_sink = df.sink(main_out.append)

    network = df.pipe(df.branch("x", side_sink), main_sink)
    df.push(source=records, pipe=network)

    # Recover the original values from the full records seen downstream
    negated_y = [-record["y"] for record in main_out]
    assert side_out == negated_y
    assert negated_y == values

コード例 #13

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_map_with_namespace_item():
    """'item' makes 'map' replace the input with the output.

    The upper-cased letter is read back from the same key ("x") that
    supplied the lowercase one.
    """
    alphabet = string.ascii_lowercase
    records = ({"i": i, "x": x} for i, x in enumerate(alphabet))

    to_upper = df.map(str.upper, item="x")

    collected = []
    collector = df.sink(collected.append, args="x")

    network = df.pipe(to_upper, collector)
    df.push(source=records, pipe=network)

    assert collected == list(alphabet.upper())

コード例 #14

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_push_futures_tuple():
    """Pass a tuple of futures as 'result' and get a tuple of values.

    The values come back in the order in which the futures were given.
    """
    data = list(range(100))
    every_item = df.count()
    odd_items = df.count()

    odd_line = df.pipe(df.filter(lambda n: n % 2), odd_items.sink)
    network = df.fork(every_item.sink, odd_line)

    result = df.push(source=data,
                     pipe=network,
                     result=(odd_items.future, every_item.future))

    n_all = len(data)
    n_odd = n_all // 2
    assert result == (n_odd, n_all)

コード例 #15

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_branch():
    """Build the same network once with 'fork' and once with 'branch'.

    'branch', like 'spy', allows you to insert operations on a copy of
    the stream at any point in a network. In contrast to 'spy' (which
    accepts a single plain operation), 'branch' accepts an arbitrary
    number of pipeline components, which it combines into a pipeline.
    It provides a more convenient way of constructing some graphs that
    would otherwise be constructed with 'fork'.
    """
    # Collectors and sinks; the *_fork ones serve the 'fork' network
    # and the *_branch ones serve the 'branch' network
    doubled_fork = []
    doubled_fork_sink = df.sink(doubled_fork.append)
    doubled_branch = []
    doubled_branch_sink = df.sink(doubled_branch.append)
    tripled_fork = []
    tripled_fork_sink = df.sink(tripled_fork.append)
    tripled_branch = []
    tripled_branch_sink = df.sink(tripled_branch.append)

    increment = df.map(lambda n: n + 1)
    double = df.map(lambda n: n * 2)
    triple = df.map(lambda n: n * 3)

    # Two equivalent networks, one constructed with 'fork', the other
    # with 'branch'
    fork_graph = df.pipe(
        increment,
        df.fork(df.pipe(double, doubled_fork_sink),
                df.pipe(triple, tripled_fork_sink)),
    )
    branch_graph = df.pipe(
        increment,
        df.branch(double, doubled_branch_sink),
        triple,
        tripled_branch_sink,
    )

    # Both networks receive exactly the same input
    data = list(range(10, 50, 4))
    for network in (fork_graph, branch_graph):
        df.push(source=data, pipe=network)

    # Both networks must have produced the same output
    assert doubled_fork == doubled_branch
    assert tripled_fork == tripled_branch

コード例 #16

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_longer_pipeline():
    """Pipelines can have arbitrary lengths: chain four maps and a sink."""
    data = list(range(1, 11))

    collected = []
    collector = df.sink(collected.append)

    stages = (
        df.map(lambda n: n + 1),
        df.map(lambda n: n * 2),
        df.map(lambda n: n - 3),
        df.map(lambda n: n / 4),
        collector,
    )
    df.push(source=data, pipe=df.pipe(*stages))

    expected = [(((n + 1) * 2) - 3) / 4 for n in data]
    assert collected == expected

コード例 #17

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_push_futures_mapping():
    """Pass a mapping of futures as 'result'.

    The returned object exposes each value as an attribute named after
    the corresponding key.
    """
    every_item = df.count()
    odd_items = df.count()

    data = list(range(100))

    odd_line = df.pipe(df.filter(lambda n: n % 2), odd_items.sink)
    network = df.fork(every_item.sink, odd_line)
    wanted = {"odd": odd_items.future, "all": every_item.future}

    result = df.push(source=data, pipe=network, result=wanted)

    n_all = len(data)
    assert result.odd == n_all // 2
    assert result.all == n_all

コード例 #18

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_filter():
    """'filter' can be used to eliminate data from the stream."""
    def the_predicate(n):
        return n % 2

    keep_odd = df.filter(the_predicate)

    data = list(range(20, 30))

    survivors = []
    collector = df.sink(survivors.append)

    network = df.pipe(keep_odd, collector)
    df.push(source=data, pipe=network)

    expected = [n for n in data if the_predicate(n)]
    assert survivors == expected

コード例 #19

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_spy():
    """'spy' observes the stream without altering it.

    'spy' performs an operation on the data streaming through the
    pipeline, without changing what is seen downstream. An obvious use
    would be to insert a 'spy(print)' at some point in the pipeline to
    observe the data flowing through that point.
    """
    data = list(range(50, 60))

    downstream = []
    collector = df.sink(downstream.append)
    observed = []
    observer = df.spy(observed.append)

    network = df.pipe(observer, collector)
    df.push(source=data, pipe=network)

    assert observed == downstream
    assert downstream == data

コード例 #20

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_slice_close_all(close_all):
    """Check how 'close_all' on a branched slice affects the main line.

    The branch always receives only the first 'n_elements' items. With
    'close_all' set, the main line is cut to the same items; otherwise
    it receives the whole source. 'close_all' is supplied from outside
    this function (presumably by a parametrize decorator or fixture
    that is not visible here).
    """
    data = list(range(20))
    n_elements = 5
    # Named so that it does not hide the built-in 'slice'
    take_first = df.slice(n_elements, close_all=close_all)

    side_out = []
    side_sink = df.sink(side_out.append)
    main_out = []
    main_sink = df.sink(main_out.append)

    network = df.pipe(df.branch(take_first, side_sink), main_sink)
    df.push(source=data, pipe=network)

    assert side_out == data[:n_elements]

    expected_main = data[:n_elements] if close_all else data
    assert main_out == expected_main

コード例 #21

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_spy_count():
    """'spy_count' counts items in the middle of a pipeline.

    count is a component that can be needed in the middle of a
    pipeline. However, because it is a sink it needs to be plugged into
    a spy. The component spy_count provides a comfortable interface to
    access the future and spy objects in a single line.
    """
    data = list(range(20))

    end_counter = df.count()
    mid_counter = df.spy_count()

    network = df.pipe(mid_counter.spy, end_counter.sink)
    wanted = {"from_count": end_counter.future,
              "from_spy_count": mid_counter.future}

    result = df.push(source=data, pipe=network, result=wanted)

    assert result.from_count == result.from_spy_count
    assert result.from_spy_count == len(data)

コード例 #22

0

ファイルを表示

def test_push_futures():
    """'push' resolves the futures it is given.

    'push' provides a higher-level interface to using futures: it
    optionally accepts a tuple of futures, and returns a tuple of their
    results.
    """
    every_item = df.count()
    odd_items = df.count()

    data = list(range(100))

    odd_line = df.pipe(df.filter(lambda n: n % 2), odd_items.sink)
    network = df.fork(every_item.sink, odd_line)
    wanted = (odd_items.future, every_item.future)

    result = df.push(source=data, pipe=network, result=wanted)

    n_all = len(data)
    n_odd = n_all // 2
    assert result == (n_odd, n_all)

コード例 #23

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_fork_implicit_pipes():
    """'fork' accepts pipes or tuples as arguments.

    Tuples get implicitly converted into pipes, so both arms of the
    fork below are expected to behave identically.
    """
    data = list(range(10, 20))
    add_1 = df.map(lambda x: 1 + x)

    from_tuple = []
    tuple_sink = df.sink(from_tuple.append)
    from_pipe = []
    pipe_sink = df.sink(from_pipe.append)

    network = df.fork((add_1, tuple_sink),
                      df.pipe(add_1, pipe_sink))
    df.push(source=data, pipe=network)

    expected = [1 + x for x in data]
    assert from_tuple == from_pipe
    assert from_pipe == expected

コード例 #24

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_pipe():
    """'pipe' builds a pipeline from a sequence of components.

    The basic syntax requires any element of a pipeline to be passed as
    argument to the one that precedes it. This looks strange to the
    human reader, especially when using parametrized components. 'pipe'
    allows construction of pipes from a sequence of components instead.
    """
    def the_operation(n):
        return n * n

    square = df.map(the_operation)

    data = list(range(1, 11))

    squared = []
    collector = df.sink(squared.append)

    network = df.pipe(square, collector)
    df.push(source=data, pipe=network)

    expected = [the_operation(n) for n in data]
    assert squared == expected

コード例 #25

0

ファイルを表示

ファイル: dataflow_test.py プロジェクト: jmalbos/dataflow

def test_count_filter():
    """'count_filter' provides a future/filter pair.

    This is a simple interface to keep track of how many entries
    satisfy the predicate and how many are filtered out.
    """
    def is_odd(n):
        return n % 2

    data = list(range(21))

    odd = df.count_filter(is_odd)
    survivors = []
    collector = df.sink(survivors.append)

    network = df.pipe(odd.filter, collector)
    result = df.push(source=data, pipe=network, result=odd.future)

    expected = [n for n in data if is_odd(n)]
    n_expected = len(expected)

    assert survivors == expected
    assert result.n_passed == n_expected
    assert result.n_failed == len(data) - n_expected

コード例 #26

0

ファイルを表示

ファイル: dataflow_exhaustive_test.py プロジェクト: jmalbos/dataflow

from pytest import mark
parametrize = mark.parametrize

import dataflow as df


@parametrize(
    "component",
    (
        df.map(lambda x: x),
        df.filter(lambda x: x > 0),
        df.sink(print),
        df.branch(df.sink(print)),
        df.pipe(df.map(abs)),
    ),
)
def test_string_to_pick_ignores_components(component):
    """'_string_to_pick' hands back the very same component it is given."""
    returned = df._string_to_pick(component)
    assert returned is component


def test_string_to_pick():
    """'_string_to_pick' makes a component that picks a namespace item.

    The component built from "y" pushes the value stored under "y" in
    each incoming record down the pipe.
    """
    values = list(range(10))
    records = ({"x": i**2, "y": i} for i in values)

    picked = []
    collector = df.sink(picked.append)

    pick_y = df._string_to_pick("y")
    df.push(source=records, pipe=df.pipe(pick_y, collector))

    assert picked == values