Code Example #1
    def test_producer_consume_2_processes_inline(self):
        data = [1, 2, 3, 4, 5]
        workflow = Iterable(data) | Parallelize(two_split) | (
            Map(add100) | Map(add100)) | Join() | StoreAndPickle()
        workflow.run()

        # workflow refers to the StoreAndPickle() instance, which is the only leaf of the DAG
        actual = workflow.load()
        # need to sort the result because with symmetric parallelism the order is not guaranteed
        self.assertEqual(sorted(actual), [d + 200 for d in data])
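These tests rely on helpers defined elsewhere in the test module: add100 and the split functions handed to Parallelize (two_split here, ten_split in Example #8). A plausible sketch of those helpers, assuming a splitter takes an iterable and returns N chunks (the repository's actual signatures may differ):

def add100(x):
    # module-level so it can be pickled and shipped to worker processes
    return x + 100

def make_split(n):
    """Build an n-way splitter: deal items round-robin into n lists."""
    def split(iterable):
        chunks = [[] for _ in range(n)]
        for i, item in enumerate(iterable):
            chunks[i % n].append(item)
        return chunks
    return split

two_split = make_split(2)
ten_split = make_split(10)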
Code Example #2
    def test_producer_map_consume_with_3_process_plus_2_branches(self):
        data = [1, 2, 3, 4, 5]

        workflow = Iterable(data)
        sync_branch = workflow | StoreAndPickle()
        async_branch = workflow | SpawnThread() | Map(add100)
        async_branch1 = async_branch | StoreAndPickle()
        async_branch2 = async_branch | Map(add100) | SpawnThread() | StoreAndPickle()
        workflow.run()

        self.assertEqual(sync_branch.load(), data)
        self.assertEqual(async_branch1.load(), [d+100 for d in data])
        self.assertEqual(async_branch2.load(), [d+200 for d in data])
Code Example #3
File: test_dag.py Project: satishgoda/pypelines
    def test_iter_filter_map_list(self):
        producer = Iterable([1, 2, 3])
        flt = Filter(lambda x: x > 1)
        mapper = Map(lambda x: x + 10)
        tolist = AsList()

        producer.add_child(flt)
        flt.add_child(mapper)
        mapper.add_child(tolist)

        producer.run()

        self.assertEqual(tolist.list, [12, 13])
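This test builds the same kind of DAG as the | examples, but through the explicit add_child API, which suggests that | is syntactic sugar for add_child. A minimal sketch of how such an operator could be wired (a guess at the mechanism, not pypelines' actual implementation):

class Node:
    def __init__(self):
        self.children = []

    def add_child(self, child):
        self.children.append(child)
        return child

    def __or__(self, other):
        # a | b adds b as a child of a and returns b, so chains extend the tail
        return self.add_child(other)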
Code Example #4
File: gil.py Project: satishgoda/pypelines
def main():
    """Demostrate that for I/O-bound operation python interpreter releases the GIL.
    The idea is to implement two operations using pypelines: one slow I/O-bound communication
    followed by one slow CPU-bound computation.

    To emulate an inefficient I/O-bound communication we will use an HTTP GET to an external
    web server which will compute very inefficiently Fibonacci value fib(n):
    HTTPClient("http://127.0.0.1:12345/fib/32").
    To run the web server execute 'python fib_web.py' in a second shell.

    To emulate a CPU-bound long running calculation we will re-compute fib(n) with the same
    value of n: Map(compute_fib).

    Clearly this is a useless example, but it is easy to note that:

    1. if we run this pypeline synchronously, we get as total time approximately
    the sum of the duration of the two operations.

    2. if we run this pypeline asynchronously using asymmetric parallelism  with one thread,
    we can observe that the time is almost half of the synchronous case.

    This means that while the HTTP client is waiting for the results (the web server takes a while
    to compute fib(n)), the Python interpreter release the GIL and the thread running
    compute_fib start to execute."""

    #Execute: python fib_web.py

    print("Run Pypelines synchronously...")
    workflow = Repeat(lambda x: x > 10) | HTTPClient(
        "http://127.0.0.1:12345/fib/32") | Map(compute_fib) | StdOut()
    t1 = time.time()
    workflow.run()
    t2 = time.time()
    print("Took " + str(t2 - t1) + " seconds.")

    print(
        "Now, run Pypelines asynchronously using asymmetric parallelims with one thread, should be faster..."
    )
    workflow = Repeat(lambda x: x > 10) | HTTPClient(
        "http://127.0.0.1:12345/fib/32") | SpawnThread() | Map(
            compute_fib) | StdOut()
    t1 = time.time()
    workflow.run()
    t2 = time.time()
    print("Took " + str(t2 - t1) + " seconds.")
Code Example #5
    def test_producer_map_consume_with_3_process(self):
        data = [1, 2, 3, 4, 5]

        workflow = Iterable(data) | SpawnThread() | Map(add100) | SpawnThread() | StoreAndPickle()
        workflow.run()

        # workflow refers to the StoreAndPickle() instance, which is the only leaf of the DAG
        actual = workflow.load()
        self.assertEqual(actual, [d+100 for d in data])
Code Example #6
    def test_lambda_with_func_import(self):
        data = [1, 2, 3, 4, 5]

        workflow = Iterable(data) | SpawnProcess() | Map(lambda x: add100(x)) | StoreAndPickle()
        workflow.run()

        # workflow refers to the StoreAndPickle() instance, which is the only leaf of the DAG
        actual = workflow.load()
        self.assertEqual(actual, [d+100 for d in data])
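SpawnThread() apparently cuts the pipeline at that point and runs the downstream stages on a separate thread fed from a queue: asymmetric parallelism, where different stages run in parallel, as opposed to the symmetric Parallelize/Join of Examples #1 and #8, where the same stage runs on split data. A minimal sketch of the hand-off idea, not pypelines' actual internals:

import queue
import threading

_END = object()  # sentinel marking the end of the stream

def spawn_stage(source, func):
    """Apply func to source items on a worker thread; yield results on the caller's thread."""
    q = queue.Queue()

    def worker():
        for item in source:
            q.put(func(item))
        q.put(_END)

    threading.Thread(target=worker, daemon=True).start()
    while (item := q.get()) is not _END:
        yield item

# roughly what Iterable(data) | SpawnThread() | Map(add100) amounts to
print(list(spawn_stage([1, 2, 3, 4, 5], lambda x: x + 100)))  # [101, 102, 103, 104, 105]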
Code Example #7
    def test_query_search_for_leaf_on_two_branches(self):
        workflow = Iterable(range(10))
        branch1 = workflow | Map(lambda x: x + 1) | StdOut()
        branch2 = workflow | Filter(lambda x: x > 5) | Assert(
            self, [6, 7, 8, 9])

        self.assertEqual(
            workflow.query("Iterable/Map/StdOut").name(), "StdOut")
        self.assertEqual(
            workflow.query("Iterable/Filter/Assert").name(), "Assert")
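query appears to address a node by a /-separated path of stage names starting from the receiver. A toy sketch of such a lookup over the Node class sketched after Example #3 (hypothetical, not the library's code):

def query(node, path):
    head, _, rest = path.partition("/")
    if type(node).__name__ != head:
        return None
    if not rest:
        return node
    for child in node.children:
        found = query(child, rest)
        if found is not None:
            return found
    return None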
Code Example #8
    def test_producer_consume_10_processes(self):
        data = range(20)
        parallel = Map(add100)
        workflow = Iterable(data) | Parallelize(
            ten_split) | parallel | Join() | StoreAndPickle()
        workflow.run()

        # workflow refers to the StoreAndPickle() instance, which is the only leaf of the DAG
        actual = workflow.load()
        # need to sort the result because with symmetric parallelism the order is not guaranteed
        self.assertEqual(sorted(actual), [d + 100 for d in data])
Code Example #9
    def test_producer_map_consume_with_2_process(self):
        data = [1, 2, 3, 4, 5]

        # CAUTION!
        # Cannot use a lambda (e.g. Map(lambda x: x+100)) yet due to a pickle problem in the
        # multiprocessing lib; a possible solution is to hook the import of pickle in the
        # multiprocessing lib and substitute it with dill.
        # See: http://chimera.labs.oreilly.com/books/1230000000393/ch10.html#_solution_180
        workflow = Iterable(data) | SpawnProcess() | Map(lambda x: x+100) | StoreAndPickle()
        workflow.run()

        # workflow refers to the StoreAndPickle() instance, which is the only leaf of the DAG
        actual = workflow.load()
        self.assertEqual(actual, [d+100 for d in data])
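The caution above stems from a limitation of the stdlib pickle used by multiprocessing: it serializes functions by qualified name, so lambdas cannot be reconstructed in a worker process unless something like dill is substituted. A quick illustration of the limitation itself (dill is a third-party package: pip install dill):

import pickle

import dill  # third-party: pip install dill

add100 = lambda x: x + 100

try:
    pickle.dumps(add100)  # stdlib pickle fails: lambdas have no importable name
except (pickle.PicklingError, AttributeError) as e:
    print("pickle failed:", e)

restored = dill.loads(dill.dumps(add100))  # dill serializes the code object itself
print(restored(1))  # 101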
Code Example #10
 def test_dinasty_third_level(self):
     workflow = Iterable(range(1000)) | Map(lambda x: x + 1) | StdOut()
     self.assertEqual(workflow.dinasty(), "Iterable/Map/StdOut")
Code Example #11
File: wordcount.py Project: satishgoda/pypelines
from pypelines import Map, Filter, Sum, StdOut
from pypelines.io import HTTPClient

workflow = HTTPClient('http://www.gutenberg.org/cache/epub/1232/pg1232.txt',
                      readlines=True) | Filter(lambda line: line != "") | Map(
                          lambda line: line.split(' ')) | Map(
                              lambda words: len(words)) | Sum() | StdOut()
workflow.run()
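For comparison, the same computation without pypelines; a minimal sketch using urllib that, like the pipeline above, counts tokens per non-empty line by splitting on single spaces:

from urllib.request import urlopen

with urlopen('http://www.gutenberg.org/cache/epub/1232/pg1232.txt') as resp:
    lines = resp.read().decode('utf-8', errors='replace').splitlines()

total = sum(len(line.split(' ')) for line in lines if line != "")
print(total)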
Code Example #12
File: wordcount2.py Project: satishgoda/pypelines
from pypelines import Map, Filter, Sum, StdOut, CountByKey, FlatMap, Sort, Head
from pypelines.io import HTTPClient, TextFile

workflow = HTTPClient('http://www.gutenberg.org/cache/epub/1232/pg1232.txt',
                      readlines=True) | Filter(lambda line: line != "")
savefile = workflow | TextFile("macchiavelli.txt")
wordcount = workflow | Map(lambda line: line.split(' ')) | Map(
    lambda words: len(words)) | Sum() | StdOut()
histogram = workflow | FlatMap(lambda line: line.split(' ')) | Filter(
    lambda word: word != "") | Map(lambda word:
                                   (word, 1)) | CountByKey() | Sort(
                                       key_func=lambda data: data[1],
                                       reverse=True) | Head(10) | StdOut()
workflow.run()
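The histogram branch is the classic word count. Its FlatMap/Filter/Map/CountByKey/Sort/Head chain corresponds to this plain-Python sketch (collections.Counter stands in for CountByKey plus the sort):

from collections import Counter
from urllib.request import urlopen

with urlopen('http://www.gutenberg.org/cache/epub/1232/pg1232.txt') as resp:
    lines = resp.read().decode('utf-8', errors='replace').splitlines()

# FlatMap(split) | Filter(word != "") flattens lines into non-empty words
words = [w for line in lines if line != "" for w in line.split(' ') if w != ""]

# Map(word -> (word, 1)) | CountByKey() | Sort(by count, desc) | Head(10) | StdOut()
for word, count in Counter(words).most_common(10):
    print((word, count))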
Code Example #13
 def test_leafs_3_nodes_dag(self):
     workflow = Iterable(range(10)) | Map(lambda x: x + 1) | StdOut()
     self.assertEqual([n.name() for n in workflow.leafs()], ["StdOut"])
Code Example #14
 def test_leafs_2_branches_balanced(self):
     workflow = Iterable(range(1000))
     branch1 = workflow | Map(lambda x: x + 1)
     branch2 = workflow | Filter(lambda x: x > 500)
     self.assertEqual([n.name() for n in workflow.leafs()],
                      ["Map", "Filter"])
Code Example #15
 def test_depth_second_level_is_1(self):
     workflow = Iterable(range(1000)) | Map(lambda x: x + 1)
     self.assertEqual(workflow.depth(), 1)
Code Example #16
 def test_query_search_for_leaf_from_second_level(self):
     workflow = Iterable(range(10))
     mapper = workflow | Map(lambda x: x + 1)
     stdout = mapper | StdOut()
     self.assertEqual(mapper.query("Map/StdOut").name(), "StdOut")
Code Example #17
 def test_query_search_for_leaf(self):
     workflow = Iterable(range(1000))
     branch1 = workflow | Map(lambda x: x + 1) | StdOut()
     self.assertEqual(
         workflow.query("Iterable/Map/StdOut").name(), "StdOut")
Code Example #18
 def test_dinasty_third_level_2_two_branches(self):
     workflow = Iterable(range(1000))
     branch1 = workflow | Map(lambda x: x + 1) | StdOut()
     branch2 = workflow | Filter(lambda x: x > 500) | StdOut()
     self.assertEqual(branch1.dinasty(), "Iterable/Map/StdOut")
     self.assertEqual(branch2.dinasty(), "Iterable/Filter/StdOut")
Code Example #19
 def test_dinasty_compose_dag(self):
     sub_workflow = Map(lambda x: x + 1) | Map(lambda x: x + 1) | Map(
         lambda x: x + 1)
     workflow = Iterable(range(1000)) | sub_workflow | StdOut()
     self.assertEqual(workflow.dinasty(), "Iterable/Map/Map/Map/StdOut")
Code Example #20
 def test_depth_third_level_is_2(self):
     workflow = Iterable(range(1000)) | Map(lambda x: x + 1) | StdOut()
     self.assertEqual(workflow.depth(), 2)
Code Example #21
 def test_depth_third_level_2_two_branches(self):
     workflow = Iterable(range(1000))
     branch1 = workflow | Map(lambda x: x + 1) | StdOut()
     branch2 = workflow | Map(lambda x: x + 1) | StdOut()
     self.assertEqual(branch1.depth(), 2)
     self.assertEqual(branch2.depth(), 2)