def test_on_completed(self):
    """The consumer receives the completion marker (None) after all items."""
    source = Iterable([1, 2, 3])
    checker = Assert(self, [1, 2, 3, None], ignore_on_completed_data=False)
    source.add_child(checker)
    source.run()
def test_error(self):
    """An exception raised inside a Filter predicate propagates out of run()."""
    source = Iterable(['a', 'b', 'c'])
    predicate = Filter(lambda x: int(x) > 1)  # int('a') raises ValueError
    sink = AsList()
    source.add_child(predicate)
    predicate.add_child(sink)
    with self.assertRaises(ValueError):
        source.run()
def test_distincts_tuples(self):
    """Distinct drops duplicate tuples while keeping all unique ones."""
    collected = []
    wf = (Iterable([(1, "abc"), (2, "abc"), (3, "ccc"), (1, "abc"), (1, "qqq")])
          | Distinct()
          | AsList(collected))
    wf.run()
    expected = sorted([(1, "abc"), (2, "abc"), (3, "ccc"), (1, "qqq")])
    self.assertEqual(expected, sorted(collected))
def test_producer_map_consume_with_3_process(self):
    """Data mapped between two thread hand-offs arrives intact at the sink."""
    data = [1, 2, 3, 4, 5]
    # workflow refs the StoreAndPickle() instance, the only leaf of the DAG
    workflow = (Iterable(data) | SpawnThread() | Map(add100)
                | SpawnThread() | StoreAndPickle())
    workflow.run()
    actual = workflow.load()
    self.assertEqual(actual, [d + 100 for d in data])
def test_producer_consume(self):
    """A single thread hand-off delivers the stream unchanged to the sink."""
    data = [1, 2, 3, 4, 5]
    # workflow refs the StoreAndPickle() instance, the only leaf of the DAG
    workflow = Iterable(data) | SpawnThread() | StoreAndPickle()
    workflow.run()
    self.assertEqual(workflow.load(), data)
def test_lambda_with_func_import(self):
    """A lambda that wraps an imported function works across SpawnProcess."""
    data = [1, 2, 3, 4, 5]
    # workflow refs the StoreAndPickle() instance, the only leaf of the DAG
    workflow = (Iterable(data) | SpawnProcess()
                | Map(lambda x: add100(x)) | StoreAndPickle())
    workflow.run()
    actual = workflow.load()
    self.assertEqual(actual, [d + 100 for d in data])
def test_distinct2(self):
    """Tuples equal in one field but not the other are both kept by Distinct."""
    collected = []
    items = [("2016-01-01T10:00:05", 100), ("2016-01-01T10:00:05", 101)]
    wf = Iterable(items) | Distinct() | AsList(collected)
    wf.run()
    self.assertEqual(sorted(items), sorted(collected))
def test_producer_map_consume_with_3_process_plus_1_brach(self):
    """A synchronous branch and a threaded mapping branch share one producer."""
    data = [1, 2, 3, 4, 5]
    workflow = Iterable(data)
    sync_branch = workflow | StoreAndPickle()
    async_branch = (workflow | SpawnThread() | Map(add100)
                    | SpawnThread() | StoreAndPickle())
    workflow.run()
    self.assertEqual(sync_branch.load(), data)
    self.assertEqual(async_branch.load(), [d + 100 for d in data])
def test_query_search_for_leaf_on_two_branches(self):
    """query() resolves a path to the correct leaf on either branch."""
    workflow = Iterable(range(10))
    workflow | Map(lambda x: x + 1) | StdOut()
    workflow | Filter(lambda x: x > 5) | Assert(self, [6, 7, 8, 9])
    self.assertEqual(workflow.query("Iterable/Map/StdOut").name(), "StdOut")
    self.assertEqual(workflow.query("Iterable/Filter/Assert").name(), "Assert")
def test_iter_filter_list(self):
    """Items failing the Filter predicate never reach the sink."""
    source = Iterable([1, 2, 3])
    keep_gt_one = Filter(lambda x: x > 1)
    sink = AsList()
    source.add_child(keep_gt_one)
    keep_gt_one.add_child(sink)
    source.run()
    self.assertEqual(sink.list, [2, 3])
def test_producer_consume_2_processes_inline(self):
    """Parallelize/Join round-trip through an inline two-stage sub-pipeline."""
    data = [1, 2, 3, 4, 5]
    # workflow refs the StoreAndPickle() instance, the only leaf of the DAG
    workflow = (Iterable(data) | Parallelize(two_split)
                | (Map(add100) | Map(add100)) | Join() | StoreAndPickle())
    workflow.run()
    actual = workflow.load()
    # sort the result: with symmetric parallelism order is not guaranteed
    self.assertEqual(sorted(actual), [d + 200 for d in data])
def test_producer_consume_10_processes(self):
    """A ten-way parallel map joins back into one pickled result set."""
    data = range(20)
    mapper = Map(add100)
    # workflow refs the StoreAndPickle() instance, the only leaf of the DAG
    workflow = (Iterable(data) | Parallelize(ten_split)
                | mapper | Join() | StoreAndPickle())
    workflow.run()
    actual = workflow.load()
    # sort the result: with symmetric parallelism order is not guaranteed
    self.assertEqual(sorted(actual), [d + 100 for d in data])
def test_iter_filter_map_list(self):
    """Filter then Map deliver the transformed survivors to the sink.

    Fix: the local previously named ``map`` shadowed the builtin; renamed
    to ``mapper`` (local only, no interface change).
    """
    producer = Iterable([1, 2, 3])
    flt = Filter(lambda x: x > 1)
    mapper = Map(lambda x: x + 10)
    tolist = AsList()
    producer.add_child(flt)
    flt.add_child(mapper)
    mapper.add_child(tolist)
    producer.run()
    self.assertEqual(tolist.list, [12, 13])
def test_producer_map_consume_with_2_process(self): data = [1, 2, 3, 4, 5] #CAUTION!!!! #Cannot use lambda (e.g. Map(lambda x: x+100)) yet due to pickle problem in multiprocessing lib #possible solution is to hook the import of pickle im multiprocessing lib #and substitute with dill. #See: #http://chimera.labs.oreilly.com/books/1230000000393/ch10.html#_solution_180 workflow = Iterable(data) | SpawnProcess() | Map(lambda x: x+100) | StoreAndPickle() workflow.run() #workflow ref to StoreAndPickle() instace that is the only leaf of the DAG actual = workflow.load() self.assertEqual(actual, [d+100 for d in data])
def test_two_lists(self):
    """Two sinks on separate branches each collect their own stream."""
    source = Iterable([1, 2, 3])
    gt_one = Filter(lambda x: x > 1)
    all_items = AsList()
    filtered_items = AsList()
    source.add_child(all_items)
    source.add_child(gt_one)
    gt_one.add_child(filtered_items)
    source.run()
    self.assertEqual(all_items.list, [1, 2, 3])
    self.assertEqual(filtered_items.list, [2, 3])
def test_leafs_2_breanches_balanced(self):
    """leafs() lists each branch's terminal node, in creation order."""
    workflow = Iterable(range(1000))
    workflow | Map(lambda x: x + 1)
    workflow | Filter(lambda x: x > 500)
    leaf_names = [n.name() for n in workflow.leafs()]
    self.assertEqual(leaf_names, ["Map", "Filter"])
def test_depth_third_level_is_2(self):
    """The leaf of a three-node chain sits at depth 2."""
    workflow = Iterable(range(1000)) | Map(lambda x: x + 1) | StdOut()
    self.assertEqual(workflow.depth(), 2)
def test_distinct_two_elements_are_same(self):
    """A repeated scalar is emitted only once by Distinct."""
    collected = []
    wf = Iterable([1, 2, 3, 2]) | Distinct() | AsList(collected)
    wf.run()
    self.assertEqual([1, 2, 3], collected)
def test_distinct(self):
    """Distinct passes an already-unique stream through unchanged."""
    wf = Iterable([1, 2, 3]) | Distinct() | Assert(self, [1, 2, 3])
    wf.run()
def test_depth_third_level_2_two_branches(self):
    """Both leafs of two parallel three-node branches report depth 2."""
    root = Iterable(range(1000))
    first_leaf = root | Map(lambda x: x + 1) | StdOut()
    second_leaf = root | Map(lambda x: x + 1) | StdOut()
    self.assertEqual(first_leaf.depth(), 2)
    self.assertEqual(second_leaf.depth(), 2)
def test_iter2list(self):
    """An Iterable producer drains straight into an AsList sink."""
    source = Iterable([1, 2, 3])
    sink = AsList()
    source.add_child(sink)
    source.run()
    self.assertEqual(sink.list, [1, 2, 3])
def test_dinasty_compose_dag(self):
    """A pre-built sub-pipeline splices into the larger DAG's dinasty path."""
    sub_workflow = (Map(lambda x: x + 1)
                    | Map(lambda x: x + 1)
                    | Map(lambda x: x + 1))
    workflow = Iterable(range(1000)) | sub_workflow | StdOut()
    self.assertEqual(workflow.dinasty(), "Iterable/Map/Map/Map/StdOut")
def test_dinasty_third_level(self):
    """dinasty() renders the root-to-leaf path of a linear chain."""
    workflow = Iterable(range(1000)) | Map(lambda x: x + 1) | StdOut()
    self.assertEqual(workflow.dinasty(), "Iterable/Map/StdOut")
def test_sort_non_distinct(self):
    """Sort orders the stream while preserving duplicate values."""
    collected = []
    wf = Iterable([1, 3, 2, 4, 2, 2]) | Sort() | AsList(collected)
    wf.run()
    self.assertEqual([1, 2, 2, 2, 3, 4], collected)
def test_dinasty_third_level_2_two_branches(self):
    """Each branch reports its own root-to-leaf dinasty path."""
    root = Iterable(range(1000))
    map_branch = root | Map(lambda x: x + 1) | StdOut()
    filter_branch = root | Filter(lambda x: x > 500) | StdOut()
    self.assertEqual(map_branch.dinasty(), "Iterable/Map/StdOut")
    self.assertEqual(filter_branch.dinasty(), "Iterable/Filter/StdOut")
def test_query_search_for_leaf(self):
    """query() resolves a full path string down to the leaf node.

    Fix: ``assertAlmostEqual`` (meant for numeric comparison with rounding)
    was used on two strings; replaced with ``assertEqual``, the correct
    assertion for exact equality. Also dropped the unused ``branch1`` local.
    """
    workflow = Iterable(range(1000))
    workflow | Map(lambda x: x + 1) | StdOut()
    self.assertEqual(workflow.query("Iterable/Map/StdOut").name(), "StdOut")
def test_query_search_for_leaf_from_second_level(self):
    """query() works relative to a non-root node.

    Fix: the local previously named ``map`` shadowed the builtin; renamed
    to ``mapper``. The unused ``stdout`` binding was dropped (the pipe still
    attaches the StdOut node as a side effect).
    """
    workflow = Iterable(range(10))
    mapper = workflow | Map(lambda x: x + 1)
    mapper | StdOut()
    self.assertEqual(mapper.query("Map/StdOut").name(), "StdOut")
def test_depth_root_is_0(self):
    """The root node of a DAG reports depth 0."""
    root = Iterable(range(1000))
    self.assertEqual(root.depth(), 0)
def test_leafs_single_node_dag(self):
    """A childless root is its own single leaf."""
    root = Iterable(range(10))
    self.assertEqual(root.leafs(), [root])
def test_leafs_3_nodes_dag(self):
    """Only the terminal node of a linear chain is reported as a leaf."""
    workflow = Iterable(range(10)) | Map(lambda x: x + 1) | StdOut()
    leaf_names = [n.name() for n in workflow.leafs()]
    self.assertEqual(leaf_names, ["StdOut"])