def test_open_close(self):
    """A pipeline entered via `with` stays open across multiple runs
    inside the block and is closed again on exit."""
    from math import sqrt

    def root(arg):
        return sqrt(arg)

    def cubed(arg):
        return arg ** 3

    pl = Pipeline()
    pl.add(ReplicatingFork(2))
    pl.add(Processor(root), Processor(cubed))
    pl.add(Processor(root), Pipe())
    pl.add(Pipe())
    pl.add(Join(2))
    pl.add(Processor(print))

    # Before entering the context manager the pipeline must be closed.
    self.assertTrue(pl.closed, 'Pipeline should be closed')
    with pl as pipeline:
        self.assertTrue(pipeline.opened, 'Pipeline should be opened')
        pipeline.run([1, 2])
        # Running must not close it while inside the `with` block.
        self.assertTrue(pipeline.opened, 'Pipeline should be opened')
        self.assertFalse(pipeline.closed, 'Pipeline should be opened')
        pipeline.run([1, 2])
        self.assertTrue(pipeline.opened, 'Pipeline should be opened')
    # Leaving the block closes the pipeline again.
    self.assertTrue(pl.closed)
def test_fork_pipe_mix(self):
    """Mix a pass-through Pipe with a BalancingFork beneath a
    ReplicatingFork and verify the joined results."""
    self.counts = Counter()

    def count(arg):
        _, text = arg
        self.counts[text] += 1
        # NOTE(review): this guard compares the number of *distinct*
        # strings seen to 4 -- confirm that is the intended trigger
        # for the assertions below.
        if len(self.counts) == 4:
            self.assertEqual(self.counts['ttring'], 2)
            self.assertEqual(self.counts['string'], 2)

    def manipulate(arg):
        # Bump the character at the current stage index by one code point.
        stage, text = arg
        chars = list(text)
        chars[stage] = chr(ord(chars[stage]) + 1)
        return (stage + 1, ''.join(chars))

    def dont_manipulate(arg):
        # Advance the stage counter but leave the string untouched.
        stage, text = arg
        return (stage + 1, text)

    pl = Pipeline()
    pl.add(ReplicatingFork(2))
    pl.add(Pipe(), BalancingFork(2))
    pl.add(Processor(dont_manipulate),
           Processor(manipulate),
           Processor(manipulate))
    pl.add(Join(3))
    pl.add(Processor(count))
    pl.run([(0, 'string'), (0, 'string')])
def test_no_add_after_run(self):
    """A pipeline must reject additional stages once it has been run."""
    def add(arg):
        return arg + 1

    def sub(arg):
        return arg - 1

    def printer(arg):
        print(arg)

    with self.assertRaises(Exception) as e:
        with Pipeline() as pl:
            pl.add(BalancingFork(2))
            pl.add(Processor(add), Processor(sub))
            pl.add(Join(2))
            pl.add(Processor(print))
            pl.run([3])
            # This add must raise: the pipeline has already been run.
            pl.add(Processor(printer))
            pl.run([10])
    self.assertEqual(str(e.exception),
                     'Pipelines cannot be modified after being run!')
def test_reopening_pipeline(self):
    """Repeated runs inside a `with` block keep the pipeline open; it
    only closes once the block is exited."""
    def add(arg):
        return arg + 1

    def sub(arg):
        return arg - 1

    with Pipeline() as pl:
        pl.add(BalancingFork(2))
        pl.add(Processor(add), Processor(sub))
        pl.add(Join(2))
        pl.run([3])
        self.assertTrue(pl.opened, 'Pipeline should be open')
        self.assertFalse(pl.closed, 'Pipeline should be open')
        pl.run([4])
        pl.run([5])
        self.assertTrue(pl.opened, 'Pipeline should be open')
        self.assertFalse(pl.closed, 'Pipeline should be open')
        pl.run([10])
        self.assertTrue(pl.opened, 'Pipeline should be open')
        self.assertFalse(pl.closed, 'Pipeline should be open')
    # Exiting the `with` block flips the state to closed.
    self.assertFalse(pl.opened, 'Pipeline should be closed')
    self.assertTrue(pl.closed, 'Pipeline should be closed')
def main():
    """Feed the SMS corpus through an n-gram stage, then replicate it to
    three model-training processors."""
    pl = Pipeline()
    pl.add(Processor(generate_ngrams))
    pl.add(ReplicatingFork(3))
    pl.add(Processor(train_decision_tree),
           Processor(train_random_forest),
           Processor(train_k_neighbors))

    with open("sms_data.txt", "r", encoding='latin-1') as file:
        text = file.read().split('\n')
        pl.run([text])
def test_too_many_processors(self):
    """Adding more processors than the previous stage's fanout must fail."""
    pl = Pipeline()
    pl.add(ReplicatingFork(2))
    # Three processors cannot be divided across a fanout of two.
    with self.assertRaises(Exception) as e:
        pl.add(Processor(dummy_return_arg),
               Processor(dummy_return_arg),
               Processor(dummy_return_arg))
    self.assertEqual(
        str(e.exception),
        'Ambiguity Error: Jobs cannot be divided among fanout of previous stage'
    )
def test_pipeline_single_processor(self):
    """A processor's output flows through an explicit Pipe to the next stage."""
    def increment(arg):
        return arg + 1

    def check(arg):
        self.assertEqual(arg, 4)

    pl = Pipeline()
    pl.add(Processor(increment))
    pl.add(Pipe())
    pl.add(Processor(check))
    pl.run([3])
def test_implicit_pipes(self):
    """Consecutive processors may be chained without explicit Pipe stages."""
    pl = Pipeline()
    pl.add(Processor(dummy_return_arg))

    # Two further processors should each attach without error.
    for _ in range(2):
        try:
            pl.add(Processor(dummy_return_arg))
        except Exception:
            self.fail('Should not raise an exception')
def test_pipeline_basic(self):
    """A single-processor pipeline delivers its input unchanged."""
    def check(arg):
        self.assertEqual(arg, 3)

    pl = Pipeline()
    pl.add(Processor(check))
    pl.run([3])
def test_pipeline_valid_type(self):
    """Pipeline.add must reject non-Conveyor objects and accept Processors."""
    def finalize(arg):
        return True

    pl = Pipeline()

    # String: not a Conveyor type.
    with self.assertRaises(Exception) as e:
        pl.add("cookie")
    self.assertEqual(
        str(e.exception),
        'Invalid type! Pipelines must include only Conveyor types!')

    # List: not a Conveyor type.
    with self.assertRaises(Exception) as e:
        pl.add([])
    self.assertEqual(
        str(e.exception),
        'Invalid type! Pipelines must include only Conveyor types!')

    # Valid processor must be accepted.
    try:
        pl.add(Processor(finalize))
    except Exception as err:
        # Bug fix: report the exception actually caught here; previously
        # this formatted the stale assertRaises context `e` from the
        # list-rejection block above, not the new failure.
        self.fail('Should not raise an exception: ' + str(err))
def test_run_single_element(self):
    """run() must accept a bare iterable such as a string."""
    with Pipeline() as pl:
        pl.add(Processor(dummy_return_arg))
        try:
            pl.run('test')
        except Exception as e:
            # Bug fix: `e` was previously unbound (`except Exception:`),
            # so a caught exception raised NameError inside the handler
            # instead of reporting the failure cleanly.
            self.fail('Should not raise an exception: ' + str(e))
def test_pipeline_ambiguity(self):
    """Joins that merge only part of the current fanout must be rejected."""
    def first_job(arg):
        return 'job1'

    def second_job(arg):
        return 'job2'

    pl = Pipeline()
    pl.add(Processor(first_job))
    pl.add(ReplicatingFork(2))
    pl.add(Processor(first_job), Processor(second_job))
    pl.add(ReplicatingFork(4), ReplicatingFork(2))
    pl.add(Processor(second_job), Processor(first_job))
    # Join(2) + Join(4) does not line up with the 4 + 2 fanout above.
    with self.assertRaises(Exception) as e:
        pl.add(Join(2), Join(4))
    self.assertEqual(str(e.exception),
                     'Ambiguity Error: Partially joining forks')
def test_pipeline_valid_job(self):
    """Processor requires a callable job."""
    def finalize(arg):
        self.assertEqual(arg, 3)

    pl = Pipeline()

    # A non-callable job must be rejected.
    with self.assertRaises(Exception) as e:
        pl.add(Processor("cookie"))
    self.assertEqual(
        str(e.exception),
        'Invalid type! Pipeline processors must have a valid job!')

    # A plain function must be accepted.
    try:
        pl.add(Processor(finalize))
    except Exception:
        self.fail('Should not raise an exception')
def test_automatic_open_close(self):
    """run() on a closed pipeline opens it for the run and closes it after."""
    from math import sqrt

    def root(arg):
        return sqrt(arg)

    def cubed(arg):
        return arg ** 3

    pl = Pipeline()
    pl.add(ReplicatingFork(2))
    pl.add(Processor(root), Processor(cubed))
    pl.add(Processor(root), Pipe())
    pl.add(Pipe())
    pl.add(Join(2))
    pl.add(Processor(print))

    # Closed before the run, and automatically closed again afterwards.
    self.assertTrue(pl.closed, 'Pipeline should be closed')
    pl.run([2, 7, 9])
    self.assertTrue(pl.closed, 'Pipeline should be closed')
def test_balancing_forks(self):
    """A BalancingFork spreads inputs evenly across its branches."""
    self.counts = Counter()

    def left_branch(arg):
        return 'job1'

    def right_branch(arg):
        return 'job2'

    def tally(arg):
        self.counts[arg] += 1

    pl = Pipeline()
    pl.add(BalancingFork(2))
    pl.add(Processor(left_branch), Processor(right_branch))
    pl.add(Join(2))
    pl.add(Processor(tally))
    pl.run([False, False])
    # Two inputs across two branches: each branch should fire once.
    self.assertEqual(self.counts['job1'], self.counts['job2'])
def test_allow_multiple_pipeline_runs(self):
    """The same pipeline can be run more than once.

    A lock is used as a completion signal: the final-stage job releases
    it only when the *second* run's payload arrives, proving the second
    run actually flowed through the pipeline.
    """
    # Final stage: release the lock when the second run's item shows up.
    # NOTE(review): assumes the job executes where `self.lock` is shared
    # with this test (i.e. not in a separate process with a copied lock)
    # -- confirm against the Pipeline execution model.
    def job(arg):
        if arg == 'second':
            self.lock.release()
    self.lock = Lock()
    pl = Pipeline()
    pl.add(Processor(dummy_return_arg))
    pl.add(Pipe())
    pl.add(Processor(dummy_return_arg))
    pl.add(Pipe())
    pl.add(Processor(job))
    # Hold the lock; only job('second') can release it.
    self.lock.acquire()
    pl.run(['first'])
    pl.run(['second'])
    # Non-blocking re-acquire: succeeds only if job released the lock.
    self.lock.acquire(blocking=False)
    self.assertTrue(self.lock.locked(), 'The second pipeline run was not successful')
def test_processor_join_mix(self):
    """A single stage may not mix Processors with Joins."""
    def job(arg):
        return 'job'

    pl = Pipeline()
    pl.add(ReplicatingFork(3))
    with self.assertRaises(Exception) as e:
        pl.add(Processor(job), Join(2))
    self.assertEqual(
        str(e.exception),
        'Invalid types! All non Pipe objects in stage must be in same subclass'
    )
def test_disallow_inane_opens_and_closes_in_with_statement(self):
    """Explicit open()/close() are forbidden inside a `with` block."""
    with Pipeline() as pl:
        pl.add(Processor(dummy_return_arg))

        # open() is redundant here and must raise.
        with self.assertRaises(Exception) as e:
            pl.open()
        self.assertEqual(
            str(e.exception),
            'Cannot open a pipeline within a `with` statement!')

        # close() would fight the context manager and must raise too.
        with self.assertRaises(Exception) as e:
            pl.close()
        self.assertEqual(
            str(e.exception),
            'Cannot close a pipeline within a `with` statement!')
def test_open_close_no_with(self):
    """Manual open()/close() works outside a `with` statement."""
    from math import sqrt

    def root(arg):
        return sqrt(arg)

    def cubed(arg):
        return arg ** 3

    pl = Pipeline()
    pl.add(ReplicatingFork(2))
    pl.add(Processor(root), Processor(cubed))
    pl.add(Processor(root), Pipe())
    pl.add(Join(2))
    pl.add(Processor(print))

    self.assertTrue(pl.closed, 'Pipeline should be closed')
    pl.open()
    pl.run([16, 3, 81])
    self.assertTrue(pl.opened, 'Pipeline should be open')
    pl.close()
    self.assertTrue(pl.closed, 'Pipeline should be closed')
    pl.open()
    # Leave it open -- daemon children should be cleaned up
    self.assertTrue(pl.opened, 'Pipeline should be open')
def test_processor_shared_memory(self):
    """Processors in a pipeline created with shared_memory_amt can read
    and write the same SharedMemory segment.

    NOTE(review): `common_memory` is the segment name and is defined
    outside this block -- presumably a module-level constant; verify.
    """
    # Stage 1: fill the first four bytes of the shared segment.
    def worker1_task(args):
        shmem = shared_memory.SharedMemory(name=common_memory)
        buffer = shmem.buf
        buffer[:4] = bytearray([00, 11, 22, 33])
        shmem.close()
        return args

    # Stage 2: overwrite byte 0, proving it sees stage 1's segment.
    def worker2_task(args):
        shmem = shared_memory.SharedMemory(name=common_memory)
        buffer = shmem.buf
        buffer[0] = 44
        shmem.close()
        return args

    # Stage 3: verify both writes landed, then unlink the segment.
    def cleanup_task(args):
        shmem = shared_memory.SharedMemory(name=common_memory)
        import array
        print(array.array('b', shmem.buf[:4]))
        assert shmem.buf[0] == 44
        assert shmem.buf[1] == 11
        assert shmem.buf[2] == 22
        assert shmem.buf[3] == 33
        shmem.close()
        # unlink() destroys the segment once all handles are closed.
        shmem.unlink()
        return args

    pl = Pipeline(shared_memory_amt=10)
    pl.add(Processor(worker1_task))
    pl.add(Processor(worker2_task))
    pl.add(Processor(cleanup_task))
    pl.run(['abc'])