Exemple #1
0
 def test_disallow_abstract_Fork(self):
     """Using the abstract ``_Fork`` class in a pipeline raises a descriptive error."""
     expected_message = (
         '_Fork is an abstract class. Use ReplicatingFork or BalancingFork instead.'
     )
     pipeline = Pipeline()

     with self.assertRaises(Exception) as raised:
         pipeline.add(_Fork(2))

     self.assertEqual(str(raised.exception), expected_message)
Exemple #2
0
def main():
    """Assemble the n-gram / three-trainer pipeline and run it on ``sms_data.txt``.

    The file is read as latin-1 text and split on newlines; the resulting
    list of lines is handed to the pipeline as a single work item.
    """
    pipeline = Pipeline()
    pipeline.add(Processor(generate_ngrams))
    pipeline.add(ReplicatingFork(3))
    pipeline.add(
        Processor(train_decision_tree),
        Processor(train_random_forest),
        Processor(train_k_neighbors),
    )

    with open("sms_data.txt", "r", encoding='latin-1') as sms_file:
        lines = sms_file.read().split('\n')
        # The whole list of lines is one element of the run input.
        pipeline.run([lines])
Exemple #3
0
    def test_pipeline_valid_start(self):
        """A Join added as the very first stage of a pipeline must be rejected."""
        pl = Pipeline()

        # Join (should not be allowed at pipeline head)
        with self.assertRaises(Exception) as ctx:
            pl.add(Join(2))
        self.assertEqual(
            str(ctx.exception),
            'A pipeline cannot start with a Join (nothing to join to!)')
Exemple #4
0
    def test_pipeline_single_processor(self):
        """Run 3 through an incrementing stage, a pipe, and a stage expecting 4."""
        def increment(value):
            return value + 1

        def expect_four(value):
            self.assertEqual(value, 4)

        pipeline = Pipeline()
        pipeline.add(Processor(increment))
        pipeline.add(Pipe())
        pipeline.add(Processor(expect_four))

        pipeline.run([3])
Exemple #5
0
    def test_pipeline_valid_type(self):
        """Only valid stage objects may be added; strings and lists are rejected.

        Adding a string or a list must raise with the 'Invalid type!' message,
        while adding a ``Processor`` must succeed.
        """
        def finalize(arg):
            return True

        pl = Pipeline()
        invalid_type_message = (
            'Invalid type! Pipelines must include only Conveyor types!')

        # A string, then a list: both must be refused with the same message.
        for bad_stage in ("cookie", []):
            with self.assertRaises(Exception) as ctx:
                pl.add(bad_stage)
            self.assertEqual(str(ctx.exception), invalid_type_message)

        # Valid processor
        try:
            pl.add(Processor(finalize))
        except Exception as exc:
            # Report the exception that was actually raised, not the stale
            # assertRaises context from the checks above.
            self.fail('Should not raise an exception: ' + str(exc))
Exemple #6
0
    def test_no_add_after_run(self):
        """Adding a stage after the pipeline has been run must raise."""
        def increment(value):
            return value + 1

        def decrement(value):
            return value - 1

        def show(value):
            print(value)

        with self.assertRaises(Exception) as raised:
            with Pipeline() as pipeline:
                pipeline.add(BalancingFork(2))
                pipeline.add(Processor(increment), Processor(decrement))
                pipeline.add(Join(2))
                pipeline.add(Processor(print))

                pipeline.run([3])

                # Modifying the pipeline after a run is the step under test.
                pipeline.add(Processor(show))

                pipeline.run([10])

        expected_message = 'Pipelines cannot be modified after being run!'
        self.assertEqual(str(raised.exception), expected_message)
Exemple #7
0
    def test_reopening_pipeline(self):
        """A pipeline stays open across several runs inside ``with`` and closes on exit."""
        def increment(value):
            return value + 1

        def decrement(value):
            return value - 1

        def assert_open(pipeline):
            # Both flags are checked after every batch of runs.
            self.assertTrue(pipeline.opened, 'Pipeline should be open')
            self.assertFalse(pipeline.closed, 'Pipeline should be open')

        with Pipeline() as pipeline:
            pipeline.add(BalancingFork(2))
            pipeline.add(Processor(increment), Processor(decrement))
            pipeline.add(Join(2))

            pipeline.run([3])
            assert_open(pipeline)

            pipeline.run([4])
            pipeline.run([5])
            assert_open(pipeline)

            pipeline.run([10])
            assert_open(pipeline)

        self.assertFalse(pipeline.opened, 'Pipeline should be closed')
        self.assertTrue(pipeline.closed, 'Pipeline should be closed')
Exemple #8
0
 def test_run_single_element(self):
     """Running the pipeline on the bare value ``'test'`` must not raise."""
     with Pipeline() as pl:
         pl.add(Processor(dummy_return_arg))
         try:
             pl.run('test')
         except Exception as exc:
             # Bind the exception so the failure message can include it;
             # the original referenced an undefined name here.
             self.fail('Should not raise an exception: ' + str(exc))
Exemple #9
0
    def test_pipeline_basic(self):
        """A one-stage pipeline passes the input value 3 to its processor."""
        def expect_three(value):
            self.assertEqual(value, 3)

        pipeline = Pipeline()
        pipeline.add(Processor(expect_three))

        pipeline.run([3])
Exemple #10
0
    def test_disallow_inane_opens_and_closes_in_with_statement(self):
        """Explicit ``open()``/``close()`` calls inside a ``with`` block must raise."""
        with Pipeline() as pipeline:
            pipeline.add(Processor(dummy_return_arg))

            # Each forbidden call is paired with the message it must produce.
            forbidden_calls = (
                (pipeline.open,
                 'Cannot open a pipeline within a `with` statement!'),
                (pipeline.close,
                 'Cannot close a pipeline within a `with` statement!'),
            )
            for call, expected_message in forbidden_calls:
                with self.assertRaises(Exception) as raised:
                    call()
                self.assertEqual(str(raised.exception), expected_message)
Exemple #11
0
    def test_implicit_pipes(self):
        """Processors can be added back to back without explicit pipes between them."""
        pipeline = Pipeline()
        pipeline.add(Processor(dummy_return_arg))

        # Add a second and then a third processor directly after the first.
        for _ in range(2):
            try:
                pipeline.add(Processor(dummy_return_arg))
            except Exception:
                self.fail('Should not raise an exception')
Exemple #12
0
 def test_fork_join_mix(self):
     """A single stage mixing a Join with a fork must be rejected."""
     expected_message = (
         'Invalid types! All non Pipe objects in stage must be in same subclass'
     )
     pipeline = Pipeline()
     pipeline.add(ReplicatingFork(3))

     with self.assertRaises(Exception) as raised:
         pipeline.add(Join(2), BalancingFork(2))

     self.assertEqual(str(raised.exception), expected_message)
Exemple #13
0
    def test_too_many_processors(self):
        """Three processors after a fork of two must raise an ambiguity error."""
        expected_message = (
            'Ambiguity Error: Jobs cannot be divided among fanout of previous stage'
        )
        pipeline = Pipeline()
        pipeline.add(ReplicatingFork(2))

        with self.assertRaises(Exception) as raised:
            pipeline.add(
                Processor(dummy_return_arg),
                Processor(dummy_return_arg),
                Processor(dummy_return_arg),
            )

        self.assertEqual(str(raised.exception), expected_message)
Exemple #14
0
    def test_processor_join_mix(self):
        """A single stage mixing a Processor with a Join must be rejected."""
        def constant_job(_value):
            return 'job'

        expected_message = (
            'Invalid types! All non Pipe objects in stage must be in same subclass'
        )
        pipeline = Pipeline()
        pipeline.add(ReplicatingFork(3))

        with self.assertRaises(Exception) as raised:
            pipeline.add(Processor(constant_job), Join(2))

        self.assertEqual(str(raised.exception), expected_message)
Exemple #15
0
    def test_pipes_connect_to_pipes(self):
        """Two consecutive Pipe stages can be added to an empty pipeline."""
        pipeline = Pipeline()

        # First a pipe at the head, then another pipe right after it.
        for _ in range(2):
            try:
                pipeline.add(Pipe())
            except Exception:
                self.fail('Should not raise an exception')
Exemple #16
0
    def test_processor_shared_memory(self):
        """Three chained processors read and write one named shared-memory block.

        The first stage writes four bytes, the second overwrites the first
        byte, and the last stage checks the combined result before releasing
        the block.
        """
        # NOTE(review): `common_memory` is defined outside this method;
        # presumably it is the name of the block made available by
        # `Pipeline(shared_memory_amt=10)` -- confirm against the library.
        def worker1_task(args):
            # Attach to the existing block by name and fill its first 4 bytes.
            shmem = shared_memory.SharedMemory(name=common_memory)
            buffer = shmem.buf
            buffer[:4] = bytearray([00, 11, 22, 33])
            shmem.close()

            # Pass the input through unchanged to the next stage.
            return args

        def worker2_task(args):
            # Overwrite only the first byte, leaving bytes 1-3 as written above.
            shmem = shared_memory.SharedMemory(name=common_memory)
            buffer = shmem.buf
            buffer[0] = 44
            shmem.close()

            return args

        def cleanup_task(args):
            shmem = shared_memory.SharedMemory(name=common_memory)
            import array
            # Print the first four bytes as signed values for debugging.
            print(array.array('b', shmem.buf[:4]))
            # Byte 0 comes from worker2_task, bytes 1-3 from worker1_task.
            assert shmem.buf[0] == 44
            assert shmem.buf[1] == 11
            assert shmem.buf[2] == 22
            assert shmem.buf[3] == 33

            # Detach from the block, then request its destruction.
            # NOTE(review): if an assert above fails, close()/unlink() are
            # skipped -- confirm whether that is acceptable here.
            shmem.close()
            shmem.unlink()

            return args

        pl = Pipeline(shared_memory_amt=10)
        pl.add(Processor(worker1_task))
        pl.add(Processor(worker2_task))
        pl.add(Processor(cleanup_task))
        pl.run(['abc'])
Exemple #17
0
    def test_pipeline_valid_job(self):
        """A Processor built from a non-function is rejected; a function is accepted."""
        def expect_three(value):
            self.assertEqual(value, 3)

        pipeline = Pipeline()

        # A string is not a usable job.
        expected_message = 'Invalid type! Pipeline processors must have a valid job!'
        with self.assertRaises(Exception) as raised:
            pipeline.add(Processor("cookie"))
        self.assertEqual(str(raised.exception), expected_message)

        # A plain function is a usable job.
        try:
            pipeline.add(Processor(expect_three))
        except Exception:
            self.fail('Should not raise an exception')
Exemple #18
0
    def test_disallow_inane_opens_and_closes_normal(self):
        """Closing a closed pipeline or opening an open one must raise."""
        already_closed = 'Cannot close a Pipeline that is already closed!'
        already_open = 'Cannot open a Pipeline that is already open!'

        def assert_rejected(call, expected_message):
            # Invoke the call and check the message of the raised exception.
            with self.assertRaises(Exception) as raised:
                call()
            self.assertEqual(str(raised.exception), expected_message)

        pipeline = Pipeline()

        # A fresh pipeline cannot be closed.
        assert_rejected(lambda: pipeline.close(), already_closed)

        pipeline.open()
        # An opened pipeline cannot be opened again.
        assert_rejected(lambda: pipeline.open(), already_open)

        pipeline.close()
        # After an explicit close, a second close is rejected.
        assert_rejected(lambda: pipeline.close(), already_closed)
Exemple #19
0
    def test_open_close_no_with(self):
        """Drive open/run/close by hand (no ``with``) and check the state flags."""
        from math import sqrt

        def root(value):
            return sqrt(value)

        def cubed(value):
            return value ** 3

        pipeline = Pipeline()
        pipeline.add(ReplicatingFork(2))
        pipeline.add(Processor(root), Processor(cubed))
        pipeline.add(Processor(root), Pipe())
        pipeline.add(Join(2))
        pipeline.add(Processor(print))
        self.assertTrue(pipeline.closed, 'Pipeline should be closed')

        pipeline.open()
        pipeline.run([16, 3, 81])
        self.assertTrue(pipeline.opened, 'Pipeline should be open')

        pipeline.close()
        self.assertTrue(pipeline.closed, 'Pipeline should be closed')

        # Deliberately left open: daemon children should be cleaned up.
        pipeline.open()
        self.assertTrue(pipeline.opened, 'Pipeline should be open')
Exemple #20
0
    def test_pipeline_ambiguity(self):
        """Adding ``Join(2), Join(4)`` after the forks below must raise an ambiguity error."""
        def first_job(_value):
            return 'job1'

        def second_job(_value):
            return 'job2'

        pipeline = Pipeline()
        pipeline.add(Processor(first_job))
        pipeline.add(ReplicatingFork(2))
        pipeline.add(Processor(first_job), Processor(second_job))
        pipeline.add(ReplicatingFork(4), ReplicatingFork(2))
        pipeline.add(Processor(second_job), Processor(first_job))

        with self.assertRaises(Exception) as raised:
            pipeline.add(Join(2), Join(4))

        expected_message = 'Ambiguity Error: Partially joining forks'
        self.assertEqual(str(raised.exception), expected_message)
Exemple #21
0
    def test_automatic_open_close(self):
        """A pipeline that was never opened explicitly reports closed before and after ``run``."""
        from math import sqrt

        def root(value):
            return sqrt(value)

        def cubed(value):
            return value ** 3

        pipeline = Pipeline()
        pipeline.add(ReplicatingFork(2))
        pipeline.add(Processor(root), Processor(cubed))
        pipeline.add(Processor(root), Pipe())
        pipeline.add(Pipe())
        pipeline.add(Join(2))
        pipeline.add(Processor(print))

        self.assertTrue(pipeline.closed, 'Pipeline should be closed')
        pipeline.run([2, 7, 9])
        self.assertTrue(pipeline.closed, 'Pipeline should be closed')
Exemple #22
0
    def test_open_close(self):
        """Used as a context manager, the pipeline is open inside the block and closed after."""
        from math import sqrt

        def root(value):
            return sqrt(value)

        def cubed(value):
            return value ** 3

        built = Pipeline()
        built.add(ReplicatingFork(2))
        built.add(Processor(root), Processor(cubed))
        built.add(Processor(root), Pipe())
        built.add(Pipe())
        built.add(Join(2))
        built.add(Processor(print))
        self.assertTrue(built.closed, 'Pipeline should be closed')

        with built as active:
            self.assertTrue(active.opened, 'Pipeline should be opened')

            active.run([1, 2])
            self.assertTrue(active.opened, 'Pipeline should be opened')
            self.assertFalse(active.closed, 'Pipeline should be opened')

            active.run([1, 2])
            self.assertTrue(active.opened, 'Pipeline should be opened')

        self.assertTrue(built.closed)
Exemple #23
0
    def test_replicating_forks(self):
        """Two inputs through a two-way fork yield equal 'job1' and 'job2' tallies.

        Despite the test name, the fork used here is a ``BalancingFork``.
        """
        self.counts = Counter()

        def first_job(_value):
            return 'job1'

        def second_job(_value):
            return 'job2'

        def tally(label):
            self.counts[label] += 1

        pipeline = Pipeline()
        pipeline.add(BalancingFork(2))
        pipeline.add(Processor(first_job), Processor(second_job))
        pipeline.add(Join(2))
        pipeline.add(Processor(tally))
        pipeline.run([False, False])
        print(pipeline)

        job1_total = self.counts['job1']
        job2_total = self.counts['job2']
        self.assertEqual(job1_total, job2_total)
Exemple #24
0
    def test_allow_multiple_pipeline_runs(self):
        """Run the same pipeline twice; the last stage releases a lock on the second run."""
        self.lock = Lock()

        def release_on_second(value):
            if value == 'second':
                self.lock.release()

        pipeline = Pipeline()
        pipeline.add(Processor(dummy_return_arg))
        pipeline.add(Pipe())
        pipeline.add(Processor(dummy_return_arg))
        pipeline.add(Pipe())
        pipeline.add(Processor(release_on_second))

        # Hold the lock before running so only the second run can free it.
        self.lock.acquire()
        pipeline.run(['first'])
        pipeline.run(['second'])

        # NOTE(review): a non-blocking acquire leaves the lock held whether or
        # not the job released it, so the assertion below looks like it passes
        # either way -- confirm whether the acquire result should be asserted.
        self.lock.acquire(blocking=False)
        failure_message = 'The second pipeline run was not successful'
        self.assertTrue(self.lock.locked(), failure_message)
Exemple #25
0
    def test_fork_and_join1(self):
        """Two forks in sequence produce four distinct strings, each counted once.

        Work items are ``(stage, string)`` tuples; ``stage`` is the index of
        the character a manipulating processor bumps to the next code point.
        """
        self.counts = Counter()

        def tally(item):
            _, text = item
            self.counts[text] += 1
            # Only check once all four distinct variants have been seen.
            if len(self.counts) == 4:
                for variant in ('ttring', 'turing', 'suring', 'string'):
                    self.assertEqual(self.counts[variant], 1)

        def advance_only(item):
            position, text = item
            return (position + 1, text)

        def bump_and_advance(item):
            position, text = item
            chars = list(text)
            chars[position] = chr(ord(chars[position]) + 1)
            return (position + 1, ''.join(chars))

        pipeline = Pipeline()
        pipeline.add(BalancingFork(2))
        pipeline.add(Pipe())
        pipeline.add(Processor(bump_and_advance), Processor(advance_only))
        pipeline.add(ReplicatingFork(2))
        pipeline.add(
            Processor(bump_and_advance),
            Processor(advance_only),
            Processor(bump_and_advance),
            Processor(advance_only),
        )
        pipeline.add(Join(4))
        pipeline.add(Processor(tally))
        pipeline.run([(0, 'string'), (0, 'string')])
Exemple #26
0
    def test_processor_pipe_mix(self):
        """A stage may mix one Processor with plain Pipes after a three-way fork.

        The single ``(0, 'string')`` input is expected to reach the counting
        stage three times: once modified to ``'ttring'`` by the processor
        branch and twice unchanged through the pipe branches.
        """
        self.counts = Counter()

        def count(arg):
            _, string = arg
            self.counts[string] += 1
            # Only two distinct strings are ever counted, so the number of
            # keys never reaches 3; gate on the total items seen instead so
            # the assertions below actually execute.
            if sum(self.counts.values()) == 3:
                self.assertEqual(self.counts['ttring'], 1)
                self.assertEqual(self.counts['string'], 2)

        def manipulate(arg):
            stage, string = arg
            chars = list(string)
            # Bump the character at index `stage` to the next code point.
            chars[stage] = chr(ord(chars[stage]) + 1)
            return (stage + 1, ''.join(chars))

        pl = Pipeline()
        pl.add(ReplicatingFork(3))
        pl.add(Processor(manipulate), Pipe(), Pipe())
        pl.add(Join(3))
        pl.add(Processor(count))
        pl.run([(0, 'string')])