Ejemplo n.º 1
0
    def test_fork_and_join1(self):
        """Run two inputs through a balancing fork, replicating forks and a join.

        Each work item is a ``(stage, string)`` tuple. ``manipulate`` bumps the
        character at index ``stage`` to the next code point, ``dont_manipulate``
        leaves the string alone, and both advance ``stage`` by one. The final
        processor tallies the strings it receives; once four distinct strings
        have been tallied it checks that each expected variant was seen
        exactly once.
        """
        self.counts = Counter()

        def manipulate(arg):
            # Replace the character at the current stage with its successor.
            position, text = arg
            chars = list(text)
            chars[position] = chr(ord(chars[position]) + 1)
            return (position + 1, ''.join(chars))

        def dont_manipulate(arg):
            # Pass the string through untouched; only the stage advances.
            position, text = arg
            return (position + 1, text)

        def count(arg):
            _, text = arg
            self.counts[text] += 1
            if len(self.counts) != 4:
                return
            for expected in ('ttring', 'turing', 'suring', 'string'):
                self.assertEqual(self.counts[expected], 1)

        pipeline = Pipeline()
        pipeline.add(BalancingFork(2))
        pipeline.add(Pipe())
        pipeline.add(Processor(manipulate), Processor(dont_manipulate))
        pipeline.add(ReplicatingFork(2))
        pipeline.add(
            Processor(manipulate),
            Processor(dont_manipulate),
            Processor(manipulate),
            Processor(dont_manipulate),
        )
        pipeline.add(Join(4))
        pipeline.add(Processor(count))
        pipeline.run([(0, 'string'), (0, 'string')])
Ejemplo n.º 2
0
    def test_pipeline_basic(self):
        """Run a single-stage pipeline whose only processor checks its input.

        The pipeline is fed the one-element input ``[3]`` and the processor
        asserts that the value it receives equals 3.
        """
        def finalize(arg):
            self.assertEqual(arg, 3)

        pipeline = Pipeline()
        checker = Processor(finalize)
        pipeline.add(checker)
        pipeline.run([3])
Ejemplo n.º 3
0
def main():
    """Build the training pipeline and run it on the contents of ``sms_data.txt``.

    The pipeline has a ``generate_ngrams`` processor, a ``ReplicatingFork(3)``
    and three parallel processors (``train_decision_tree``,
    ``train_random_forest``, ``train_k_neighbors``). The input file is read as
    latin-1, split on newlines, and handed to the pipeline as one work item.
    """
    pipeline = Pipeline()
    pipeline.add(Processor(generate_ngrams))
    pipeline.add(ReplicatingFork(3))
    pipeline.add(
        Processor(train_decision_tree),
        Processor(train_random_forest),
        Processor(train_k_neighbors),
    )

    with open("sms_data.txt", "r", encoding='latin-1') as sms_file:
        lines = sms_file.read().split('\n')
        # The whole list of lines is a single work item, hence the extra list.
        pipeline.run([lines])
Ejemplo n.º 4
0
    def test_pipeline_single_processor(self):
        """Chain an incrementing processor, a pipe and a checking processor.

        The input ``[3]`` is incremented by ``job``; ``finalize`` asserts that
        the value it receives equals 4.
        """
        def finalize(arg):
            self.assertEqual(arg, 4)

        def job(arg):
            return arg + 1

        pipeline = Pipeline()
        for stage in (Processor(job), Pipe(), Processor(finalize)):
            pipeline.add(stage)
        pipeline.run([3])
Ejemplo n.º 5
0
    def test_automatic_open_close(self):
        """Check ``closed`` before and after ``run`` without explicit open/close.

        The pipeline replicates each input into two lanes (square root then
        square root again; cube then a plain pipe), joins them and prints the
        results. ``open()`` and ``close()`` are never called by the test.
        """
        from math import sqrt

        def cube(arg):
            return arg ** 3

        def square_root(arg):
            return sqrt(arg)

        closed_msg = 'Pipeline should be closed'

        pipeline = Pipeline()
        pipeline.add(ReplicatingFork(2))
        pipeline.add(Processor(square_root), Processor(cube))
        pipeline.add(Processor(square_root), Pipe())
        pipeline.add(Pipe())
        pipeline.add(Join(2))
        pipeline.add(Processor(print))

        # The pipeline must report closed both before and after the run.
        self.assertTrue(pipeline.closed, closed_msg)
        pipeline.run([2, 7, 9])
        self.assertTrue(pipeline.closed, closed_msg)
Ejemplo n.º 6
0
    def test_balancing_forks(self):
        """Send two items through a ``BalancingFork(2)`` and compare the tallies.

        The two lanes label their item ``'job1'`` and ``'job2'`` respectively;
        after the join, ``finalize`` tallies the labels in ``self.counts``. The
        test passes when both labels were tallied equally often.
        """
        self.counts = Counter()

        def finalize(arg):
            self.counts[arg] += 1

        def job1(arg):
            # The input value is ignored; only the lane label is emitted.
            return 'job1'

        def job2(arg):
            return 'job2'

        pipeline = Pipeline()
        pipeline.add(BalancingFork(2))
        pipeline.add(Processor(job1), Processor(job2))
        pipeline.add(Join(2))
        pipeline.add(Processor(finalize))
        pipeline.run([False, False])

        tally = self.counts
        self.assertEqual(tally['job1'], tally['job2'])
Ejemplo n.º 7
0
    def test_allow_multiple_pipeline_runs(self):
        """Run the same pipeline twice and use a lock to observe the second run.

        The lock is acquired before the runs; the last processor (``job``)
        releases it when it receives the item ``'second'``.
        """

        def job(arg):
            # Only the item from the second run releases the lock.
            if arg == 'second':
                self.lock.release()


        self.lock = Lock()
        pl = Pipeline()

        pl.add(Processor(dummy_return_arg))
        pl.add(Pipe())
        pl.add(Processor(dummy_return_arg))
        pl.add(Pipe())
        pl.add(Processor(job))
        # Hold the lock so that a release by `job` is observable afterwards.
        self.lock.acquire()
        pl.run(['first'])
        pl.run(['second'])
        # NOTE(review): after a non-blocking acquire the lock is held whether
        # the acquire succeeded (job released it) or failed (it was never
        # released), so the `locked()` assertion below is true in both cases.
        # Asserting on the return value of `acquire(blocking=False)` would
        # distinguish them -- confirm intent, and confirm that `job` runs where
        # it can release this very lock object.
        self.lock.acquire(blocking=False)
        self.assertTrue(self.lock.locked(), 'The second pipeline run was not successful')
Ejemplo n.º 8
0
    def test_processor_pipe_mix(self):
        """Mix a processor and plain pipes in the lanes of a replicating fork.

        One ``(stage, string)`` item is replicated into three lanes: one lane
        runs ``manipulate`` (which bumps the character at index ``stage``), the
        other two are plain pipes. After the join, ``count`` tallies the
        strings and, once all three items have arrived, checks that
        ``'ttring'`` was seen once and ``'string'`` twice.
        """
        self.counts = Counter()

        def count(arg):
            _, string = arg
            self.counts[string] += 1
            # Gate on the total number of items received, not on the number of
            # distinct strings: the expected result has only two distinct keys
            # ('ttring' and 'string'), so a distinct-key count of 3 would never
            # be reached and the assertions below would never execute.
            if sum(self.counts.values()) == 3:
                self.assertEqual(self.counts['ttring'], 1)
                self.assertEqual(self.counts['string'], 2)

        def manipulate(arg):
            # Replace the character at index `stage` with its successor.
            stage, string = arg
            chars = list(string)
            chars[stage] = chr(ord(chars[stage]) + 1)
            return (stage + 1, ''.join(chars))

        pl = Pipeline()
        pl.add(ReplicatingFork(3))
        pl.add(Processor(manipulate), Pipe(), Pipe())
        pl.add(Join(3))
        pl.add(Processor(count))
        pl.run([(0, 'string')])
Ejemplo n.º 9
0
    def test_open_close_no_with(self):
        """Drive the pipeline with explicit ``open()``/``close()`` calls.

        The ``closed`` and ``opened`` flags are checked after each step:
        closed after construction, open after ``open()`` and a run, closed
        after ``close()``, and open again after a second ``open()``.
        """
        from math import sqrt

        def cube(arg):
            return arg ** 3

        def square_root(arg):
            return sqrt(arg)

        closed_msg = 'Pipeline should be closed'
        open_msg = 'Pipeline should be open'

        pipeline = Pipeline()
        pipeline.add(ReplicatingFork(2))
        pipeline.add(Processor(square_root), Processor(cube))
        pipeline.add(Processor(square_root), Pipe())
        pipeline.add(Join(2))
        pipeline.add(Processor(print))
        self.assertTrue(pipeline.closed, closed_msg)

        pipeline.open()
        pipeline.run([16, 3, 81])
        self.assertTrue(pipeline.opened, open_msg)

        pipeline.close()
        self.assertTrue(pipeline.closed, closed_msg)

        # Leave it open -- daemon children should be cleaned up
        pipeline.open()
        self.assertTrue(pipeline.opened, open_msg)
Ejemplo n.º 10
0
    def test_processor_shared_memory(self):
        """Pass data between three processors through a named shared-memory block.

        ``worker1_task`` writes four bytes, ``worker2_task`` overwrites the
        first one, and ``cleanup_task`` checks the resulting contents and then
        closes and unlinks the block. Each task attaches to the block by the
        name ``common_memory`` and returns its argument unchanged.
        """
        def worker1_task(args):
            shmem = shared_memory.SharedMemory(name=common_memory)
            try:
                shmem.buf[:4] = bytearray([0, 11, 22, 33])
            finally:
                # Always detach, even if the write fails.
                shmem.close()

            return args

        def worker2_task(args):
            shmem = shared_memory.SharedMemory(name=common_memory)
            try:
                shmem.buf[0] = 44
            finally:
                shmem.close()

            return args

        def cleanup_task(args):
            import array

            shmem = shared_memory.SharedMemory(name=common_memory)
            try:
                print(array.array('b', shmem.buf[:4]))
                assert shmem.buf[0] == 44
                assert shmem.buf[1] == 11
                assert shmem.buf[2] == 22
                assert shmem.buf[3] == 33
            finally:
                # Release the block even when an assertion fails, so a failing
                # test does not leave the handle open or the segment linked.
                shmem.close()
                shmem.unlink()

            return args

        pl = Pipeline(shared_memory_amt=10)
        pl.add(Processor(worker1_task))
        pl.add(Processor(worker2_task))
        pl.add(Processor(cleanup_task))
        pl.run(['abc'])