Example #1
0
 def setUp(self):
     """Create a ZeroMQ context, start the router sockets and build a
     three-worker fixture pipeline (worker_1 -> worker_2 -> worker_3)."""
     self.context = zmq.Context()
     self.start_router_sockets()
     graph = {Job(u'worker_1'): Job(u'worker_2'),
              Job(u'worker_2'): Job(u'worker_3')}
     self.pipeline = Pipeline(graph)
Example #2
0
    def test_equal_not_equal_hash(self):
        """Pipelines with the same graph (and data) compare equal, hash
        alike (so they work inside sets) and differ from pipelines with a
        different graph or different data."""
        spam, eggs, ham, snake = (Job('spam'), Job('eggs'), Job('ham'),
                                  Job('python'))
        pipeline_1 = Pipeline({spam: eggs, eggs: (ham, snake)})
        pipeline_2 = Pipeline({spam: eggs, eggs: (ham, snake)})
        pipeline_3 = Pipeline({spam: eggs, eggs: ham, ham: snake})
        self.assertTrue(pipeline_1 == pipeline_2)
        self.assertTrue(pipeline_2 == pipeline_1)
        self.assertTrue(pipeline_1 != pipeline_3)
        self.assertTrue(pipeline_3 != pipeline_1)

        # membership in a set exercises __hash__
        my_set = set([pipeline_1, pipeline_2, pipeline_3])
        for candidate in (pipeline_1, pipeline_2, pipeline_3):
            self.assertIn(candidate, my_set)

        # attaching equal data keeps pipelines equal to each other but
        # distinct from the data-less variants
        pipeline_with_data = Pipeline({spam: eggs, eggs: (ham, snake)},
                                      data={'python': 42})
        pipeline_with_data_2 = Pipeline({spam: eggs, eggs: (ham, snake)},
                                        data={'python': 42})
        self.assertTrue(pipeline_with_data == pipeline_with_data_2)
        self.assertTrue(pipeline_with_data_2 == pipeline_with_data)
        self.assertTrue(pipeline_1 != pipeline_with_data)
        self.assertTrue(pipeline_with_data != pipeline_1)
Example #3
0
def send_pipeline_and_wait_finished():
    """Start ten three-worker pipelines, wait until the manager reports
    them all finished and return timing/counter info for verification.

    Returns a dict with the last pipeline's reported duration, the wall
    clock duration of the wait loop, and the manager's counters.
    """
    pipeline_manager = PipelineManager(api=API_ADDRESS,
                                       broadcast=BROADCAST_ADDRESS)
    pipelines = []
    for index in range(10):
        pipeline = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                             Job(u'worker_2'): Job(u'worker_3')},
                            data={'index': index})
        pipeline_manager.start(pipeline)
        pipelines.append(pipeline)
    assert pipeline_manager.started_pipelines == 10
    assert pipeline_manager.finished_pipelines == 0
    start = time()
    pipeline_manager.finished(pipelines[0])  # only for testing this method
    while pipeline_manager.finished_pipelines < pipeline_manager.started_pipelines:
        pipeline_manager.update(0.5)
    end = time()
    pipeline_manager.disconnect()
    return {'duration': pipeline.duration,
            'real_duration': end - start,
            'finished_pipelines': pipeline_manager.finished_pipelines,
            'started_pipelines': pipeline_manager.started_pipelines}
Example #4
0
    def test_repr(self):
        """repr() of a PipelineManager shows submitted/finished counters
        and the finished counter grows as broadcast messages arrive."""
        pipeline_manager = PipelineManager(api=API_ADDRESS,
                                           broadcast=BROADCAST_ADDRESS)
        pipeline_ids = [uuid4().hex for i in range(10)]
        pipeline_ids_copy = pipeline_ids[:]
        # Stub the API round-trip: each start() consumes one id.
        # Note list.pop() takes ids from the END of pipeline_ids.
        pipeline_manager.send_api_request = lambda x: None
        pipeline_manager.get_api_reply = \
                lambda: {'pipeline id': pipeline_ids.pop()}
        pipelines = [Pipeline({Job('A', data={'index': i}): Job('B')}) \
                     for i in range(10)]
        for pipeline in pipelines:
            pipeline_manager.start(pipeline)

        result = repr(pipeline_manager)
        self.assertEqual(result, '<PipelineManager: 10 submitted, 0 finished>')

        # Fake the broadcast channel: three 'pipeline finished' messages,
        # then poll() reports nothing left.  Both lists are consumed from
        # the end via pop(), so poll yields True, True, True, False.
        messages = [
            'pipeline finished: id={}, duration=0.1'.format(pipeline_id)
            for pipeline_id in pipeline_ids_copy[:3]
        ]
        poll = [False, True, True, True]

        def new_poll(timeout):
            return poll.pop()

        def new_broadcast_receive():
            return messages.pop()

        pipeline_manager.broadcast_poll = new_poll
        pipeline_manager.broadcast_receive = new_broadcast_receive
        pipeline_manager.update(0.1)
        result = repr(pipeline_manager)
        self.assertEqual(result, '<PipelineManager: 10 submitted, 3 finished>')
Example #5
0
    def test_pipeline_add_finished_job(self):
        """add_finished_job() marks jobs done one at a time; a job outside
        the pipeline raises ValueError and re-finishing a job raises
        RuntimeError."""
        jobs = [Job('w1'), Job('w2'), Job('w3')]
        job_1, job_2, job_3 = jobs
        pipeline = PipelineForPipeliner({job_1: job_2, job_2: job_3},
                                        data={'python': 42})
        outsider = Job('w4')

        # after finishing the first `done` jobs, exactly those report
        # finished_job() == True and the rest report False
        for done in range(len(jobs) + 1):
            for position, job in enumerate(jobs):
                if position < done:
                    self.assertTrue(pipeline.finished_job(job))
                else:
                    self.assertFalse(pipeline.finished_job(job))
            if done < len(jobs):
                pipeline.add_finished_job(jobs[done])

        with self.assertRaises(ValueError):
            pipeline.add_finished_job(outsider)  # job not in pipeline
        with self.assertRaises(RuntimeError):
            pipeline.add_finished_job(job_3)  # already finished
Example #6
0
 def test_equal_not_equal_and_hash(self):
     """Jobs with the same worker name are equal and hash alike; jobs
     with different worker names compare and hash unequal."""
     first = Job('qwe')
     twin = Job('qwe')
     other = Job('bla')
     self.assertTrue(first == twin)
     self.assertTrue(twin == first)
     self.assertTrue(first != other)
     self.assertTrue(other != first)
     self.assertEqual(hash(first), hash(twin))
     self.assertNotEqual(hash(first), hash(other))
Example #7
0
def send_pipeline():
    """Submit one pipeline and capture its id around the start() call.

    Returns a tuple (id_before_start, id_returned_by_start,
    id_after_start) so the caller can verify that start() assigns the id.
    """
    graph = {Job(u'worker_1'): Job(u'worker_2'),
             Job(u'worker_2'): Job(u'worker_3')}
    pipeline = Pipeline(graph)
    pipeline_manager = PipelineManager(api=API_ADDRESS,
                                       broadcast=BROADCAST_ADDRESS)
    id_before = pipeline.id
    returned_id = pipeline_manager.start(pipeline)
    pipeline_manager.disconnect()
    return id_before, returned_id, pipeline.id
Example #8
0
def main():
    """Demo driver (Python 2): push ten URLs through a Downloader
    fan-out pipeline, wait for completion, then print per-URL stats
    read back from the /tmp data files the workers updated."""
    # each Downloader fans out into two parallel jobs
    pipeline_definition = {Job('Downloader'): (Job('GetTextAndWords'),
                                               Job('GetLinks'))}
    urls = ['http://www.fsf.org', 'https://creativecommons.org',
            'http://emap.fgv.br', 'https://twitter.com/turicas',
            'http://www.pypln.org', 'http://www.zeromq.org',
            'http://www.python.org', 'http://www.mongodb.org',
            'http://github.com', 'http://pt.wikipedia.org']

    pipeline_manager = PipelineManager(api='tcp://127.0.0.1:5555',
                                       broadcast='tcp://127.0.0.1:5556')
    print 'Sending pipelines...'
    start_time = time()
    my_pipelines = []
    for index, url in enumerate(urls):
        # seed each pipeline's data file with the URL to process;
        # workers read and extend this same file
        filename = '/tmp/{}.dat'.format(index)
        data = json.dumps({'url': url})
        with open(filename, 'w') as fp:
            fp.write(data)
        pipeline = Pipeline(pipeline_definition, data={'filename': filename})
        pipeline_manager.start(pipeline)
        print '  Sent pipeline for url={}'.format(url)

    print
    print 'Waiting for pipelines to finish...'
    total_pipelines = pipeline_manager.started_pipelines
    finished_pipelines = 0
    while finished_pipelines < total_pipelines:
        pipeline_manager.update(0.5)
        finished_pipelines = pipeline_manager.finished_pipelines
        percentual = 100 * (float(finished_pipelines) / total_pipelines)
        # '\r' rewrites the same terminal line as a progress indicator
        sys.stdout.write('\rFinished pipelines: {}/{} ({:5.2f}%)'\
                         .format(finished_pipelines, total_pipelines,
                                 percentual))
        sys.stdout.flush()
    end_time = time()
    print '\rAll pipelines finished in {} seconds'.format(end_time - start_time)

    durations = [pipeline.duration for pipeline in pipeline_manager.pipelines]
    average_duration = sum(durations) / len(durations)
    print 'Average pipeline duration (seconds) = {} (min={}, max={})'\
          .format(average_duration, min(durations), max(durations))
    print

    print 'Some data saved by store:'
    for index, url in enumerate(urls):
        # re-read each data file; NOTE(review): assumes the workers added
        # these keys (download_duration etc.) — verify against the workers
        filename = '/tmp/{}.dat'.format(index)
        with open(filename) as fp:
            data = json.loads(fp.read())
        print ('  url={url}, download_duration={download_duration}, '
               'number_of_words={number_of_words}, '
               'number_of_links={number_of_links}'.format(**data))
Example #9
0
 def test_pipeline_should_propagate_data_among_jobs(self):
     """Pipeline(data=...) shares its data with every job and sets each
     job's back-reference to the pipeline itself."""
     jobs = [Job('w1'), Job('w2'), Job('w3')]
     shared_data = {'python': 42}
     pipeline = Pipeline({jobs[0]: jobs[1], jobs[1]: jobs[2]},
                         data=shared_data)
     self.assertEqual(pipeline.data, shared_data)
     for job in jobs:
         self.assertEqual(job.data, shared_data)
     for job in jobs:
         self.assertEqual(job.pipeline, pipeline)
Example #10
0
 def test_deserialize(self):
     """serialize() followed by deserialize() yields an equal pipeline
     whose own serialization matches the original one."""
     spam, eggs, ham, snake, answer = (Job('spam'), Job('eggs'),
                                       Job('ham'), Job('python'),
                                       Job('answer_42'))
     original = Pipeline({spam: eggs, eggs: (ham, snake), answer: None},
                         data={'key': 42})
     serialized = original.serialize()
     restored = Pipeline.deserialize(serialized)
     self.assertEqual(original, restored)
     self.assertEqual(serialized, restored.serialize())
Example #11
0
    def test_repr(self):
        """repr() lists the job names (in any order) and elides data as
        '...' when the pipeline carries data."""
        result = repr(Pipeline({Job('A'): Job('B'), Job('B'): Job('C')}))
        # any ordering of the three names is acceptable
        expected_list = ['<Pipeline: A, B, C>',
                         '<Pipeline: A, C, B>',
                         '<Pipeline: B, A, C>',
                         '<Pipeline: B, C, A>',
                         '<Pipeline: C, A, B>',
                         '<Pipeline: C, B, A>']
        self.assertIn(result, expected_list)

        with_data = Pipeline({Job('A'): None}, data={'a': 'test'})
        self.assertEqual('<Pipeline: A, data=...>', repr(with_data))
Example #12
0
    def test_pipeliner_should_send_pipeline_finished_when_router_sends_job_finished(
            self):
        """When the only job of a pipeline finishes, the pipeliner must
        send a 'pipeline finished' API request carrying the pipeline id
        and a duration."""
        # a 'job finished' for an unknown job id must not confuse it
        self.broadcast.send('job finished: {}'.format(uuid4().hex))
        self.ignore_get_pipeline()
        self.broadcast.send('new pipeline')
        message = self.get_api_request(ignore_get_pipeline=False)
        pipeline_id = uuid4().hex
        pipeline = {
            'graph': {
                Job('Dummy'): None
            },
            'data': {},
            'pipeline id': pipeline_id
        }
        self.send_pipeline(pipeline)

        # accept the single job the pipeliner submits, then finish it
        message = self.get_api_request()
        job_id = uuid4().hex
        self.api.send_json({'answer': 'job accepted', 'job id': job_id})
        self.broadcast.send('job finished: {}'.format(job_id))

        self.ignore_get_pipeline()
        message = self.get_api_request()
        self.assertIn('command', message)
        self.assertIn('pipeline id', message)
        self.assertIn('duration', message)
        # use assertEqual, not the deprecated assertEquals alias, for
        # consistency with the rest of the test suite
        self.assertEqual(message['command'], 'pipeline finished')
        self.assertEqual(message['pipeline id'], pipeline_id)
Example #13
0
 def test_should_create_a_job_request_after_getting_a_pipeline(self):
     """Each of the ten announced pipelines must produce exactly one
     'add job' request for its single Dummy job."""
     job_counter = 0
     for index in range(20):
         # announce a new pipeline only during the first ten iterations
         if index < 10:
             self.broadcast.send('new pipeline')
         message = self.get_api_request(ignore_get_pipeline=False)
         if message == {'command': 'get pipeline'}:
             self.send_pipeline({'graph': {Job('Dummy'): None},
                                 'data': {'index': index},
                                 'pipeline id': uuid4().hex})
         elif message['command'] == 'add job':
             self.assertEqual(message['worker'], 'Dummy')
             self.api.send_json({'answer': 'job accepted',
                                 'job id': uuid4().hex})
             job_counter += 1
     self.assertEqual(job_counter, 10)
Example #14
0
    def test_str_and_save_dot(self):
        """str(pipeline) renders a Graphviz digraph; save_dot() writes the
        same text (plus a trailing newline) to a file."""
        pipeline = Pipeline({Job('A'): Job('B'), Job('C'): None})
        result = str(pipeline)
        # a terminal None is rendered as the pseudo-node "(None)"
        expected = dedent('''
        digraph graphname {
            "A";
            "C";
            "B";

            "A" -> "B";
            "C" -> "(None)";
        }
        ''').strip()
        self.assertEqual(result, expected)

        # grouped keys/values expand into one edge per (predecessor,
        # successor) pair
        pipeline = Pipeline({
            (Job('A'), Job('B'), Job('C')): [Job('D')],
            Job('E'): (Job('B'), Job('F'))
        })
        result = str(pipeline)
        expected = dedent('''
        digraph graphname {
            "A";
            "C";
            "B";
            "E";
            "D";
            "F";

            "A" -> "D";
            "B" -> "D";
            "C" -> "D";
            "E" -> "B";
            "E" -> "F";
        }
        ''').strip()

        self.assertEqual(result, expected)
        # round-trip through save_dot: the file holds the dot text plus
        # a final newline
        temp_file = NamedTemporaryFile(delete=False)
        temp_file.close()
        pipeline.save_dot(temp_file.name)
        temp_file = open(temp_file.name)
        file_contents = temp_file.read()
        temp_file.close()
        self.assertEqual(expected + '\n', file_contents)
        unlink(temp_file.name)
Example #15
0
 def test_should_return_all_pipelines(self):
     """PipelineManager.pipelines exposes every pipeline it started."""
     pipeline_manager = PipelineManager(api=API_ADDRESS,
                                        broadcast=BROADCAST_ADDRESS)
     # stub out the network round-trip: accept anything, return a fresh id
     pipeline_manager.send_api_request = lambda x: None
     pipeline_manager.get_api_reply = lambda: {'pipeline id': uuid4().hex}
     started = []
     for index in range(10):
         pipeline = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                              Job(u'worker_2'): Job(u'worker_3')},
                             data={'index': index})
         pipeline_manager.start(pipeline)
         started.append(pipeline)
     self.assertEqual(set(pipeline_manager.pipelines), set(started))
Example #16
0
    def test_pipeliner_should_be_able_to_add_jobs_in_sequence(self):
        """Chained jobs (Dummy -> Dummy2 -> Dummy3) must be submitted one
        at a time, each only after its predecessor's 'job finished'
        broadcast, and then 'pipeline finished' must be reported."""
        self.broadcast.send('new pipeline')
        message = self.get_api_request(ignore_get_pipeline=False)
        pipeline_id = uuid4().hex
        pipeline_graph = {
            Job('Dummy'): Job('Dummy2'),
            Job('Dummy2'): Job('Dummy3')
        }
        pipeline = {
            'graph': pipeline_graph,
            'data': {},
            'pipeline id': pipeline_id
        }
        self.send_pipeline(pipeline)
        start_time = time()

        job_workers = []
        finished_job_counter = 0
        while finished_job_counter < 3:
            message = self.get_api_request()
            if message['command'] == 'add job':
                job_id = uuid4().hex
                self.api.send_json({
                    'answer': 'job accepted',
                    'job id': job_id
                })
                job_workers.append(message['worker'])
                self.ignore_get_pipeline()
                finished_job_counter += 1
                # finishing this job should unblock the next one in line
                self.broadcast.send('job finished: {}'.format(job_id))
            elif message['command'] == 'get pipeline':
                self.send_no_pipeline()
        self.assertEqual(finished_job_counter, 3)
        # then, check order of jobs sent
        self.assertEqual(job_workers, ['Dummy', 'Dummy2', 'Dummy3'])

        message = self.get_api_request()
        end_time = time()
        total_time = end_time - start_time
        self.assertIn('command', message)
        self.assertIn('pipeline id', message)
        self.assertIn('duration', message)
        self.assertEqual(message['command'], 'pipeline finished')
        self.assertEqual(message['pipeline id'], pipeline_id)
        # the pipeliner measures duration inside our observed window, so
        # it cannot exceed total_time
        self.assertTrue(message['duration'] <= total_time)
Example #17
0
    def test_pipeline_finished(self):
        """finished() becomes True only after every job in the pipeline
        has been marked as finished."""
        job_1, job_2, job_3 = Job('w1'), Job('w2'), Job('w3')
        pipeline = PipelineForPipeliner({job_1: job_2, job_2: job_3},
                                        data={'python': 42})

        self.assertFalse(pipeline.finished())
        for job in (job_1, job_2):
            pipeline.add_finished_job(job)
            self.assertFalse(pipeline.finished())
        pipeline.add_finished_job(job_3)
        self.assertTrue(pipeline.finished())
Example #18
0
def verify_PipelineManager_exceptions():
    """Check that start() rejects an already-started pipeline and that
    finished() rejects a pipeline the manager never started.

    Returns a dict with both raise flags ('raise_1', 'raise_2') and the
    'started_at' timestamp of the successfully started pipeline.
    """
    pipeline_1 = Pipeline({Job(u'worker_1'): Job(u'worker_2'),
                           Job(u'worker_2'): Job(u'worker_3')})
    pipeline_2 = Pipeline({Job(u'worker_1'): Job(u'worker_2')})
    pipeline_manager = PipelineManager(api=API_ADDRESS,
                                       broadcast=BROADCAST_ADDRESS)
    pipeline_manager.start(pipeline_1)

    raised_on_restart = False
    try:
        pipeline_manager.start(pipeline_1)  # starting twice is an error
    except ValueError:
        raised_on_restart = True

    raised_on_unknown = False
    try:
        pipeline_manager.finished(pipeline_2)  # never started here
    except ValueError:
        raised_on_unknown = True

    pipeline_manager.disconnect()
    return {'raise_1': raised_on_restart,
            'raise_2': raised_on_unknown,
            'started_at': pipeline_1.started_at}
Example #19
0
def main():
    """Benchmark driver (Python 2): submit NUMBER_OF_PIPELINES two-job
    pipelines, logging per-submission latency and process memory to a
    data file, then wait for all of them to finish."""
    # bind hot-loop attribute lookups to locals
    stdout_write = sys.stdout.write
    stdout_flush = sys.stdout.flush
    pipeline_manager = PipelineManager(api=ROUTER_API,
                                       broadcast=ROUTER_BROADCAST)
    pipeline_definition = {Job('Dummy1'): Job('Dummy2')}
    process = psutil.Process(os.getpid())
    version = sys.argv[1]  # pypelinin version label for the output file
    filename = 'test-{}_pipelines-pypelinin-{}.dat'.format(
        NUMBER_OF_PIPELINES, version)
    data = open(filename, 'w')
    my_pipelines = []
    for i in xrange(NUMBER_OF_PIPELINES):
        pipeline = Pipeline(pipeline_definition, data={'index': i})
        # time only the start() call itself
        start_time = time()
        pipeline_manager.start(pipeline)
        end_time = time()
        my_pipelines.append(pipeline)
        memory_info = process.get_memory_info()
        # one TSV row: index, start() latency, VMS, RSS
        info = (i + 1, end_time - start_time, memory_info.vms, memory_info.rss)
        data.write('{}\t{}\t{}\t{}\n'.format(*info))
        if (i + 1) % UPDATE_INTERVAL == 0:
            stdout_write('\r{} out of {}'.format(i + 1, NUMBER_OF_PIPELINES))
            stdout_flush()
    stdout_write('\rfinished sending pipelines! \o/\n')

    stdout_write('Waiting for pipelines to finish...\n')
    pipelines_finished = 0
    finished = pipeline_manager.finished
    while pipelines_finished < NUMBER_OF_PIPELINES:
        finished(my_pipelines[0])  # just need one call to update state of all
        counter = [pipeline.finished for pipeline in my_pipelines].count(True)
        if counter != pipelines_finished:
            stdout_write('\r # of finished pipelines: {}/{}'.format(
                counter, NUMBER_OF_PIPELINES))
            stdout_flush()
            pipelines_finished = counter
    stdout_write('\n')
    data.close()
Example #20
0
    def test_serialize(self):
        """serialize() produces ('graph', edge-tuples) and ('data', None)
        entries; serialization round-trips through deserialize()."""
        spam, eggs, ham, snake = (Job('spam'), Job('eggs'), Job('ham'),
                                  Job('python'))
        pipeline = Pipeline({spam: eggs, eggs: (ham, snake)})
        result = dict(pipeline.serialize())
        # one serialized (from, to) pair per graph edge
        expected = {
            'graph': ((spam.serialize(), eggs.serialize()),
                      (eggs.serialize(), ham.serialize()),
                      (eggs.serialize(), snake.serialize())),
            'data': None,
        }
        # edge ordering is irrelevant, so compare graphs as dicts
        result['graph'] = dict(result['graph'])
        expected['graph'] = dict(expected['graph'])
        self.assertEqual(result, expected)

        pipeline = Pipeline({spam: eggs}, data={'python': 42})
        self.assertEqual(pipeline, Pipeline.deserialize(pipeline.serialize()))
Example #21
0
    def test_pipeliner_should_be_able_to_add_jobs_in_parallel(self):
        """Three independent jobs must all be submitted before any of
        them finishes; once all finish, 'pipeline finished' is sent."""
        self.broadcast.send('new pipeline')
        message = self.get_api_request(ignore_get_pipeline=False)
        pipeline_id = uuid4().hex
        # three jobs with no dependencies between them
        pipeline = {
            Job('Dummy'): None,
            Job('Dummy2'): None,
            Job('Dummy3'): None
        }
        pipeline = {'graph': pipeline, 'data': {}, 'pipeline id': pipeline_id}
        self.send_pipeline(pipeline)
        start_time = time()

        # collect all three 'add job' requests before finishing any job
        job_ids = []
        while len(job_ids) < 3:
            message = self.get_api_request()
            if message['command'] == 'add job':
                job_id = uuid4().hex
                self.api.send_json({
                    'answer': 'job accepted',
                    'job id': job_id
                })
                job_ids.append(job_id)
            elif message['command'] == 'get pipeline':
                self.send_no_pipeline()
        for job_id in job_ids:
            self.broadcast.send('job finished: {}'.format(job_id))

        message = self.get_api_request()
        end_time = time()
        total_time = end_time - start_time
        self.assertIn('command', message)
        self.assertIn('pipeline id', message)
        self.assertIn('duration', message)
        self.assertEqual(message['command'], 'pipeline finished')
        self.assertEqual(message['pipeline id'], pipeline_id)
        # the reported duration falls inside our observed window
        self.assertTrue(message['duration'] <= total_time)
Example #22
0
    def test_pipeliner_should_be_able_to_add_jobs_in_sequence_and_parallel_mixed(
            self):
        """A mixed graph (w1 fans out to w2.1/w2.2/w2.3, which join into
        w3) must be scheduled stage by stage and then reported finished."""
        self.broadcast.send('new pipeline')
        message = self.get_api_request(ignore_get_pipeline=False)
        pipeline_id = uuid4().hex
        pipeline_graph = {
            Job('w1'): (Job('w2.1'), Job('w2.2'), Job('w2.3')),
            (Job('w2.1'), Job('w2.2'), Job('w2.3')): Job('w3')
        }
        pipeline = {
            'graph': pipeline_graph,
            'data': {},
            'pipeline id': pipeline_id
        }
        self.send_pipeline(pipeline)
        start_time = time()

        # stage 1: only w1 may be submitted
        message, job_id = self.check_add_job()
        expected = {'command': 'add job', 'worker': 'w1', 'data': {}}
        self.assertEqual(message, expected)
        self.broadcast.send('job finished: {}'.format(job_id))

        # stage 2: the three parallel jobs, in any order
        message_1, job_id_2_1 = self.check_add_job()
        message_2, job_id_2_2 = self.check_add_job()
        message_3, job_id_2_3 = self.check_add_job()
        workers = set(
            [message_1['worker'], message_2['worker'], message_3['worker']])
        self.assertEqual(workers, set(['w2.1', 'w2.2', 'w2.3']))

        self.ignore_get_pipeline()
        self.broadcast.send('job finished: {}'.format(job_id_2_1))
        self.broadcast.send('job finished: {}'.format(job_id_2_2))
        self.broadcast.send('job finished: {}'.format(job_id_2_3))

        # stage 3: w3 only after all three parallel jobs finished
        message, job_id = self.check_add_job()
        self.assertEqual(message['worker'], 'w3')

        # end_time is taken BEFORE w3's 'job finished' is sent, so the
        # pipeliner's measured duration must exceed total_time (but only
        # by the final round-trip, hence the 1.5x upper bound)
        end_time = time()
        total_time = end_time - start_time
        self.broadcast.send('job finished: {}'.format(job_id))
        message = self.get_api_request()
        self.assertEqual(message['command'], 'pipeline finished')
        self.assertEqual(message['pipeline id'], pipeline_id)
        self.assertTrue(message['duration'] > total_time)
        self.assertTrue(message['duration'] < 1.5 * total_time)
Example #23
0
 def test_default_attributes(self):
     """A freshly built pipeline has no data, no id, an empty set of
     sent jobs, and exposes its jobs as a tuple."""
     fresh = Pipeline({Job('test'): None})
     self.assertEqual(fresh.data, None)
     self.assertEqual(fresh.id, None)
     self.assertEqual(fresh.jobs, (Job('test'),))
     self.assertEqual(fresh.sent_jobs, set())
Example #24
0
    def test_get_starters(self):
        """starters contains exactly the jobs that no other job points
        to, including jobs inside grouped (tuple) keys."""
        cases = [
            ({Job('A'): []},
             (Job('A'),)),
            ({Job('A'): [], Job('B'): []},
             (Job('A'), Job('B'))),
            ({Job('A'): [Job('B')], Job('B'): []},
             (Job('A'),)),
            ({Job('A'): [Job('B')],
              Job('B'): [Job('C'), Job('D'), Job('E')],
              Job('Z'): [Job('W')]},
             (Job('A'), Job('Z'))),
            ({(Job('A'), Job('B'), Job('C')): Job('D')},
             (Job('A'), Job('B'), Job('C'))),
            # Job('B') is a successor of Job('E'), so it is not a starter
            ({(Job('A'), Job('B'), Job('C')): [Job('D')],
              Job('E'): (Job('B'), Job('F'))},
             (Job('A'), Job('C'), Job('E'))),
        ]
        for graph, expected in cases:
            self.assertEqual(set(Pipeline(graph).starters), set(expected))
Example #25
0
 def test_jobs(self):
     """jobs lists every job appearing anywhere in the graph, whether
     as a key or as a successor."""
     graph = {Job('A'): [Job('B')],
              Job('B'): [Job('C'), Job('D'), Job('E')],
              Job('Z'): [Job('W')],
              Job('W'): Job('A')}
     expected = set([Job('A'), Job('B'), Job('C'), Job('D'), Job('E'),
                     Job('W'), Job('Z')])
     self.assertEqual(set(Pipeline(graph).jobs), expected)
Example #26
0
 def test_worker_name(self):
     """worker_name echoes the name the Job was constructed with."""
     job = Job('ABC')
     self.assertEqual(job.worker_name, 'ABC')
Example #27
0
 def test_should_start_with_no_data(self):
     """A Job created without data defaults its data attribute to None."""
     job = Job('ABC')
     self.assertEqual(job.data, None)
Example #28
0
 def test_repr(self):
     """repr() shows the worker name and elides attached data as '...'."""
     plain = Job('ABC')
     self.assertEqual(repr(plain), "<Job worker=ABC>")
     with_data = Job('ABC', data={'a': 'b'})
     self.assertEqual(repr(with_data), "<Job worker=ABC, data=...>")
Example #29
0
    def test_serialize_and_deserialize(self):
        """Jobs round-trip through serialize()/deserialize(); a mapping
        missing 'worker_name' is rejected with ValueError."""
        with self.assertRaises(ValueError):
            Job.deserialize({})  # no key 'worker_name'

        plain = Job('test')
        plain_serialized = tuple({'worker_name': 'test'}.items())
        self.assertEqual(plain.serialize(), plain_serialized)
        self.assertEqual(Job.deserialize(plain_serialized), plain)

        rich = Job('testing', data={'python': 42, 'spam': 'eggs'})
        rich_serialized = tuple({
            'worker_name': 'testing',
            'data': tuple({'python': 42, 'spam': 'eggs'}.items()),
        }.items())
        self.assertEqual(rich.serialize(), rich_serialized)
        self.assertEqual(Job.deserialize(rich_serialized), rich)
        self.assertEqual(Job.deserialize(rich.serialize()).serialize(),
                         rich.serialize())
Example #30
0
    def test_available_jobs(self):
        """available_jobs() returns the jobs whose dependencies are all
        finished (and that are not finished themselves), shrinking to the
        empty set when the pipeline is done."""
        # simple chain: only one job available at a time
        job_1 = Job('w1')
        job_2 = Job('w2')
        job_3 = Job('w3')
        pipeline_data = {'python': 42}
        pipeline = PipelineForPipeliner({
            job_1: job_2,
            job_2: job_3
        },
                                        data=pipeline_data)

        expected = [job_1]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        pipeline.add_finished_job(job_1)
        expected = [job_2]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        pipeline.add_finished_job(job_2)
        expected = [job_3]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        pipeline.add_finished_job(job_3)
        self.assertEqual(pipeline.available_jobs(), set())

        # larger graph mixing fan-out, grouped (tuple) dependencies and
        # the isolated job_15; finish jobs one by one and check the
        # frontier of available jobs after each step
        job_4, job_5, job_6, job_7 = Job('w4'), Job('w5'), Job('w6'), Job('w7')
        job_8, job_9, job_10 = Job('8'), Job('9'), Job('10')
        job_11, job_12, job_13 = Job('11'), Job('12'), Job('13')
        job_14, job_15, job_16 = Job('14'), Job('15'), Job('16')
        pipeline_data = {'python': 42}
        pipeline = PipelineForPipeliner(
            {
                job_1: (job_2, job_3),
                job_2: (job_4, job_16),
                job_3: job_4,
                job_4: job_5,
                job_5: (job_6, job_7, job_8, job_9),
                (job_6, job_7, job_8): job_10,
                (job_10, job_11): (job_12, job_13, job_14),
                job_15: None
            },
            data=pipeline_data)

        # before anything finishes, exactly the starters are available
        expected = [job_1, job_11, job_15]
        self.assertEqual(pipeline.available_jobs(), set(expected))
        self.assertEqual(pipeline.available_jobs(), set(pipeline.starters))

        pipeline.add_finished_job(job_1)
        expected = [job_11, job_15, job_2, job_3]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        pipeline.add_finished_job(job_2)
        expected = [job_11, job_15, job_3, job_16]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        # job_4 needs BOTH job_2 and job_3 finished
        pipeline.add_finished_job(job_3)
        expected = [job_11, job_15, job_4, job_16]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        pipeline.add_finished_job(job_16)
        expected = [job_11, job_15, job_4]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        pipeline.add_finished_job(job_4)
        expected = [job_11, job_15, job_5]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        pipeline.add_finished_job(job_11)
        expected = [job_15, job_5]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        # job_5 fans out into four parallel jobs
        pipeline.add_finished_job(job_5)
        expected = [job_15, job_6, job_7, job_8, job_9]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        pipeline.add_finished_job(job_6)
        expected = [job_15, job_7, job_8, job_9]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        pipeline.add_finished_job(job_15)
        expected = [job_7, job_8, job_9]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        pipeline.add_finished_job(job_7)
        expected = [job_8, job_9]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        pipeline.add_finished_job(job_9)
        expected = [job_8]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        # job_10 needs the whole (job_6, job_7, job_8) group finished
        pipeline.add_finished_job(job_8)
        expected = [job_10]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        # (job_10, job_11) done => the last three jobs become available
        pipeline.add_finished_job(job_10)
        expected = [job_12, job_13, job_14]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        pipeline.add_finished_job(job_13)
        expected = [job_12, job_14]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        pipeline.add_finished_job(job_12)
        expected = [job_14]
        self.assertEqual(pipeline.available_jobs(), set(expected))

        pipeline.add_finished_job(job_14)
        expected = []
        self.assertEqual(pipeline.available_jobs(), set(expected))

        self.assertTrue(pipeline.finished())
Example #31
0
    def test_serialize_and_deserialize(self):
        """A Job survives a serialize()/deserialize() round trip, with or
        without data; deserializing a mapping that lacks 'worker_name'
        raises ValueError."""
        with self.assertRaises(ValueError):
            Job.deserialize({})  # no key 'worker_name'

        bare_job = Job('test')
        bare_expected = tuple({'worker_name': 'test'}.items())
        self.assertEqual(bare_job.serialize(), bare_expected)
        self.assertEqual(Job.deserialize(bare_expected), bare_job)

        data_job = Job('testing', data={'python': 42, 'spam': 'eggs'})
        data_expected = tuple({
            'worker_name': 'testing',
            'data': tuple({'python': 42, 'spam': 'eggs'}.items()),
        }.items())
        self.assertEqual(data_job.serialize(), data_expected)
        self.assertEqual(Job.deserialize(data_expected), data_job)
        round_tripped = Job.deserialize(data_job.serialize())
        self.assertEqual(round_tripped.serialize(), data_job.serialize())