Example No. 1
    def test_no_input_data(self):
        """
        Test with empty iterable (this failed before)
        """
        # Intentional: moves the progress bar output to its own line
        print()
        with WorkerPool() as pool:
            self.assertListEqual(pool.map(square, [], progress_bar=True), [])
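These test methods come from MPIRE's test suite and rely on module-level helpers that are not shown here. A minimal sketch of the assumed setup (the real test module defines its own square, square_numpy, and test data):

from mpire import WorkerPool

def square(x):
    # Helper used throughout these tests
    return x * x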
Example No. 2
    def test_dictionary_input(self):
        """
        Test map with dictionary input
        """
        def subtract(x, y):
            return x - y

        with WorkerPool(n_jobs=1) as pool:

            # Should work
            with self.subTest('correct input'):
                results_list = pool.map(subtract, [{'x': 5, 'y': 2}, {'y': 5, 'x': 2}])
                self.assertEqual(results_list, [3, -3])

            # Should throw
            with self.subTest("missing 'y', unknown parameter 'z'"), \
                    self.assertRaises(TypeError):
                pool.map(subtract, [{'x': 5, 'z': 2}])

            # Should throw
            with self.subTest("unknown parameter 'z'"), \
                    self.assertRaises(TypeError):
                pool.map(subtract, [{'x': 5, 'y': 2, 'z': 2}])
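The behavior under test, stated plainly: each dictionary in the iterable is unpacked as keyword arguments, so key order is irrelevant, and missing or unexpected keys surface as a regular TypeError. A standalone sketch:

from mpire import WorkerPool

def subtract(x, y):
    return x - y

with WorkerPool(n_jobs=1) as pool:
    # {'y': 5, 'x': 2} is called as subtract(x=2, y=5)
    print(pool.map(subtract, [{'x': 5, 'y': 2}, {'y': 5, 'x': 2}]))  # [3, -3]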
Example No. 3
    def test_invalid_input(self):
        """
        Test that when parameters are invalid, an error is raised
        """
        for n_jobs, cpu_ids in product([None, 1, 2, 4],
                                       [[0, 1], [0, 1, 2, 3], [[0, 1], [0, 1]]]):
            if len(cpu_ids) != (n_jobs or cpu_count()):
                with self.subTest(n_jobs=n_jobs, cpu_ids=cpu_ids), \
                        self.assertRaises(ValueError):
                    WorkerPool(n_jobs=n_jobs, cpu_ids=cpu_ids)

        # Should raise when CPU IDs are out of scope
        with self.assertRaises(ValueError):
            WorkerPool(n_jobs=1, cpu_ids=[-1])
        with self.assertRaises(ValueError):
            WorkerPool(n_jobs=1, cpu_ids=[cpu_count()])
Example No. 4
    def test_deamon_nested_workerpool(self):
        """
        Tests nested WorkerPools when daemon==True, which should not work
        """
        with self.assertRaises(AssertionError), \
                WorkerPool(n_jobs=4, daemon=True) as pool:
            pool.map(self._square_daemon,
                     ((X,) for X in repeat(self.test_data, 4)),
                     chunk_size=1)
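The AssertionError comes from Python's multiprocessing itself: daemonic processes are not allowed to have children. Nesting therefore requires daemon=False, as Example No. 10 below also shows; a minimal sketch:

from mpire import WorkerPool

def square(x):
    return x * x

def nested(n):
    with WorkerPool(n_jobs=2) as inner:
        return inner.map(square, range(n))

if __name__ == '__main__':
    with WorkerPool(n_jobs=2, daemon=False) as outer:  # daemon=True would raise
        print(outer.map(nested, [3, 4]))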
Example No. 5
    def test_invalid_progress_bar_position(self):
        """
        Test different values of progress_bar_position, which should be a non-negative integer
        """
        for progress_bar_position, error in [(-1, ValueError),
                                             ('numero uno', TypeError)]:
            with self.subTest(input='regular input', progress_bar_position=progress_bar_position), \
                    self.assertRaises(error), WorkerPool(n_jobs=1) as pool:
                pool.map(square,
                         self.test_data,
                         progress_bar=True,
                         progress_bar_position=progress_bar_position)

            with self.subTest(input='numpy input', progress_bar_position=progress_bar_position), \
                    self.assertRaises(error), WorkerPool(n_jobs=1) as pool:
                pool.map(square_numpy,
                         self.test_data_numpy,
                         progress_bar=True,
                         progress_bar_position=progress_bar_position)
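For valid values, progress_bar_position places the tqdm bar on the given terminal line, which matters when several maps or nested pools each render their own bar. A minimal sketch, reusing the assumed square helper:

from mpire import WorkerPool

def square(x):
    return x * x

with WorkerPool(n_jobs=2) as pool:
    # Any non-negative integer is accepted; 0 is the default position
    pool.map(square, range(100), progress_bar=True, progress_bar_position=0)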
Example No. 6
    def test_start_method(self):
        """
        Test different start methods. All should work just fine
        """
        for n_jobs, start_method in product([1, 3],
                                            ['fork', 'forkserver', 'spawn', 'threading']):
            with self.subTest(n_jobs=n_jobs, start_method=start_method), \
                    WorkerPool(n_jobs, start_method=start_method) as pool:
                self.assertListEqual(pool.map(square, self.test_data),
                                     self.test_desired_output)
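start_method controls how workers are created: 'fork' and 'forkserver' are POSIX-only, 'spawn' works on all platforms, and 'threading' uses threads instead of processes. A minimal sketch:

from mpire import WorkerPool

def square(x):
    return x * x

if __name__ == '__main__':  # guard needed when using 'spawn'
    with WorkerPool(n_jobs=2, start_method='spawn') as pool:
        print(pool.map(square, range(5)))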
Example No. 7
    def test_valid_input(self):
        """
        Test that when parameters are valid, nothing breaks. We don't actually check if CPU pinning is happening
        """
        for n_jobs, cpu_ids in product([None, 1, 2, 4],
                                       [None, [0], [1337], [0, 1], [0, 1, 2, 3],
                                        [[0, 3]], [[0, 1], [0, 1]]]):

            # Things should work fine when cpu_ids is None, or when the number of CPU IDs
            # given is 1 or equals the number of jobs
            if cpu_ids is None or len(cpu_ids) == 1 or len(cpu_ids) == (n_jobs or cpu_count()):

                # When CPU IDs exceed the number of CPUs it should raise
                if cpu_ids is not None and np.array(cpu_ids).max() >= cpu_count():
                    with self.subTest(n_jobs=n_jobs, cpu_ids=cpu_ids), self.assertRaises(ValueError), \
                            WorkerPool(n_jobs=n_jobs, cpu_ids=cpu_ids) as pool:
                        pool.map(square, self.test_data)

                else:
                    with self.subTest(n_jobs=n_jobs, cpu_ids=cpu_ids), patch('subprocess.call') as p, \
                            WorkerPool(n_jobs=n_jobs, cpu_ids=cpu_ids) as pool:

                        # Verify results
                        results_list = pool.map(square, self.test_data)
                        self.assertTrue(isinstance(results_list, list))
                        self.assertEqual(self.test_desired_output,
                                         results_list)

                        # Verify that CPU pinning is used, is called as many times as there are jobs and is called for
                        # each worker process ID
                        if cpu_ids is None:
                            self.assertEqual(p.call_args_list, [])
                        else:
                            self.assertEqual(p.call_count, pool.n_jobs)
                            pids = {
                                call[0][0].rsplit(" ", 1)[-1]
                                for call in p.call_args_list
                            }
                            self.assertEqual(len(pids), pool.n_jobs)
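The parametrization documents the accepted shapes of cpu_ids: one ID per worker, a single ID shared by all workers, or a list of eligible CPUs per worker. The patched subprocess.call suggests pinning shells out to an external command (e.g. taskset on Linux), which is why the test inspects the recorded calls rather than actual affinity. A sketch of the valid shapes, assuming a machine with at least four CPUs:

from mpire import WorkerPool

WorkerPool(n_jobs=4, cpu_ids=[0, 1, 2, 3])      # pin each worker to its own CPU
WorkerPool(n_jobs=4, cpu_ids=[0])               # pin all workers to CPU 0
WorkerPool(n_jobs=2, cpu_ids=[[0, 1], [0, 1]])  # each worker may run on CPUs 0-1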
Example No. 8
    def test_exceptions(self):
        """
        Tests if MPIRE can handle exceptions well
        """
        # This print statement is intentional as it will print multiple progress bars
        print()
        for n_jobs, n_tasks_max_active, worker_lifespan, progress_bar in \
                product([1, 20], [None, 1], [None, 1], [False, True]):
            with WorkerPool(n_jobs=n_jobs) as pool:

                # Should work for map like functions
                with self.subTest(n_jobs=n_jobs, n_tasks_max_active=n_tasks_max_active, worker_lifespan=worker_lifespan,
                                  progress_bar=progress_bar, function='square_raises', map='map'), \
                     self.assertRaises(ValueError):
                    pool.map(self._square_raises,
                             self.test_data,
                             max_tasks_active=n_tasks_max_active,
                             worker_lifespan=worker_lifespan,
                             progress_bar=progress_bar)

                # Should work for imap like functions
                with self.subTest(n_jobs=n_jobs, n_tasks_max_active=n_tasks_max_active, worker_lifespan=worker_lifespan,
                                  progress_bar=progress_bar, function='square_raises', map='imap'), \
                     self.assertRaises(ValueError):
                    list(
                        pool.imap_unordered(
                            self._square_raises,
                            self.test_data,
                            max_tasks_active=n_tasks_max_active,
                            worker_lifespan=worker_lifespan,
                            progress_bar=progress_bar))

                # Should work for map like functions
                with self.subTest(n_jobs=n_jobs, n_tasks_max_active=n_tasks_max_active, worker_lifespan=worker_lifespan,
                                  progress_bar=progress_bar, function='square_raises_on_idx', map='map'), \
                     self.assertRaises(ValueError):
                    pool.map(self._square_raises_on_idx,
                             self.test_data,
                             max_tasks_active=n_tasks_max_active,
                             worker_lifespan=worker_lifespan,
                             progress_bar=progress_bar)

                # Should work for imap like functions
                with self.subTest(n_jobs=n_jobs, n_tasks_max_active=n_tasks_max_active, worker_lifespan=worker_lifespan,
                                  progress_bar=progress_bar, function='square_raises_on_idx', map='imap'), \
                     self.assertRaises(ValueError):
                    list(
                        pool.imap_unordered(
                            self._square_raises_on_idx,
                            self.test_data,
                            max_tasks_active=n_tasks_max_active,
                            worker_lifespan=worker_lifespan,
                            progress_bar=progress_bar))
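The pattern being verified: an exception raised inside a worker is caught by MPIRE and re-raised in the parent, whatever the map variant, worker lifespan, or progress bar settings. A minimal sketch:

from mpire import WorkerPool

def raises_on_three(x):
    if x == 3:
        raise ValueError(x)
    return x * x

with WorkerPool(n_jobs=2) as pool:
    try:
        pool.map(raises_on_three, range(10))
    except ValueError:
        print('worker exception propagated to the parent')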
Example No. 9
    def test_by_constructor(self):
        """
        Test passing on the worker ID via the constructor
        """
        for n_jobs, pass_worker_id in product([1, 2, 4], [True, False]):

            with self.subTest(n_jobs=n_jobs, pass_worker_id=pass_worker_id, config_type='constructor'), \
                 WorkerPool(n_jobs=n_jobs, pass_worker_id=pass_worker_id) as pool:

                # Tests should fail when number of arguments in function is incorrect, worker ID is not within range,
                # or when the shared objects are not equal to the given arguments
                f = self._f1 if pass_worker_id else self._f2
                pool.map(f, ((n_jobs, ) for _ in range(10)), iterable_len=10)
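With pass_worker_id=True, MPIRE prepends the worker ID to every task's arguments; presumably _f1 accepts (worker_id, n_jobs) and asserts the ID is in range, while _f2 accepts only (n_jobs,). A minimal sketch of the enabled case:

from mpire import WorkerPool

def check_worker_id(worker_id, n_jobs):
    assert 0 <= worker_id < n_jobs

with WorkerPool(n_jobs=4, pass_worker_id=True) as pool:
    pool.map(check_worker_id, ((4,) for _ in range(10)), iterable_len=10)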
Example No. 10
    def test_non_deamon_nested_workerpool(self):
        """
        Tests nested WorkerPools when daemon==False, which should work
        """
        with WorkerPool(n_jobs=4, daemon=False) as pool:
            # Obtain results using nested WorkerPools
            results = pool.map(self._square_daemon,
                               ((X, ) for X in repeat(self.test_data, 4)),
                               chunk_size=1)

            # Each of the results should match
            for results_list in results:
                self.assertTrue(isinstance(results_list, list))
                self.assertEqual(self.test_desired_output, results_list)
Example No. 11
    def test_valid_progress_bars_regular_input(self):
        """
        Valid progress bar values are either True or False
        """
        print()
        for n_jobs, progress_bar in product([None, 1, 2], [True, False]):

            with self.subTest(n_jobs=n_jobs), WorkerPool(n_jobs=n_jobs) as pool:
                results_list = pool.map(square,
                                        self.test_data,
                                        progress_bar=progress_bar)
                self.assertTrue(isinstance(results_list, list))
                self.assertEqual(self.test_desired_output, results_list)
Example No. 12
    def test_by_constructor(self):
        """
        Tests passing shared objects in the constructor
        """
        for n_jobs, shared_objects in product([1, 2, 4],
                                              [None, (37, 42), {'1', '2', '3'}]):

            # Pass on arguments using the constructor instead
            with self.subTest(n_jobs=n_jobs, shared_objects=shared_objects, config_type='constructor'), \
                 WorkerPool(n_jobs=n_jobs, shared_objects=shared_objects) as pool:

                # Tests should fail when number of arguments in function is incorrect, worker ID is not within range,
                # or when the shared objects are not equal to the given arguments
                f = self._f1 if shared_objects else self._f2
                pool.map(f, ((shared_objects, n_jobs) for _ in range(10)),
                         iterable_len=10)
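shared_objects is handed to each worker once and prepended to the task arguments, avoiding re-pickling read-only data for every task. A minimal sketch:

from mpire import WorkerPool

def add_shared(shared, x):
    a, b = shared
    return a + b + x

with WorkerPool(n_jobs=2, shared_objects=(37, 42)) as pool:
    print(pool.map(add_shared, range(5)))  # [79, 80, 81, 82, 83]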
Example No. 13
    def predict(self,
                texts: List[str],
                pbar: bool = True) -> List[List[Instance]]:
        """
        Predict the semtypes for each text.

        :param texts: The texts for which to predict.
        :return: A list of tuples containing start and end index and label.
        """
        with WorkerPool(n_jobs=self.n_workers) as pool:
            pred = pool.map(self._predict_single,
                            texts,
                            chunk_size=1,
                            progress_bar=pbar)

        return pred
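Hypothetical call site for the method above (the model instance and input texts are placeholders):

predictions = model.predict(['first document', 'second document'], pbar=False)
# predictions[i] holds the instances predicted for texts[i]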
Example No. 14
    def test_valid_progress_bars_numpy_input(self):
        """
        Test with numpy, as that will change the number of tasks
        """
        print()
        for n_jobs, progress_bar in product([None, 1, 2], [True, False]):

            # Should work just fine
            with self.subTest(n_jobs=n_jobs, progress_bar=progress_bar), \
                    WorkerPool(n_jobs=n_jobs) as pool:
                results = pool.map(square_numpy,
                                   self.test_data_numpy,
                                   progress_bar=progress_bar)
                self.assertTrue(isinstance(results, np.ndarray))
                np.testing.assert_array_equal(results,
                                              self.test_desired_output_numpy)
Example No. 15
    def test_faulty_parameters(self):
        """
        Should raise when wrong parameter values are used
        """
        with WorkerPool(n_jobs=4) as pool:

            # Zero (or a negative number of) active tasks/lifespan should result in a value error
            for n, map_function in product([-3, -1, 0, 3.14],
                                           [pool.map, pool.map_unordered,
                                            pool.imap, pool.imap_unordered]):
                # max_tasks_active
                with self.subTest(max_tasks_active=n, map_function=map_function), \
                     self.assertRaises(ValueError if isinstance(n, int) else TypeError):
                    list(map_function(square, self.test_data, max_tasks_active=n))

                # worker_lifespan
                with self.subTest(worker_lifespan=n, map_function=map_function), \
                     self.assertRaises(ValueError if isinstance(n, int) else TypeError):
                    list(map_function(square, self.test_data, worker_lifespan=n))

            # chunk_size should be an integer or None
            with self.subTest(chunk_size='3'), self.assertRaises(TypeError):
                for _ in pool.imap(square, self.test_data, chunk_size='3'):
                    pass

            # chunk_size should be a positive integer
            with self.subTest(chunk_size=-5), self.assertRaises(ValueError):
                for _ in pool.imap(square, self.test_data, chunk_size=-5):
                    pass

            # n_splits should be an integer or None
            with self.subTest(n_splits='3'), self.assertRaises(TypeError):
                for _ in pool.imap(square, self.test_data, n_splits='3'):
                    pass

            # n_splits should be a positive integer
            with self.subTest(n_splits=-5), self.assertRaises(ValueError):
                for _ in pool.imap(square, self.test_data, n_splits=-5):
                    pass
Example No. 16
    def test_enable_insights(self):
        """
        Insight containers are initially set to None. When insights are enabled they should be replaced by
        appropriate containers, and starting a second task should reset them. When disabled, they should remain None
        """
        with WorkerPool(n_jobs=2) as pool:

            # We run this a few times to see if it resets properly. We only verify this by checking the
            # n_completed_tasks
            for idx in range(3):
                with self.subTest('enabled', idx=idx):

                    pool.map(square, range(10), enable_insights=True, worker_init=self._init, worker_exit=self._exit)

                    # Basic sanity checks for the values. Some max task args can be empty, in that case the duration
                    # should be 0 (= no data)
                    self.assertGreater(sum(pool._worker_insights.worker_start_up_time), 0)
                    self.assertGreater(sum(pool._worker_insights.worker_init_time), 0)
                    self.assertEqual(sum(pool._worker_insights.worker_n_completed_tasks), 10)
                    self.assertGreater(sum(pool._worker_insights.worker_waiting_time), 0)
                    self.assertGreater(sum(pool._worker_insights.worker_working_time), 0)
                    self.assertGreater(sum(pool._worker_insights.worker_exit_time), 0)
                    self.assertGreater(max(pool._worker_insights.max_task_duration), 0)
                    for duration, args in zip(pool._worker_insights.max_task_duration,
                                              pool._worker_insights.max_task_args):
                        if duration == 0:
                            self.assertEqual(args, '')
                        else:
                            self.assertIn(args, {'Arg 0: 0', 'Arg 0: 1', 'Arg 0: 2', 'Arg 0: 3', 'Arg 0: 4',
                                                 'Arg 0: 5', 'Arg 0: 6', 'Arg 0: 7', 'Arg 0: 8', 'Arg 0: 9'})

            # Disabling should set things to None again
            with self.subTest('disable'):
                pool.map(square, range(10), enable_insights=False)
                self.assertIsNone(pool._worker_insights.insights_manager)
                self.assertIsNone(pool._worker_insights.insights_manager_lock)
                self.assertIsNone(pool._worker_insights.worker_start_up_time)
                self.assertIsNone(pool._worker_insights.worker_init_time)
                self.assertIsNone(pool._worker_insights.worker_n_completed_tasks)
                self.assertIsNone(pool._worker_insights.worker_waiting_time)
                self.assertIsNone(pool._worker_insights.worker_working_time)
                self.assertIsNone(pool._worker_insights.worker_exit_time)
                self.assertIsNone(pool._worker_insights.max_task_duration)
                self.assertIsNone(pool._worker_insights.max_task_args)
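The test reaches into the private _worker_insights container; in application code the same numbers are normally read through the public accessor (assuming get_insights() is available in this MPIRE version). A sketch:

from mpire import WorkerPool

def square(x):
    return x * x

with WorkerPool(n_jobs=2) as pool:
    pool.map(square, range(10), enable_insights=True)
    print(pool.get_insights())  # timings, task counts, slowest task args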
Example No. 17
    def test_by_constructor(self):
        """
        Tests setting worker state in the constructor
        """
        for n_jobs, use_worker_state, n_tasks in product([1, 2, 4],
                                                         [False, True],
                                                         [0, 1, 3, 150]):

            with self.subTest(n_jobs=n_jobs, use_worker_state=use_worker_state, n_tasks=n_tasks), \
                 WorkerPool(n_jobs=n_jobs, pass_worker_id=True, use_worker_state=use_worker_state) as pool:

                # When use_worker_state is set, the final (worker_id, n_args) of each worker should add up to the
                # number of given tasks
                f = self._f1 if use_worker_state else self._f2
                results = pool.map(f, range(n_tasks), chunk_size=2)
                if use_worker_state:
                    n_processed_per_worker = [0] * n_jobs
                    for wid, n_processed in results:
                        n_processed_per_worker[wid] = n_processed
                    self.assertEqual(sum(n_processed_per_worker), n_tasks)
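With use_worker_state=True each worker receives a persistent dict it can mutate across tasks; combined with pass_worker_id=True as above, the function signature becomes (worker_id, worker_state, *args). A minimal sketch:

from mpire import WorkerPool

def count_tasks(worker_id, worker_state, x):
    worker_state['n'] = worker_state.get('n', 0) + 1
    return worker_id, worker_state['n']

with WorkerPool(n_jobs=2, pass_worker_id=True, use_worker_state=True) as pool:
    results = pool.map(count_tasks, range(10), chunk_size=2)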
Example No. 18
    futures = client.map(slow_fn, jobs)
    res = client.gather(futures)
    print(f'Elapsed time for {name.upper()}: {time.time() - start_time}')
    print(f'\n{"=" * 53}\n')

    # 7: p_tqdm
    start_time = time.time()
    name = 'p_tqdm'
    print(f'Start {name.upper()} execution...')
    res = p_map(slow_fn, jobs)
    print(f'Elapsed time for {name.upper()}: {time.time() - start_time}')
    print(f'\n{"=" * 53}\n')

    # 8: pathos
    start_time = time.time()
    name = 'pathos'
    print(f'Start {name.upper()} execution...')
    pool = ProcessPool()
    res = pool.map(slow_fn, jobs, chunksize=16)
    print(f'Elapsed time for {name.upper()}: {time.time() - start_time}')
    print(f'\n{"=" * 53}\n')

    # 9: mpire
    start_time = time.time()
    name = 'mpire'
    print(f'Start {name.upper()} execution...')
    with WorkerPool(n_jobs=5) as pool:
        res = pool.map(slow_fn, jobs, chunk_size=16)
    print(f'Elapsed time for {name.upper()}: {time.time() - start_time}')
    print(f'\n{"=" * 53}\n')
Example No. 19
    def test_numpy_input(self):
        """
        Test map with numpy input
        """
        for n_jobs, n_tasks_max_active, worker_lifespan, chunk_size, n_splits in \
                product([1, 2, None], [None, 2], [None, 2], [None, 3], [None, 3]):

            with WorkerPool(n_jobs=n_jobs) as pool:

                # Test numpy input. map should concatenate chunks of numpy output to a single output array if we
                # instruct it to
                with self.subTest(concatenate_numpy_output=True,
                                  map_function='map',
                                  n_jobs=n_jobs,
                                  n_tasks_max_active=n_tasks_max_active,
                                  worker_lifespan=worker_lifespan,
                                  chunk_size=chunk_size,
                                  n_splits=n_splits):
                    results = pool.map(square_numpy,
                                       self.test_data_numpy,
                                       max_tasks_active=n_tasks_max_active,
                                       worker_lifespan=worker_lifespan,
                                       concatenate_numpy_output=True)
                    self.assertTrue(isinstance(results, np.ndarray))
                    np.testing.assert_array_equal(
                        results, self.test_desired_output_numpy)

                # If we disable it we should get back chunks of the original array
                with self.subTest(concatenate_numpy_output=False,
                                  map_function='map',
                                  n_jobs=n_jobs,
                                  n_tasks_max_active=n_tasks_max_active,
                                  worker_lifespan=worker_lifespan,
                                  chunk_size=chunk_size,
                                  n_splits=n_splits):
                    results = pool.map(square_numpy,
                                       self.test_data_numpy,
                                       max_tasks_active=n_tasks_max_active,
                                       worker_lifespan=worker_lifespan,
                                       concatenate_numpy_output=False)
                    self.assertTrue(isinstance(results, list))
                    np.testing.assert_array_equal(
                        np.concatenate(results),
                        self.test_desired_output_numpy)

                # Numpy concatenation doesn't exist for the other functions
                with self.subTest(map_function='imap',
                                  n_jobs=n_jobs,
                                  n_tasks_max_active=n_tasks_max_active,
                                  worker_lifespan=worker_lifespan,
                                  chunk_size=chunk_size,
                                  n_splits=n_splits):
                    results = pool.imap(square_numpy,
                                        self.test_data_numpy,
                                        max_tasks_active=n_tasks_max_active,
                                        worker_lifespan=worker_lifespan)
                    self.assertTrue(isinstance(results, types.GeneratorType))
                    np.testing.assert_array_equal(
                        np.concatenate(list(results)),
                        self.test_desired_output_numpy)

                # map_unordered and imap_unordered cannot be checked for correctness as we don't know the order of the
                # returned results, except when n_jobs=1. In the other cases we could, however, check if all the values
                # (numpy rows) that are returned are present (albeit being in a different order)
                for map_func, result_type in ((pool.map_unordered, list),
                                              (pool.imap_unordered, types.GeneratorType)):

                    with self.subTest(map_function=map_func,
                                      n_jobs=n_jobs,
                                      n_tasks_max_active=n_tasks_max_active,
                                      worker_lifespan=worker_lifespan,
                                      chunk_size=chunk_size,
                                      n_splits=n_splits):

                        results = map_func(square_numpy,
                                           self.test_data_numpy,
                                           max_tasks_active=n_tasks_max_active,
                                           worker_lifespan=worker_lifespan)
                        self.assertTrue(isinstance(results, result_type))
                        concatenated_results = np.concatenate(list(results))
                        if n_jobs == 1:
                            np.testing.assert_array_equal(concatenated_results,
                                                          self.test_desired_output_numpy)
                        else:
                            # We sort the expected and actual results using lexsort, which sorts using a sequence of
                            # keys. We transpose the array to sort on columns instead of rows.
                            np.testing.assert_array_equal(
                                concatenated_results[np.lexsort(concatenated_results.T)],
                                self.test_desired_output_numpy[np.lexsort(self.test_desired_output_numpy.T)])
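The behavior under test: map splits a numpy array into chunks, each worker squares its chunk, and concatenate_numpy_output controls whether the chunked results are glued back into one array (Example No. 14 suggests concatenation is the default, since no flag is passed there). A minimal sketch:

import numpy as np
from mpire import WorkerPool

def square_numpy(chunk):
    return chunk * chunk

data = np.random.rand(100, 3)
with WorkerPool(n_jobs=2) as pool:
    out = pool.map(square_numpy, data, concatenate_numpy_output=True)
    assert isinstance(out, np.ndarray) and out.shape == data.shape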
Example No. 20
    def test_all_maps(self):
        """
        Tests the map related functions
        """
        def get_generator(iterable):
            yield from iterable

        # Test results for different number of jobs to run in parallel and the maximum number of active tasks in the
        # queue
        for n_jobs, n_tasks_max_active, worker_lifespan, chunk_size, n_splits in \
                product([1, 2, None], [None, 2], [None, 2], [None, 3], [None, 3]):

            with WorkerPool(n_jobs=n_jobs) as pool:

                for map_func, sort, result_type in ((pool.map, False, list),
                                                    (pool.map_unordered, True, list),
                                                    (pool.imap, False, types.GeneratorType),
                                                    (pool.imap_unordered, True, types.GeneratorType)):

                    with self.subTest(map_func=map_func,
                                      input='list',
                                      n_jobs=n_jobs,
                                      n_tasks_max_active=n_tasks_max_active,
                                      worker_lifespan=worker_lifespan,
                                      chunk_size=chunk_size,
                                      n_splits=n_splits):

                        # Test if parallel map results in the same as ordinary map function. Should work both for
                        # generators and iterators. Also check if an empty list works as desired.
                        results_list = map_func(
                            square,
                            self.test_data,
                            max_tasks_active=n_tasks_max_active,
                            worker_lifespan=worker_lifespan)
                        self.assertTrue(isinstance(results_list, result_type))
                        self.assertEqual(
                            self.test_desired_output,
                            sorted(results_list, key=lambda tup: tup[0])
                            if sort else list(results_list))

                    with self.subTest(map_func=map_func,
                                      input='generator',
                                      n_jobs=n_jobs,
                                      n_tasks_max_active=n_tasks_max_active,
                                      worker_lifespan=worker_lifespan,
                                      chunk_size=chunk_size,
                                      n_splits=n_splits):

                        results_list = map_func(
                            square,
                            get_generator(self.test_data),
                            iterable_len=self.test_data_len,
                            max_tasks_active=n_tasks_max_active,
                            worker_lifespan=worker_lifespan)
                        self.assertTrue(isinstance(results_list, result_type))
                        self.assertEqual(
                            self.test_desired_output,
                            sorted(results_list, key=lambda tup: tup[0])
                            if sort else list(results_list))

                    with self.subTest(map_func=map_func,
                                      input='empty list',
                                      n_jobs=n_jobs,
                                      n_tasks_max_active=n_tasks_max_active,
                                      worker_lifespan=worker_lifespan,
                                      chunk_size=chunk_size,
                                      n_splits=n_splits):

                        results_list = map_func(
                            square, [],
                            max_tasks_active=n_tasks_max_active,
                            worker_lifespan=worker_lifespan)
                        self.assertTrue(isinstance(results_list, result_type))
                        self.assertEqual([], list(results_list))
Example No. 21
    @staticmethod
    def _square_daemon(X):
        with WorkerPool(n_jobs=4) as pool:
            return pool.map(square, X, chunk_size=1)