Example #1
    def test_update_task_insights(self):
        """
        Test whether task insights are being updated correctly
        """
        MockDatetimeNow.RETURN_VALUES = [datetime(1970, 1, 1, 0, 0, 1, 0),
                                         datetime(1970, 1, 1, 0, 0, 2, 0)]
        MockDatetimeNow.CURRENT_IDX = 0

        insights = WorkerInsights(mp.get_context('fork'), n_jobs=2)
        insights.max_task_duration_last_updated = datetime(1970, 1, 1, 0, 0, 0, 0)

        # Shouldn't do anything when insights haven't been enabled
        with self.subTest(insights_enabled=False), patch('mpire.insights.datetime', new=MockDatetimeNow):
            for worker_id in range(2):
                insights.init_worker(worker_id)
                insights.update_task_insights()
            self.assertIsNone(insights.max_task_duration)
            self.assertIsNone(insights.max_task_args)
            self.assertEqual(insights.max_task_duration_last_updated, datetime(1970, 1, 1, 0, 0, 0, 0))

        insights.reset_insights(enable_insights=True)
        insights.max_task_duration_last_updated = datetime(1970, 1, 1, 0, 0, 0, 0)
        MockDatetimeNow.CURRENT_IDX = 0

        with self.subTest(insights_enabled=True), patch('mpire.insights.datetime', new=MockDatetimeNow):
            for worker_id, max_task_duration_list in [(0, [(5, '5'), (6, '6'), (7, '7'), (8, '8'), (9, '9')]),
                                                      (1, [(0, '0'), (1, '1'), (2, '2'), (3, '3'), (4, '4')])]:
                insights.init_worker(worker_id)
                insights.max_task_duration_list = max_task_duration_list
                insights.update_task_insights()
                self.assertEqual(insights.max_task_duration_last_updated, datetime(1970, 1, 1, 0, 0, worker_id + 1, 0))
            self.assertListEqual(list(insights.max_task_duration), [5, 6, 7, 8, 9, 0, 1, 2, 3, 4])
            self.assertListEqual(list(insights.max_task_args), ['5', '6', '7', '8', '9', '0', '1', '2', '3', '4'])
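
The tests above patch mpire.insights.datetime with a MockDatetimeNow helper that is not part of this listing. A minimal sketch of such a helper, assuming the patched module only ever calls now() and that a datetime subclass serving the queued values in order is enough (the actual test helper may differ):

from datetime import datetime

class MockDatetimeNow(datetime):
    """ Hypothetical test double: now() returns the queued values one by one """
    RETURN_VALUES = []
    CURRENT_IDX = 0

    @classmethod
    def now(cls, *args, **kwargs):
        # Serve the next queued value and advance the index
        value = cls.RETURN_VALUES[cls.CURRENT_IDX]
        cls.CURRENT_IDX += 1
        return value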
Example #2
    def test_update_task_insights_once_in_a_while(self):
        """
        Test whether update_task_insights is triggered correctly. It should be called twice based on the mocked
        datetime.now() values: only for the last two entries is the time difference with
        max_task_duration_last_updated greater than two seconds.
        """
        MockDatetimeNow.RETURN_VALUES = [datetime(1970, 1, 1, 0, 0, 0, 0),
                                         datetime(1970, 1, 1, 0, 0, 2, 0),
                                         datetime(1970, 1, 1, 0, 0, 3, 0),
                                         datetime(1970, 1, 1, 0, 0, 7, 0),
                                         datetime(1970, 1, 1, 0, 0, 8, 0)]
        MockDatetimeNow.CURRENT_IDX = 0

        insights = WorkerInsights(mp.get_context('fork'), n_jobs=5)
        insights.max_task_duration_last_updated = datetime(1970, 1, 1, 0, 0, 1, 0)

        # Shouldn't do anything when insights haven't been enabled
        with self.subTest(insights_enabled=False), patch('mpire.insights.datetime', new=MockDatetimeNow), \
                patch.object(insights, 'update_task_insights') as p:
            for worker_id in range(5):
                insights.init_worker(worker_id)
                insights.update_task_insights_once_in_a_while()
            self.assertEqual(p.call_count, 0)

        insights.reset_insights(enable_insights=True)
        insights.max_task_duration_last_updated = datetime(1970, 1, 1, 0, 0, 1, 0)
        MockDatetimeNow.CURRENT_IDX = 0

        with self.subTest(insights_enabled=True), patch('mpire.insights.datetime', new=MockDatetimeNow), \
                patch.object(insights, 'update_task_insights') as p:
            for worker_id in range(5):
                insights.init_worker(worker_id)
                insights.update_task_insights_once_in_a_while()
            self.assertEqual(p.call_count, 2)
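
As a reading aid only, a sketch of the time gate this test exercises, assuming update_task_insights_once_in_a_while simply compares datetime.now() with max_task_duration_last_updated against a two-second threshold; the actual mpire implementation may differ:

from datetime import datetime, timedelta

def update_once_in_a_while(insights, min_interval=timedelta(seconds=2)):
    # Hypothetical helper mirroring the assertions above: only forward the call when
    # the last update lies more than min_interval in the past
    if insights.insights_enabled and \
            datetime.now() - insights.max_task_duration_last_updated > min_interval:
        insights.update_task_insights()

With the mocked values at 0, 2, 3, 7 and 8 seconds and the last update at 1 second, only the 7- and 8-second calls pass the gate, matching the expected call count of 2.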
Example #3
    def test_update_start_up_time(self):
        """
        Test that the start-up time is correctly added to worker_start_up_time for the right index
        """
        MockDatetimeNow.RETURN_VALUES = [datetime(1970, 1, 1, 0, 0, 0, 0),
                                         datetime(1970, 1, 1, 0, 0, 2, 0),
                                         datetime(1970, 1, 1, 0, 0, 3, 0),
                                         datetime(1970, 1, 1, 0, 0, 7, 0),
                                         datetime(1970, 1, 1, 0, 0, 8, 0)]
        MockDatetimeNow.CURRENT_IDX = 0

        insights = WorkerInsights(mp.get_context('fork'), n_jobs=5)

        # Shouldn't do anything when insights haven't been enabled
        with self.subTest(insights_enabled=False), patch('mpire.insights.datetime', new=MockDatetimeNow):
            for worker_id in range(5):
                insights.init_worker(worker_id)
                insights.update_start_up_time(datetime(1970, 1, 1, 0, 0, 0, 0))
            self.assertIsNone(insights.worker_start_up_time)

        insights.reset_insights(enable_insights=True)
        insights.max_task_duration_last_updated = datetime(1970, 1, 1, 0, 0, 0, 0)
        MockDatetimeNow.CURRENT_IDX = 0

        with self.subTest(insights_enabled=True), patch('mpire.insights.datetime', new=MockDatetimeNow):
            for worker_id in range(5):
                insights.init_worker(worker_id)
                insights.update_start_up_time(datetime(1970, 1, 1, 0, 0, 0, 0))
            self.assertListEqual(list(insights.worker_start_up_time), [0, 2, 3, 7, 8])
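
A sketch of the bookkeeping this test checks, assuming update_start_up_time stores the elapsed seconds since the given start timestamp in the current worker's slot (the names come from the tests; the behaviour is an assumption):

from datetime import datetime

def update_start_up_time(insights, start_time):
    # Hypothetical sketch: record how long this worker took to start up
    if insights.insights_enabled:
        elapsed = (datetime.now() - start_time).total_seconds()
        insights.worker_start_up_time[insights.worker_id] = elapsed

With the mocked now() values at 0, 2, 3, 7 and 8 seconds and a start time at the epoch, this yields exactly the [0, 2, 3, 7, 8] asserted above.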
Example #4
    def test_init_worker(self):
        """
        Test that the right containers are selected given a worker ID
        """
        insights = WorkerInsights(mp.get_context('fork'), n_jobs=5)

        # Should only set the worker ID when insights haven't been enabled
        with self.subTest(insights_enabled=False):
            insights.init_worker(3)
            self.assertEqual(insights.worker_id, 3)
            self.assertIsNone(insights.max_task_duration_list)

        with self.subTest(insights_enabled=True):
            insights.reset_insights(enable_insights=True)
            insights.max_task_duration[:] = range(25)
            insights.max_task_args[:] = map(str, range(25))

            for worker_id, expected_task_duration_list in [
                (0, [(0.0, '0'), (1.0, '1'), (2.0, '2'), (3.0, '3'), (4.0, '4')]),
                (1, [(5.0, '5'), (6.0, '6'), (7.0, '7'), (8.0, '8'), (9.0, '9')]),
                (4, [(20.0, '20'), (21.0, '21'), (22.0, '22'), (23.0, '23'), (24.0, '24')])
            ]:
                with self.subTest(worker_id=worker_id):
                    insights.init_worker(worker_id)
                    self.assertEqual(insights.worker_id, worker_id)
                    self.assertListEqual(insights.max_task_duration_list, expected_task_duration_list)
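
The expected lists follow a five-slots-per-worker layout: worker w owns entries w*5 through w*5 + 4 of the shared duration and args containers, zipped into (duration, args) pairs. A sketch of that selection under this assumed layout:

def select_worker_slots(max_task_duration, max_task_args, worker_id, slots_per_worker=5):
    # Pair every duration slot of this worker with the matching args slot
    start, stop = worker_id * slots_per_worker, (worker_id + 1) * slots_per_worker
    return list(zip(max_task_duration[start:stop], max_task_args[start:stop]))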
Example #5
    def test_get_insights(self):
        """
        Test if the insights are properly processed
        """
        insights = WorkerInsights(mp.get_context('fork'), n_jobs=2)

        with self.subTest(enable_insights=False):
            insights.reset_insights(enable_insights=False)
            self.assertDictEqual(insights.get_insights(), {})

        with self.subTest(enable_insights=True):
            insights.reset_insights(enable_insights=True)
            insights.worker_start_up_time[:] = [0.1, 0.2]
            insights.worker_init_time[:] = [0.11, 0.22]
            insights.worker_n_completed_tasks[:] = [2, 3]
            insights.worker_waiting_time[:] = [0.4, 0.3]
            insights.worker_working_time[:] = [42.0, 37.0]
            insights.worker_exit_time[:] = [0.33, 0.44]

            # Durations that are zero or args that are empty are skipped
            insights.max_task_duration[:] = [0.0, 0.0, 1.0, 2.0, 0.0, 6.0, 0.8, 0.0, 0.1, 0.0]
            insights.max_task_args[:] = ['', '', '1', '2', '', '3', '4', '', '5', '']
            insights_dict = insights.get_insights()

            # Test ratios separately because of rounding errors
            total_time = 0.3 + 0.33 + 0.7 + 79.0 + 0.77
            self.assertAlmostEqual(insights_dict['start_up_ratio'], 0.3 / total_time)
            self.assertAlmostEqual(insights_dict['init_ratio'], 0.33 / total_time)
            self.assertAlmostEqual(insights_dict['waiting_ratio'], 0.7 / total_time)
            self.assertAlmostEqual(insights_dict['working_ratio'], 79.0 / total_time)
            self.assertAlmostEqual(insights_dict['exit_ratio'], 0.77 / total_time)
            del (insights_dict['start_up_ratio'], insights_dict['init_ratio'], insights_dict['waiting_ratio'],
                 insights_dict['working_ratio'], insights_dict['exit_ratio'])

            self.assertDictEqual(insights_dict, {
                'n_completed_tasks': [2, 3],
                'start_up_time': ['0:00:00.100', '0:00:00.200'],
                'init_time': ['0:00:00.110', '0:00:00.220'],
                'waiting_time': ['0:00:00.400', '0:00:00.300'],
                'working_time': ['0:00:42', '0:00:37'],
                'exit_time': ['0:00:00.330', '0:00:00.440'],
                'total_start_up_time': '0:00:00.300',
                'total_init_time': '0:00:00.330',
                'total_waiting_time': '0:00:00.700',
                'total_working_time': '0:01:19',
                'total_exit_time': '0:00:00.770',
                'top_5_max_task_durations': ['0:00:06', '0:00:02', '0:00:01', '0:00:00.800', '0:00:00.100'],
                'top_5_max_task_args': ['3', '2', '1', '4', '5'],
                'total_time': '0:01:21.100',
                'start_up_time_mean': '0:00:00.150', 'start_up_time_std': '0:00:00.050',
                'init_time_mean': '0:00:00.165', 'init_time_std': '0:00:00.055',
                'waiting_time_mean': '0:00:00.350', 'waiting_time_std': '0:00:00.050',
                'working_time_mean': '0:00:39.500', 'working_time_std': '0:00:02.500',
                'exit_time_mean': '0:00:00.385', 'exit_time_std': '0:00:00.055'
            })
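
The ratio assertions follow from the per-category totals: each ratio is that category's total divided by the grand total over all categories. A small worked check of the numbers used above, assuming exactly that relationship:

totals = {'start_up': 0.1 + 0.2, 'init': 0.11 + 0.22, 'waiting': 0.4 + 0.3,
          'working': 42.0 + 37.0, 'exit': 0.33 + 0.44}
grand_total = sum(totals.values())                    # 81.1 seconds -> '0:01:21.100'
ratios = {name: total / grand_total for name, total in totals.items()}
assert abs(ratios['working'] - 79.0 / 81.1) < 1e-9    # matches the assertAlmostEqual above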
Example #6
    def __init__(self,
                 n_jobs: Optional[int] = None,
                 daemon: bool = True,
                 cpu_ids: CPUList = None,
                 shared_objects: Any = None,
                 pass_worker_id: bool = False,
                 use_worker_state: bool = False,
                 start_method: str = 'fork',
                 keep_alive: bool = False) -> None:
        """
        :param n_jobs: Number of workers to spawn. If ``None``, will use ``cpu_count()``
        :param daemon: Whether to start the child processes as daemons
        :param cpu_ids: List of CPU IDs to use for pinning child processes to specific CPUs. The list must be as long as
            the number of jobs used (if ``n_jobs`` equals ``None`` it must be equal to ``mpire.cpu_count()``), or the
            list must have exactly one element. In the former case, element x specifies the CPU ID(s) to use for child
            process x. In the latter case the single element specifies the CPU ID(s) for all child processes to use. A
            single element can be either a single integer specifying a single CPU ID, or a list of integers specifying
            that a single child process can make use of multiple CPU IDs. If ``None``, CPU pinning will be disabled.
            Note that CPU pinning may only work on Linux-based systems
        :param shared_objects: ``None`` or any other type of object (multiple objects can be wrapped in a single tuple).
            Shared objects are only passed on to the user function when they're not ``None``
        :param pass_worker_id: Whether to pass on a worker ID to the user function or not
        :param use_worker_state: Whether to let a worker have a worker state or not
        :param start_method: What process start method to use. Options for multiprocessing: ``'fork'`` (default),
            ``'forkserver'`` and ``'spawn'``. For multithreading use ``'threading'``. See
            https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods for more information and
            https://docs.python.org/3/library/multiprocessing.html#the-spawn-and-forkserver-start-methods for some
            caveats when using the ``'spawn'`` or ``'forkserver'`` methods
        :param keep_alive: When ``True``, workers are kept alive after completing a map call, allowing them to be
            reused when map is called multiple times in a row with the same function and worker lifespan
        """
        # Set parameters
        self.params = WorkerPoolParams(n_jobs, daemon, cpu_ids, shared_objects,
                                       pass_worker_id, use_worker_state,
                                       start_method, keep_alive)

        # Multiprocessing context
        self.ctx = MP_CONTEXTS[start_method]

        # Worker factory
        self.Worker = worker_factory(start_method)

        # Container of the child processes and corresponding communication objects
        self._workers = []
        self._worker_comms = WorkerComms(self.ctx, self.params.n_jobs)
        self._exit_results = None

        # Worker insights, used for profiling
        self._worker_insights = WorkerInsights(self.ctx, self.params.n_jobs)
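
For context, a construction example that uses only the parameters documented in the docstring above; the concrete values (worker count, CPU IDs) are illustrative assumptions, not recommendations:

from mpire import WorkerPool

# Pin all four child processes to CPUs 0 and 1, hand the worker ID to the user
# function, and keep workers alive between map calls
pool = WorkerPool(n_jobs=4,
                  cpu_ids=[[0, 1]],
                  pass_worker_id=True,
                  start_method='fork',
                  keep_alive=True)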
Example #7
    def test_update_n_completed_tasks(self):
        """
        Test that the number of completed tasks is correctly added to worker_n_completed_tasks for the right index
        """
        insights = WorkerInsights(mp.get_context('fork'), n_jobs=5)

        # Shouldn't do anything when insights haven't been enabled
        with self.subTest(insights_enabled=False):
            for worker_id, call_n_times in [(0, 1), (1, 0), (2, 5), (3, 8), (4, 11)]:
                insights.init_worker(worker_id)
                for _ in range(call_n_times):
                    insights.update_n_completed_tasks()
            self.assertIsNone(insights.worker_n_completed_tasks)

        with self.subTest(insights_enabled=True):
            insights.reset_insights(enable_insights=True)
            for worker_id, call_n_times in [(0, 1), (1, 0), (2, 5), (3, 8), (4, 11)]:
                insights.init_worker(worker_id)
                for _ in range(call_n_times):
                    insights.update_n_completed_tasks()
            self.assertListEqual(list(insights.worker_n_completed_tasks), [1, 0, 5, 8, 11])
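
A sketch of the counter update this test covers, assuming update_n_completed_tasks simply bumps the current worker's slot when insights are enabled (inferred from the assertions, not taken from the actual source):

def update_n_completed_tasks(insights):
    # Hypothetical sketch: one more task finished for the current worker
    if insights.insights_enabled:
        insights.worker_n_completed_tasks[insights.worker_id] += 1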
Example #8
    def test_reset_insights(self):
        """
        Test if resetting the insights is done properly
        """
        for n_jobs in [1, 2, 4]:
            insights = WorkerInsights(mp.get_context('fork'), n_jobs)
            self.assertEqual(insights.ctx, mp.get_context('fork'))
            self.assertEqual(insights.n_jobs, n_jobs)

            with self.subTest('initialized', n_jobs=n_jobs):
                self.assertFalse(insights.insights_enabled)
                self.assertIsNone(insights.insights_manager)
                self.assertIsNone(insights.insights_manager_lock)
                self.assertIsNone(insights.worker_start_up_time)
                self.assertIsNone(insights.worker_init_time)
                self.assertIsNone(insights.worker_n_completed_tasks)
                self.assertIsNone(insights.worker_waiting_time)
                self.assertIsNone(insights.worker_working_time)
                self.assertIsNone(insights.worker_exit_time)
                self.assertIsNone(insights.max_task_duration)
                self.assertIsNone(insights.max_task_args)
                self.assertIsNone(insights.worker_id)
                self.assertIsNone(insights.max_task_duration_list)
                self.assertIsNone(insights.max_task_duration_last_updated)

            # Containers should be properly initialized
            MockDatetimeNow.RETURN_VALUES = [datetime(1970, 1, 1, 1, 2, 3, 4)]
            MockDatetimeNow.CURRENT_IDX = 0
            with self.subTest('without initial values', n_jobs=n_jobs, enable_insights=True), \
                    patch('mpire.insights.datetime', new=MockDatetimeNow):
                insights.reset_insights(enable_insights=True)
                self.assertTrue(insights.insights_enabled)
                self.assertIsInstance(insights.insights_manager_lock, mp.synchronize.Lock)
                self.assertIsInstance(insights.insights_manager, managers.SyncManager)
                self.assertIsInstance(insights.worker_start_up_time, ctypes.Array)
                self.assertIsInstance(insights.worker_init_time, ctypes.Array)
                self.assertIsInstance(insights.worker_n_completed_tasks, ctypes.Array)
                self.assertIsInstance(insights.worker_waiting_time, ctypes.Array)
                self.assertIsInstance(insights.worker_working_time, ctypes.Array)
                self.assertIsInstance(insights.worker_exit_time, ctypes.Array)
                self.assertIsInstance(insights.max_task_duration, ctypes.Array)
                self.assertIsInstance(insights.max_task_args, managers.ListProxy)

                # Basic sanity checks for the values
                self.assertEqual(sum(insights.worker_start_up_time), 0)
                self.assertEqual(sum(insights.worker_init_time), 0)
                self.assertEqual(sum(insights.worker_n_completed_tasks), 0)
                self.assertEqual(sum(insights.worker_waiting_time), 0)
                self.assertEqual(sum(insights.worker_working_time), 0)
                self.assertEqual(sum(insights.worker_exit_time), 0)
                self.assertEqual(sum(insights.max_task_duration), 0)
                self.assertListEqual(list(insights.max_task_args), [''] * n_jobs * 5)
                self.assertIsNone(insights.worker_id)
                self.assertIsNone(insights.max_task_duration_list)
                self.assertEqual(insights.max_task_duration_last_updated, datetime(1970, 1, 1, 1, 2, 3, 4))

            # Set some values so we can test whether the containers will be properly reset
            insights.worker_start_up_time[0] = 1
            insights.worker_init_time[0] = 2
            insights.worker_n_completed_tasks[0] = 3
            insights.worker_waiting_time[0] = 4
            insights.worker_working_time[0] = 5
            insights.worker_exit_time[0] = 6
            insights.max_task_duration[0] = 7
            insights.max_task_args[0] = '8'
            insights.worker_id = 4
            insights.max_task_duration_list = 'clearly a list'
            insights.max_task_duration_last_updated = datetime(2020, 12, 11, 10, 9, 8, 7)

            # Containers should be properly reset
            MockDatetimeNow.CURRENT_IDX = 0
            with self.subTest('with initial values', n_jobs=n_jobs, enable_insights=True), \
                    patch('mpire.insights.datetime', new=MockDatetimeNow):
                insights.reset_insights(enable_insights=True)
                # Basic sanity checks for the values
                self.assertEqual(sum(insights.worker_start_up_time), 0)
                self.assertEqual(sum(insights.worker_init_time), 0)
                self.assertEqual(sum(insights.worker_n_completed_tasks), 0)
                self.assertEqual(sum(insights.worker_waiting_time), 0)
                self.assertEqual(sum(insights.worker_working_time), 0)
                self.assertEqual(sum(insights.worker_exit_time), 0)
                self.assertEqual(sum(insights.max_task_duration), 0)
                self.assertListEqual(list(insights.max_task_args), [''] * n_jobs * 5)
                self.assertIsNone(insights.worker_id)
                self.assertIsNone(insights.max_task_duration_list)
                self.assertEqual(insights.max_task_duration_last_updated, datetime(1970, 1, 1, 1, 2, 3, 4))

            # Set some values so we can test whether the containers will be properly reset
            insights.worker_id = 4
            insights.max_task_duration_list = 'clearly a list'
            insights.max_task_duration_last_updated = datetime(2020, 12, 11, 10, 9, 8, 7)

            # Disabling should set things to None again
            with self.subTest(n_jobs=n_jobs, enable_insights=False):
                insights.reset_insights(enable_insights=False)
                self.assertFalse(insights.insights_enabled)
                self.assertIsNone(insights.insights_manager)
                self.assertIsNone(insights.insights_manager_lock)
                self.assertIsNone(insights.worker_start_up_time)
                self.assertIsNone(insights.worker_init_time)
                self.assertIsNone(insights.worker_n_completed_tasks)
                self.assertIsNone(insights.worker_waiting_time)
                self.assertIsNone(insights.worker_working_time)
                self.assertIsNone(insights.worker_exit_time)
                self.assertIsNone(insights.max_task_duration)
                self.assertIsNone(insights.max_task_args)
                self.assertIsNone(insights.worker_id)
                self.assertIsNone(insights.max_task_duration_list)
                self.assertIsNone(insights.max_task_duration_last_updated)
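
Judging from the type assertions above, enabling insights allocates lock-free shared ctypes arrays for the numeric counters and a SyncManager-backed list for the task args, with five slots per worker for the task duration data. A rough, purely illustrative sketch of such an allocation (not the actual mpire code):

import multiprocessing as mp

def allocate_insight_containers(ctx, n_jobs):
    # Hypothetical allocation mirroring the asserted container types
    manager = ctx.Manager()                                     # managers.SyncManager
    manager_lock = ctx.Lock()                                   # mp.synchronize.Lock
    worker_start_up_time = ctx.Array('d', n_jobs, lock=False)   # plain ctypes.Array
    max_task_duration = ctx.Array('d', n_jobs * 5, lock=False)  # 5 slots per worker
    max_task_args = manager.list([''] * n_jobs * 5)             # managers.ListProxy
    return manager, manager_lock, worker_start_up_time, max_task_duration, max_task_args

# usage sketch: allocate_insight_containers(mp.get_context('fork'), n_jobs=2)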