def test_cum_time(self):
    """
    Repeated use of TimeIt on the same slot should keep adding to the value stored in the cum_time_array
    """
    # The mocked clock hands these values out in order: start, end, start, end, ... Every measurement starts at
    # second 0 and ends at second 1, 2, and 3 respectively, giving durations of 1, 2, and 3 seconds.
    mocked_now_values = []
    for end_second in (1, 2, 3):
        mocked_now_values.append(datetime(1970, 1, 1, 0, 0, 0, 0))
        mocked_now_values.append(datetime(1970, 1, 1, 0, 0, end_second, 0))
    MockDatetimeNow.RETURN_VALUES = mocked_now_values
    MockDatetimeNow.CURRENT_IDX = 0

    cum_time_array = [0]
    with patch('mpire.utils.datetime', new=MockDatetimeNow):
        # Running totals expected after each timed block: 1, 1 + 2, 1 + 2 + 3
        for expected_total in (1.0, 3.0, 6.0):
            with TimeIt(cum_time_array, 0):
                pass
            self.assertEqual(cum_time_array[0], expected_total)
def test_format_args(self):
    """
    When a format args function is passed to TimeIt, its return value should end up in the max_time_array
    """
    # Each case pairs a format function with the value it is expected to produce
    cases = [(lambda: "1", "1"), (lambda: 2, 2), (lambda: "foo", "foo")]

    for format_func, formatted in cases:
        # The mocked clock hands these values out in order: start, end, start, end, ... Every measurement starts
        # at second 0 and ends at second 1, 2, and 3 respectively, giving durations of 1, 2, and 3 seconds.
        MockDatetimeNow.RETURN_VALUES = [
            moment
            for end_second in (1, 2, 3)
            for moment in (datetime(1970, 1, 1, 0, 0, 0, 0), datetime(1970, 1, 1, 0, 0, end_second, 0))
        ]
        MockDatetimeNow.CURRENT_IDX = 0

        with self.subTest(format_func=format_func):
            with patch('mpire.utils.datetime', new=MockDatetimeNow):
                cum_time_array = [0]
                max_time_array = [(0, ''), (0, '')]
                for _ in range(3):
                    with TimeIt(cum_time_array, 0, max_time_array, format_func):
                        pass

                # Only two slots are available, so just the two longest durations are expected to remain
                expected = [(2.0, formatted), (3.0, formatted)]
                self.assertListEqual(max_time_array, expected)
def test_max_time(self):
    """
    Repeated use of TimeIt should keep track of the largest durations in the max_time_array (maintained as a
    heap). The array has five slots while TimeIt is used six times, so the smallest duration should be missing.
    """
    # (second, microsecond) at which each measurement ends. Every measurement starts at second 0, so the
    # durations are 1 second, 2 seconds, 3 seconds, 3 seconds again, 0.5 seconds, and 10 seconds.
    end_times = [(1, 0), (2, 0), (3, 0), (3, 0), (0, 500000), (10, 0)]

    # The mocked clock hands these values out in order: start, end, start, end, ...
    mocked_now_values = []
    for second, microsecond in end_times:
        mocked_now_values.extend((
            datetime(1970, 1, 1, 0, 0, 0, 0),
            datetime(1970, 1, 1, 0, 0, second, microsecond),
        ))
    MockDatetimeNow.RETURN_VALUES = mocked_now_values
    MockDatetimeNow.CURRENT_IDX = 0

    cum_time_array = [0]
    max_time_array = [(0, '') for _ in range(5)]
    with patch('mpire.utils.datetime', new=MockDatetimeNow):
        for _ in end_times:
            with TimeIt(cum_time_array, 0, max_time_array):
                pass

    # The expected order is the heap layout, not a sorted list. No format function was given, hence the None
    expected_heap = [(1.0, None), (2.0, None), (10.0, None), (3.0, None), (3.0, None)]
    self.assertListEqual(max_time_array, expected_heap)
def _func():
    """
    Call ``func`` on ``args`` while timing the call, then register the completed task.

    This is a closure: ``self``, ``func`` and ``args`` are taken from the enclosing scope (not visible here).

    :return: Whatever ``func(args)`` returns
    """
    insights = self.worker_insights

    # The working time is accumulated under this worker's ID. The lambda defers formatting of the arguments
    # until TimeIt decides to call it.
    with TimeIt(insights.worker_working_time, self.worker_id, insights.max_task_duration_list,
                lambda: self._format_args(args, separator=' | ')):
        task_output = func(args)

    insights.update_n_completed_tasks()
    return task_output
def test_array_storage(self):
    """
    TimeIt should write to the correct idx in the cum_time_array container. The max_time_array is a min-heap
    container, so the lowest value is stored at index 0. The single highest value in this case is stored at
    index 2
    """
    for array_idx in range(5):
        cum_time_array = [0, 0, 0, 0, 0]
        max_time_array = [(0, ''), (0, ''), (0, ''), (0, ''), (0, '')]

        # A single start/end pair for the mocked clock, 4.2 seconds apart
        MockDatetimeNow.RETURN_VALUES = [
            datetime(1970, 1, 1, 0, 0, 0, 0),
            datetime(1970, 1, 1, 0, 0, 4, 200000),
        ]
        MockDatetimeNow.CURRENT_IDX = 0

        # Keep the assertions inside the subTest context so that a failure is reported together with the
        # offending array_idx and the remaining indices are still exercised
        with self.subTest(array_idx=array_idx):
            with patch('mpire.utils.datetime', new=MockDatetimeNow), \
                    TimeIt(cum_time_array, array_idx, max_time_array):
                pass

            # The arrays are inspected only after the TimeIt block has finished. All slots other than the one
            # being written to should be untouched.
            self.assertListEqual(
                [t for idx, t in enumerate(cum_time_array) if idx != array_idx],
                [0, 0, 0, 0])
            self.assertListEqual(
                [t for idx, t in enumerate(max_time_array) if idx != 2],
                [(0, ''), (0, ''), (0, ''), (0, '')])

            # The measured duration should be stored at the requested index and in heap slot 2
            self.assertEqual(cum_time_array[array_idx], 4.2)
            self.assertGreaterEqual(max_time_array[2], (4.2, None))
def run(self) -> None:
    """
    Continuously asks the tasks queue for new task arguments. When not receiving a poison pill or when the max
    life span is not yet reached it will execute the new task and put the results in the results queue.

    The worker is marked alive on entry and is always marked dead on exit, whichever way this method is left.
    """
    self.worker_comms.set_worker_alive()

    try:
        # Store how long it took to start up
        self.worker_insights.update_start_up_time(self.start_time)

        # Obtain additional args to pass to the function. The order is fixed: worker ID, shared objects,
        # worker state
        additional_args = []
        if self.params.pass_worker_id:
            additional_args.append(self.worker_id)
        if self.params.shared_objects is not None:
            additional_args.append(self.params.shared_objects)
        if self.params.use_worker_state:
            additional_args.append(self.worker_state)

        # Run initialization function. If it returns True it means an exception occurred and we should exit
        if self.params.worker_init and self._run_init_func(additional_args):
            return

        # Determine what function to call. If we have to keep in mind the order (for map) we use the helper
        # function with idx support which deals with the provided idx variable.
        helper_func = self._helper_func_with_idx if self.worker_comms.keep_order() else self._helper_func
        func = partial(helper_func, partial(self.params.func, *additional_args))

        n_tasks_executed = 0
        while self.params.worker_lifespan is None or n_tasks_executed < self.params.worker_lifespan:

            # Obtain new chunk of jobs
            with TimeIt(self.worker_insights.worker_waiting_time, self.worker_id):
                next_chunked_args = self.worker_comms.get_task()

            # If we obtained a poison pill, we stop. When no task was returned (None) this means we stop
            # because of an exception in the main process
            if next_chunked_args == POISON_PILL or next_chunked_args is None:
                self.worker_insights.update_task_insights()
                if next_chunked_args == POISON_PILL:
                    self.worker_comms.task_done()

                    # Run exit function when a poison pill was received
                    if self.params.worker_exit:
                        self._run_exit_func(additional_args)
                return

            # Execute jobs in this chunk
            try:
                results = []
                for args in next_chunked_args:

                    # Try to run this function and save results
                    results_part, should_return = self._run_func(func, args)
                    if should_return:
                        return
                    results.append(results_part)

                    # Notify that we've completed a task once in a while (only when using a progress bar)
                    if self.worker_comms.has_progress_bar():
                        self.worker_comms.task_completed_progress_bar()

                # Send results back to main process
                self.worker_comms.add_results(results)
                n_tasks_executed += len(results)

            # In case an exception occurred and we need to return, we want to call task_done no matter what
            finally:
                self.worker_comms.task_done()

            # Update task insights every once in a while (every 2 seconds)
            self.worker_insights.update_task_insights_once_in_a_while()

        # Run exit function and store results
        if self.params.worker_exit and self._run_exit_func(additional_args):
            return

        # Notify WorkerPool to start a new worker if max lifespan is reached. The counter grows by a whole
        # chunk at a time, so it can overshoot the lifespan; compare with >= (the complement of the loop
        # condition) so the restart signal is not skipped in that case.
        self.worker_insights.update_task_insights()
        if self.params.worker_lifespan is not None and n_tasks_executed >= self.params.worker_lifespan:
            self.worker_comms.signal_worker_restart()

        # Force update the number of tasks completed for this worker (only when using a progress bar)
        if self.worker_comms.has_progress_bar():
            self.worker_comms.task_completed_progress_bar(force_update=True)

    finally:
        self.worker_comms.set_worker_dead()
def _exit_func():
    """
    Call the worker exit function with the additional arguments while timing the call.

    This is a closure: ``self`` and ``additional_args`` are taken from the enclosing scope (not visible here).

    :return: Whatever the worker exit function returns
    """
    # The time spent is accumulated in the worker exit time container under this worker's ID
    with TimeIt(self.worker_insights.worker_exit_time, self.worker_id):
        exit_output = self.params.worker_exit(*additional_args)
    return exit_output