def fork_analysis(slices, analysis_func, kw, preserve_result, output_fds):
	from multiprocessing import Process, Queue
	q = Queue()
	children = []
	t = time()
	pid = os.getpid()
	for i in range(slices):
		p = Process(target=call_analysis, args=(analysis_func, i, q, preserve_result, pid, output_fds), kwargs=kw, name='analysis-%d' % (i,))
		p.start()
		children.append(p)
	for fd in output_fds:
		os.close(fd)
	per_slice = []
	temp_files = {}
	no_children_no_messages = False
	while len(per_slice) < slices:
		still_alive = []
		for p in children:
			if p.is_alive():
				still_alive.append(p)
			else:
				p.join()
				if p.exitcode:
					raise Exception("%s terminated with exitcode %d" % (p.name, p.exitcode,))
		children = still_alive
		# If a process dies badly we may never get a message here.
		# No need to handle that very quickly though, 10 seconds is fine.
		# (Typically this is caused by running out of memory.)
		try:
			s_no, s_t, s_temp_files, s_dw_lens, s_dw_minmax, s_tb = q.get(timeout=10)
		except QueueEmpty:
			if not children:
				# No children left, so they must have all sent their messages.
				# Still, just to be sure there isn't a race, wait one iteration more.
				if no_children_no_messages:
					raise Exception("All analysis processes exited cleanly, but not all returned a result.")
				else:
					no_children_no_messages = True
			continue
		if s_tb:
			data = [{'analysis(%d)' % (s_no,): s_tb}, None]
			os.write(_prof_fd, json.dumps(data).encode('utf-8'))
			exitfunction()
		per_slice.append((s_no, s_t))
		temp_files.update(s_temp_files)
		for name, lens in s_dw_lens.items():
			dataset._datasetwriters[name]._lens.update(lens)
		for name, minmax in s_dw_minmax.items():
			dataset._datasetwriters[name]._minmax.update(minmax)
	g.update_top_status("Waiting for all slices to finish cleanup")
	for p in children:
		p.join()
	if preserve_result:
		res_seq = ResultIterMagic(slices, reuse_msg="analysis_res is an iterator, don't re-use it")
	else:
		res_seq = None
	return [v - t for k, v in sorted(per_slice)], temp_files, res_seq
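
# For reference, a minimal, self-contained sketch of the fan-out/collect
# pattern used above: one process per slice, results posted on a Queue,
# dead children reaped so a crashed worker raises instead of hanging.
# The names here (_square_worker, run_slices) are hypothetical stand-ins,
# not accelerator APIs.
from multiprocessing import Process, Queue
from queue import Empty as QueueEmpty

def _square_worker(q, slice_no):
	# Stand-in for the real per-slice analysis work.
	q.put((slice_no, slice_no * slice_no))

def run_slices(slices):
	q = Queue()
	children = [Process(target=_square_worker, args=(q, i), name='analysis-%d' % (i,)) for i in range(slices)]
	for p in children:
		p.start()
	results = {}
	while len(results) < slices:
		# Reap exited children; a non-zero exitcode means a worker crashed.
		still_alive = []
		for p in children:
			if p.is_alive():
				still_alive.append(p)
			else:
				p.join()
				if p.exitcode:
					raise Exception("%s terminated with exitcode %d" % (p.name, p.exitcode,))
		children = still_alive
		try:
			slice_no, value = q.get(timeout=10)
		except QueueEmpty:
			if not children:
				raise Exception("all workers exited cleanly, but some results are missing")
			continue
		results[slice_no] = value
	for p in children:
		p.join()
	return [results[i] for i in range(slices)]

if __name__ == '__main__':
	print(run_slices(4))  # [0, 1, 4, 9]
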
def fork_analysis(slices, concurrency, analysis_func, kw, preserve_result, output_fds, q):
	from multiprocessing import Process
	import gc
	children = []
	t = monotonic()
	pid = os.getpid()
	if hasattr(gc, 'freeze'):
		# See https://bugs.python.org/issue31558
		# (Though we keep the gc disabled by default.)
		gc.freeze()
	delayed_start = False
	delayed_start_todo = 0
	for i in range(slices):
		if i == concurrency:
			assert concurrency != 0
			# The rest will wait on this queue
			delayed_start = os.pipe()
			delayed_start_todo = slices - i
		p = SimplifiedProcess(target=call_analysis, args=(analysis_func, i, delayed_start, q, preserve_result, pid, output_fds), kwargs=kw, name='analysis-%d' % (i,))
		children.append(p)
	for fd in output_fds:
		os.close(fd)
	if delayed_start:
		os.close(delayed_start[0])
	q.make_reader()
	per_slice = []
	temp_files = {}
	no_children_no_messages = False
	reap_time = monotonic() + 5
	exit_count = 0
	while len(per_slice) < slices:
		if exit_count > 0 or reap_time <= monotonic():
			still_alive = []
			for p in children:
				if p.is_alive():
					still_alive.append(p)
				else:
					exit_count -= 1
					if p.exitcode:
						raise AcceleratorError("%s terminated with exitcode %d" % (p.name, p.exitcode,))
			children = still_alive
			reap_time = monotonic() + 5
		# If a process dies badly we may never get a message here.
		# (iowrapper tries to tell us though.)
		# No need to handle that very quickly though, 10 seconds is fine.
		# (Typically this is caused by running out of memory.)
		try:
			msg = q.get(timeout=10)
			if not msg:
				# Notification from iowrapper, so we wake up (quickly) even if
				# the process died badly (e.g. from running out of memory).
				exit_count += 1
				continue
			s_no, s_t, s_temp_files, s_dw_lens, s_dw_minmax, s_dw_compressions, s_tb = msg
		except QueueEmpty:
			if not children:
				# No children left, so they must have all sent their messages.
				# Still, just to be sure there isn't a race, wait one iteration more.
				if no_children_no_messages:
					raise AcceleratorError("All analysis processes exited cleanly, but not all returned a result.")
				else:
					no_children_no_messages = True
			continue
		if s_tb:
			data = [{'analysis(%d)' % (s_no,): s_tb}, None]
			writeall(_prof_fd, json.dumps(data).encode('utf-8'))
			exitfunction()
		if delayed_start_todo:
			# Another analysis is allowed to run now
			os.write(delayed_start[1], b'a')
			delayed_start_todo -= 1
		per_slice.append((s_no, s_t))
		temp_files.update(s_temp_files)
		for name, lens in s_dw_lens.items():
			dataset._datasetwriters[name]._lens.update(lens)
		for name, minmax in s_dw_minmax.items():
			dataset._datasetwriters[name]._minmax.update(minmax)
		for name, compressions in s_dw_compressions.items():
			dataset._datasetwriters[name]._compressions.update(compressions)
	g.update_top_status("Waiting for all slices to finish cleanup")
	q.close()
	if delayed_start:
		os.close(delayed_start[1])
	for p in children:
		p.join()
	if preserve_result:
		res_seq = ResultIterMagic(slices, reuse_msg="analysis_res is an iterator, don't re-use it")
	else:
		res_seq = None
	return [v - t for k, v in sorted(per_slice)], temp_files, res_seq
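
# The delayed-start pipe is the interesting addition in this version: children
# beyond `concurrency` block in os.read() on a shared pipe, and the parent
# releases one of them per finished slice by writing a single byte. Below is a
# minimal sketch of just that mechanism. It assumes the 'fork' start method
# (the pipe fds must be inherited); _throttled_worker and run_limited are
# hypothetical stand-ins, not accelerator APIs (the real code goes through
# SimplifiedProcess and call_analysis).
import os
import multiprocessing as mp

def _throttled_worker(q, slice_no, delayed_start):
	if delayed_start:
		os.close(delayed_start[1])
		# Block until the parent writes one byte, i.e. until a slot frees up.
		assert os.read(delayed_start[0], 1) == b'a'
		os.close(delayed_start[0])
	q.put(slice_no)  # stand-in for the real analysis result

def run_limited(slices, concurrency):
	q = mp.Queue()
	delayed_start = False
	delayed_start_todo = 0
	children = []
	for i in range(slices):
		if i == concurrency:
			# Children forked from here on sleep in os.read() until released.
			delayed_start = os.pipe()
			delayed_start_todo = slices - i
		p = mp.Process(target=_throttled_worker, args=(q, i, delayed_start))
		p.start()
		children.append(p)
	if delayed_start:
		os.close(delayed_start[0])  # only the children read from the pipe
	done = []
	while len(done) < slices:
		done.append(q.get())
		if delayed_start_todo:
			os.write(delayed_start[1], b'a')  # one byte wakes exactly one reader
			delayed_start_todo -= 1
	if delayed_start:
		os.close(delayed_start[1])
	for p in children:
		p.join()
	return sorted(done)

if __name__ == '__main__':
	mp.set_start_method('fork')  # required: children inherit the pipe fds
	print(run_limited(8, 2))
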