class UIProgress(object):
    """Progress display used with multi-process socket communication."""

    def __init__(self, a_pid):
        """Create and display the widgets for the process with pid ``a_pid``.

        :param a_pid: pid shown in the initial text of the label widget
        """
        bar = FloatProgress(value=0, min=0, max=100)
        label = Text('pid={} begin work'.format(a_pid))
        self.progress_widget = bar
        self.text_widget = label
        # Both widgets are placed into a single Box container.
        container = Box([label, bar])
        self.progress_box = container
        display(container)

    def update(self, p_progress, p_progress_text):
        """Set the bar value and the accompanying text.

        :param p_progress: new value for the progress widget
        :param p_progress_text: new value for the text widget
        """
        bar, label = self.progress_widget, self.text_widget
        bar.value = p_progress
        label.value = p_progress_text

    def close(self):
        """Close the Box container holding the widgets."""
        container = self.progress_box
        container.close()
class AbuMulPidProgress(object):
    """Progress display controller that works across multiple processes.

    In the main process (``ABuEnv.g_main_pid == os.getpid()``) widgets are
    created and updated directly.  In any other process the events are sent
    through ``ABuOsUtil.socket_send_msg`` as ``'<pid>|init'``,
    ``'<pid>|<progress>|<text>'`` and ``'<pid>|close'`` messages; per the
    original author's notes the main process is expected to create, update
    and close the matching progress bar for that pid.
    """

    def __init__(self, total, label, show_progress=True):
        """
        Example usage:

            with AbuMulPidProgress(len(self.choice_symbols),
                                   'pick stocks complete') as progress:
                for epoch, target_symbol in enumerate(self.choice_symbols):
                    progress.show(epoch + 1)

        :param total: total number of tasks
        :param label: label shown in front of the percentage
        :param show_progress: when False every method becomes a no-op
        """
        self._total = total
        self._label = label
        self.epoch = 0
        self.display_step = 1
        self.progress_widget = None
        self.text_widget = None
        self.progress_box = None
        self.show_progress = show_progress

    def _ui_disabled(self):
        """Return True when no widget/socket progress should be produced."""
        return (not self.show_progress
                or not ABuEnv.g_is_ipython
                or self._total < 2)

    # A problem with the UI progress bar must never interrupt the task itself.
    # NOTE(review): catch_error is defined elsewhere; presumably it swallows
    # exceptions raised by the decorated method - confirm.
    @catch_error(log=False)
    def init_ui_progress(self):
        """Create the UI progress bar (main process) or request one (child)."""
        if self._ui_disabled():
            return

        if ABuEnv.g_main_pid == os.getpid():
            # Main process: build and display the widgets directly.
            self.progress_widget = FloatProgress(value=0, min=0, max=100)
            self.text_widget = Text('pid={} begin work'.format(os.getpid()))
            self.progress_box = Box([self.text_widget, self.progress_widget])
            display(self.progress_box)
        elif g_show_ui_progress and g_socket_fn is not None:
            # Child process: send our pid over the socket so that the main
            # process can create the progress bar.
            ABuOsUtil.socket_send_msg(g_socket_fn,
                                      '{}|init'.format(os.getpid()))

    # A problem with the UI progress bar must never interrupt the task itself.
    @catch_error(log=False)
    def update_ui_progress(self, ps, ps_text):
        """Update the UI progress bar value and text.

        :param ps: progress value (a percentage as computed by ``show``)
        :param ps_text: text displayed next to the bar
        """
        if self._ui_disabled():
            return

        if ABuEnv.g_main_pid == os.getpid():
            # Main process: update the widgets directly when they exist.
            if self.progress_widget is not None:
                self.progress_widget.value = ps
            if self.text_widget is not None:
                self.text_widget.value = ps_text
        elif g_show_ui_progress and g_socket_fn is not None:
            # Child process: send pid, progress and text over the socket.
            ABuOsUtil.socket_send_msg(
                g_socket_fn, '{}|{}|{}'.format(os.getpid(), ps, ps_text))

    # A problem with the UI progress bar must never interrupt the task itself.
    @catch_error(log=False)
    def close_ui_progress(self):
        """Close the UI progress bar."""
        if self._ui_disabled():
            return

        if ABuEnv.g_main_pid == os.getpid():
            # Main process: close the container directly when it exists.
            if self.progress_box is not None:
                self.progress_box.close()
        elif g_show_ui_progress and g_socket_fn is not None:
            # Child process: ask for the bar belonging to our pid to be closed.
            ABuOsUtil.socket_send_msg(g_socket_fn,
                                      '{}|close'.format(os.getpid()))

    def __enter__(self):
        """Enter the context: choose ``display_step`` from ``total``, reset the
        epoch counter and initialise the UI progress bar.

        :return: self
        """
        if self.show_progress:
            total = self._total
            if total >= 5000:
                self.display_step = 50
            elif total >= 3000:
                self.display_step = 30
            elif total >= 2000:
                self.display_step = 20
            elif total > 1000:
                # NOTE(review): strict '>' unlike the other thresholds, so a
                # total of exactly 1000 gets step 6; kept as in the original.
                self.display_step = 10
            elif total >= 600:
                self.display_step = 6
            elif total >= 300:
                self.display_step = 3
            elif total >= 20:
                # The original had separate >= 100 and >= 20 branches that
                # both selected a step of 2.
                self.display_step = 2
            else:
                self.display_step = 1

            self.epoch = 0
            self.init_ui_progress()
        return self

    def show(self, epoch=None, clear=True):
        """Main entry point: record the current epoch and display progress.

        Progress is displayed every ``display_step`` epochs and additionally
        on the final epoch (``epoch == total``), so that the display is not
        left below 100% when ``total`` is not a multiple of ``display_step``.

        :param epoch: current iteration; None (default) increments the
                      internally tracked epoch by one
        :param clear: default True; call ``do_clear_output`` before printing
                      the text progress
        :return: None
        """
        if not self.show_progress:
            return

        self.epoch = epoch if epoch is not None else self.epoch + 1
        is_final = self.epoch == self._total
        if self.epoch % self.display_step != 0 and not is_final:
            return

        if self._total > 0:
            # float() keeps this a true division on every interpreter.
            ps = round(float(self.epoch) / self._total * 100, 2)
            ps = 100 if ps > 100 else ps
        else:
            # Nothing to do at all: report completion rather than dividing
            # by zero.
            ps = 100
        ps_text = "pid:{} {}:{}%".format(os.getpid(), self._label, ps)

        if not ABuEnv.g_is_ipython or self._total < 2:
            # Text progress is printed only when the widget path is not used.
            if clear:
                do_clear_output()
            print(ps_text)

        self.update_ui_progress(ps, ps_text)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the context and close the UI progress bar.

        The original implementation carried a hard-disabled
        ``do_clear_output(wait=True)`` branch (noted as useful on mac but
        blocking in some Windows browsers); it could never execute and has
        been removed.  Returns None, so exceptions are not suppressed.
        """
        if not self.show_progress:
            return
        self.close_ui_progress()
class Parallel(object):
    """
    Connect to or launch an ipcluster and wrap jobs running on Client
    engines so that engines can be interrupted or killed with a clean
    shutdown.
    """

    def __init__(self, tool, rkwargs=None, ipyclient=None, show_cluster=True,
                 quiet=False, auto=False):
        """
        :param tool: object with a ``._run()`` method and an ``ipcluster``
                     dict (keys used here: cluster_id, engines, profile,
                     cores, timeout, pids)
        :param rkwargs: keyword arguments forwarded to ``tool._run``;
                        None means no extra arguments
        :param ipyclient: an already connected client, or None
        :param show_cluster: show a closing message when the hub is shut down
        :param quiet: when True the message widget is not displayed
        :param auto: when True an ipcluster is started (and later shut down)
        """
        # if no kwargs then empty dict
        if rkwargs is None:
            rkwargs = {}
        self._quiet = quiet

        # the tool with a ._run() func and its run kwargs to be parallelized
        self.tool = tool
        self.rkwargs = rkwargs

        # parallel client connect or launch params
        self.ipyclient = ipyclient
        self.show_cluster = show_cluster
        self.auto = auto

        # setup the widget message to be passed on to _run
        self.message = HTML(layout={"height": "25px", "margin": "0px"})
        self.widget = Box(
            children=[self.message],
            layout={"margin": "5px 0px 5px 0px"},
        )
        self.update_message("Establishing parallel connection: ...")
        if not self._quiet:
            # show the widget message
            display(self.widget)

    def update_message(self, inner):
        """Set the message widget value to ``inner`` wrapped in a styled span."""
        s1 = "<span style='font-size:14px; font-family:monospace'>"
        s2 = "</span>"
        self.message.value = s1 + inner + s2

    def start_ipcluster(self):
        """
        Start a daemonized ipcluster under a random, unique cluster id.

        The id is stored in ``tool.ipcluster["cluster_id"]``.  If the first
        start attempt fails, a cluster with that id is stopped and the start
        is retried once after a short wait; a second CalledProcessError is
        printed and re-raised.  Any other exception from the first attempt
        exits the interpreter with an error message.
        """
        # use a random number for the cluster_id
        rand = getrandbits(32)
        self.tool.ipcluster["cluster_id"] = "ipp-{}".format(rand)

        # make ipcluster arg call
        standard = [
            IPCLUSTERBIN,
            "start",
            "--daemonize",
            "--cluster-id={}".format(self.tool.ipcluster["cluster_id"]),
            "--engines={}".format(self.tool.ipcluster["engines"]),
            "--profile={}".format(self.tool.ipcluster["profile"]),
            "--n={}".format(self.tool.ipcluster["cores"]),
        ]
        # if engines=="MPI" then add --ip arg to view all sockets; for other
        # engines nothing is appended (previously an empty-string argument
        # was passed to ipcluster).
        if "MPI" in self.tool.ipcluster["engines"]:
            standard.append("--ip=*")

        # Output is discarded.  DEVNULL is used instead of PIPE because
        # check_call never reads the pipe, so a chatty child could block.
        silent = {"stderr": subprocess.STDOUT, "stdout": subprocess.DEVNULL}

        # wrap ipcluster start
        try:
            subprocess.check_call(standard, **silent)

        # if cluster with THIS ID is running then kill it and try again
        except subprocess.CalledProcessError:
            subprocess.check_call(
                [
                    IPCLUSTERBIN,
                    "stop",
                    "--cluster-id",
                    self.tool.ipcluster["cluster_id"],
                ],
                **silent
            )

            # after waiting try again to start it
            time.sleep(3)
            try:
                subprocess.check_call(standard, **silent)

            # if fails again then report it
            except subprocess.CalledProcessError as inst:
                print(inst)
                raise

        except Exception as inst:
            sys.exit("Error launching ipcluster for parallelization:\n({})\n".
                     format(inst))

    def wait_for_connection(self):
        """
        Create a client for the tool's profile / cluster_id and poll it for
        engines, then return it.

        Polling runs for at most 6000 iterations and stops early when:
        the requested number of cores is found (if ``cores`` is set); or,
        for MPI engines, the engine count stops changing over one second;
        or, otherwise, as soon as at least one engine is present.

        :raises SimcatError: if connecting fails with IOError, OSError,
            ipp.TimeoutError or ipp.NoEnginesRegistered
        :return: the connected client
        """
        # save stds for later, hide here to prevent ipp enforced print()
        save_stdout = sys.stdout
        save_stderr = sys.stderr
        sys.stdout = StringIO()
        sys.stderr = StringIO()

        # wrapped search for ipcluster
        try:
            args = {
                "profile": self.tool.ipcluster["profile"],
                "timeout": self.tool.ipcluster["timeout"],
                "cluster_id": self.tool.ipcluster["cluster_id"],
            }
            ipyclient = ipp.Client(**args)

            # allow time to find the connection; count cores to break
            for _ in range(6000):

                # how many cores can we find right now?
                ncores = len(ipyclient)
                time.sleep(0.01)

                # are all cores found yet? if so, break.
                if self.tool.ipcluster["cores"]:
                    time.sleep(0.1)
                    if ncores == self.tool.ipcluster["cores"]:
                        break

                # If MPI and not all found, break if no more found after wait
                elif self.tool.ipcluster["engines"] == "MPI":
                    # are any cores found yet? do long wait.
                    if ncores:
                        time.sleep(1)
                        if len(ipyclient) == ncores:
                            break

                # if Local then if at least one core we're happy to move on.
                else:
                    if ncores:
                        time.sleep(0.5)
                        break

        except KeyboardInterrupt:
            raise

        except (IOError, OSError, ipp.TimeoutError, ipp.NoEnginesRegistered):
            raise SimcatError(
                "\nipcluster not found, use 'auto=True' or see docs.")

        finally:
            # no matter what we reset the stds
            sys.stdout = save_stdout
            sys.stderr = save_stderr

        self.update_message("Parallel connection: {}".format(len(ipyclient)))
        return ipyclient

    def get_cluster_info(self):
        """Report how many engines run on each host in the message widget."""
        # get engine data, skips busy engines.
        pending = []
        for eid in self.ipyclient.ids:
            engine = self.ipyclient[eid]
            if not engine.outstanding:
                pending.append(engine.apply(socket.gethostname))

        # count engines per host in a single pass (first-seen order)
        hostdict = {}
        for result in pending:
            hostname = result.get()
            hostdict[hostname] = hostdict.get(hostname, 0) + 1

        # report it
        hpairs = [
            "<i>{}</i>: {} cores".format(key, val)
            for (key, val) in hostdict.items()
        ]
        self.update_message("Parallelization: {}".format(", ".join(hpairs)))

    def store_pids_for_shutdown(self):
        "reset tool ipcluster dict pids dict and set with current engine pids"
        self.tool.ipcluster["pids"] = {}
        for eid in self.ipyclient.ids:
            engine = self.ipyclient[eid]
            # busy engines are skipped
            if not engine.outstanding:
                pid = engine.apply(os.getpid).get()
                self.tool.ipcluster["pids"][eid] = pid

    def wrap_run(self, dry_run=False):
        """
        Takes an analysis tools object with an associated _ipcluster attribute
        dictionary and either launches an ipcluster instance or connects to a
        running one. The ipyclient arg overrides the auto arg.

        :param dry_run: when True, connect and collect cluster info but do
                        not call ``tool._run``
        """
        try:
            # check that ipyclient is connected (3 seconds tries)
            if self.ipyclient:
                for _ in range(3):
                    if len(self.ipyclient):
                        break
                    time.sleep(1)
                # Raised explicitly (same exception type as the former
                # assert) so the check is not stripped under ``python -O``.
                if not len(self.ipyclient):
                    raise AssertionError("ipcluster not connected/running.")

            else:
                # set ncores to max if user did not set
                if not self.tool.ipcluster["cores"]:
                    self.tool.ipcluster["cores"] = detect_cpus()

                # launch ipcluster and get the parallel client with ipp-{} id
                if self.auto:
                    # start ipcluster and attach to its cluster-id
                    self.start_ipcluster()
                    self.ipyclient = self.wait_for_connection()

                # neither auto or ipyclient we'll still look for default
                # profile running ipcluster.
                else:
                    self.ipyclient = self.wait_for_connection()

            # print cluster stats at this point
            self.widget.close()
            self.get_cluster_info()

            # before running any jobs store engine pids for hard shutdown
            self.store_pids_for_shutdown()

            # run the job
            if not dry_run:
                self.tool._run(
                    **self.rkwargs,
                    ipyclient=self.ipyclient,
                    children=[self.message],
                )

        # print the error and cleanup
        except KeyboardInterrupt:
            print("\nKeyboard Interrupt by user\n")

        except Exception as inst:
            print("\nEncountered an error:\n{}\n".format(inst))
            raise

        # cancel/kill any unfinished jobs and shutdown hub if 'auto=True'
        finally:
            self.cleanup()

    def cleanup(self):
        "Cancel or kill unfinished jobs and shutdown hub if auto=True"
        try:
            # can't close client if it was never open
            if self.ipyclient:

                # Interrupt: send SIGINT (2) to all engines if any engines
                try:
                    self.ipyclient.abort()
                    time.sleep(1)
                    for eid, pid in self.tool.ipcluster["pids"].items():
                        # status is re-queried for every engine
                        if self.ipyclient.queue_status()[eid]["tasks"]:
                            # signal 2 to the stored engine pid
                            os.kill(pid, 2)
                    time.sleep(3)
                except ipp.NoEnginesRegistered:
                    pass

                # Cleanup: purge memory so we can reuse the Client
                if not self.ipyclient.outstanding:
                    self.ipyclient.purge_everything()
                else:
                    # jobs still outstanding: force a hub shutdown below
                    self.auto = True
                    self.update_message(
                        "Error: ipcluster shutdown and must be restarted")

                # Shutdown the hub if it was auto-launched or broken
                if self.auto:
                    self.ipyclient.shutdown(hub=True, block=False)
                    self.ipyclient.close()
                    if self.show_cluster:
                        self.update_message("Parallel connection closed.")
                        time.sleep(0.5)

            # close the cluster info
            self.widget.close()

        except Exception as inst2:
            # best-effort shutdown: report but never raise from cleanup
            print("warning: error during shutdown:\n{}".format(inst2))
class AbuMulPidProgress(object):
    """Progress display controller that works across multiple processes.

    In the main process (``ABuEnv.g_main_pid == os.getpid()``) widgets are
    created and updated directly.  In any other process the events are sent
    through ``ABuOsUtil.socket_send_msg`` as ``'<pid>|init'``,
    ``'<pid>|<progress>|<text>'`` and ``'<pid>|close'`` messages; per the
    original author's notes the main process is expected to create, update
    and close the matching progress bar for that pid.
    """

    def __init__(self, total, label, show_progress=True):
        """
        Example usage:

            with AbuMulPidProgress(len(self.choice_symbols),
                                   'pick stocks complete') as progress:
                for epoch, target_symbol in enumerate(self.choice_symbols):
                    progress.show(epoch + 1)

        :param total: total number of tasks
        :param label: label shown in front of the percentage
        :param show_progress: when False every method becomes a no-op
        """
        self._total = total
        self._label = label
        self.epoch = 0
        self.display_step = 1
        self.progress_widget = None
        self.text_widget = None
        self.progress_box = None
        self.show_progress = show_progress

    def _ui_disabled(self):
        """Return True when no widget/socket progress should be produced."""
        return (not self.show_progress
                or not ABuEnv.g_is_ipython
                or self._total < 2)

    # A problem with the UI progress bar must never interrupt the task itself.
    # NOTE(review): catch_error is defined elsewhere; presumably it swallows
    # exceptions raised by the decorated method - confirm.
    @catch_error(log=False)
    def init_ui_progress(self):
        """Create the UI progress bar (main process) or request one (child)."""
        if self._ui_disabled():
            return

        if ABuEnv.g_main_pid == os.getpid():
            # Main process: build and display the widgets directly.
            self.progress_widget = FloatProgress(value=0, min=0, max=100)
            self.text_widget = Text('pid={} begin work'.format(os.getpid()))
            self.progress_box = Box([self.text_widget, self.progress_widget])
            display(self.progress_box)
        elif g_show_ui_progress and g_socket_fn is not None:
            # Child process: send our pid over the socket so that the main
            # process can create the progress bar.
            ABuOsUtil.socket_send_msg(g_socket_fn,
                                      '{}|init'.format(os.getpid()))

    # A problem with the UI progress bar must never interrupt the task itself.
    @catch_error(log=False)
    def update_ui_progress(self, ps, ps_text):
        """Update the UI progress bar value and text.

        :param ps: progress value (a percentage as computed by ``show``)
        :param ps_text: text displayed next to the bar
        """
        if self._ui_disabled():
            return

        if ABuEnv.g_main_pid == os.getpid():
            # Main process: update the widgets directly when they exist.
            if self.progress_widget is not None:
                self.progress_widget.value = ps
            if self.text_widget is not None:
                self.text_widget.value = ps_text
        elif g_show_ui_progress and g_socket_fn is not None:
            # Child process: send pid, progress and text over the socket.
            ABuOsUtil.socket_send_msg(
                g_socket_fn, '{}|{}|{}'.format(os.getpid(), ps, ps_text))

    # A problem with the UI progress bar must never interrupt the task itself.
    @catch_error(log=False)
    def close_ui_progress(self):
        """Close the UI progress bar."""
        if self._ui_disabled():
            return

        if ABuEnv.g_main_pid == os.getpid():
            # Main process: close the container directly when it exists.
            if self.progress_box is not None:
                self.progress_box.close()
        elif g_show_ui_progress and g_socket_fn is not None:
            # Child process: ask for the bar belonging to our pid to be closed.
            ABuOsUtil.socket_send_msg(g_socket_fn,
                                      '{}|close'.format(os.getpid()))

    def __enter__(self):
        """Enter the context: choose ``display_step`` from ``total``, reset the
        epoch counter and initialise the UI progress bar.

        :return: self
        """
        if self.show_progress:
            total = self._total
            if total >= 5000:
                self.display_step = 50
            elif total >= 3000:
                self.display_step = 30
            elif total >= 2000:
                self.display_step = 20
            elif total > 1000:
                # NOTE(review): strict '>' unlike the other thresholds, so a
                # total of exactly 1000 gets step 6; kept as in the original.
                self.display_step = 10
            elif total >= 600:
                self.display_step = 6
            elif total >= 300:
                self.display_step = 3
            elif total >= 20:
                # The original had separate >= 100 and >= 20 branches that
                # both selected a step of 2.
                self.display_step = 2
            else:
                self.display_step = 1

            self.epoch = 0
            self.init_ui_progress()
        return self

    def show(self, epoch=None, clear=True):
        """Main entry point: record the current epoch and display progress.

        Progress is displayed every ``display_step`` epochs and additionally
        on the final epoch (``epoch == total``), so that the display is not
        left below 100% when ``total`` is not a multiple of ``display_step``.

        :param epoch: current iteration; None (default) increments the
                      internally tracked epoch by one
        :param clear: default True; call ``do_clear_output`` before printing
                      the text progress
        :return: None
        """
        if not self.show_progress:
            return

        self.epoch = epoch if epoch is not None else self.epoch + 1
        is_final = self.epoch == self._total
        if self.epoch % self.display_step != 0 and not is_final:
            return

        if self._total > 0:
            # float() keeps this a true division on every interpreter.
            ps = round(float(self.epoch) / self._total * 100, 2)
            ps = 100 if ps > 100 else ps
        else:
            # Nothing to do at all: report completion rather than dividing
            # by zero.
            ps = 100
        ps_text = "pid:{} {}:{}%".format(os.getpid(), self._label, ps)

        if not ABuEnv.g_is_ipython or self._total < 2:
            # Text progress is printed only when the widget path is not used.
            if clear:
                do_clear_output()
            print(ps_text)

        self.update_ui_progress(ps, ps_text)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the context and close the UI progress bar.

        The original implementation carried a hard-disabled
        ``do_clear_output(wait=True)`` branch (noted as useful on mac but
        blocking in some Windows browsers); it could never execute and has
        been removed.  Returns None, so exceptions are not suppressed.
        """
        if not self.show_progress:
            return
        self.close_ui_progress()