def _init_compute_log(self):
    log_memory_usage(time_as_str(2) + ": starting execution. mem usage")
    logger.info(f" topology: {str_short(type(self.topology))}")
    logger.info(f" executor: {str_short(type(self))}")
    logger.info(f" nb_cpus_allowed = {nb_cores}")
    logger.info(f" nb_max_workers = {self.nb_max_workers}")
    logger.info(f" path_dir_result = {self.path_dir_result}")
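
The helpers time_as_str and log_memory_usage come from the fluiddyn utilities. As a rough, self-contained sketch of what they provide (stdlib-only stand-ins matching the call sites in these listings, not the real fluiddyn API):

import datetime
import logging
import resource  # Unix-only; a portable version would use psutil

logger = logging.getLogger(__name__)

def time_as_str(decimal=0):
    # e.g. "2024-01-01_12-00-00"; the decimal argument (subsecond
    # precision in fluiddyn) is ignored in this sketch
    return datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

def log_memory_usage(prefix="mem usage", color=None, mode="info"):
    # ru_maxrss is in kilobytes on Linux; color is ignored in this sketch
    mem_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
    getattr(logger, mode)(f"{prefix}: {mem_mb:.1f} Mb")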
def __init__(self, params, work, async_proc_class, logging_level="info"):
    self.params = params
    self.async_process_class = async_proc_class
    self.images_path = os.path.join(params.series.path)
    images_dir_name = self.params.series.path.split("/")[-1]
    self.saving_path = os.path.join(
        os.path.dirname(params.series.path),
        str(images_dir_name) + "." + params.saving.postfix,
    )
    self.series = []
    self.processes = []
    self.async_process = []
    self.work = work

    # Managing dir paths
    # fail early if the image directory is empty
    assert os.listdir(self.images_path)
    if not os.path.exists(self.saving_path):
        os.makedirs(self.saving_path)

    # Logger (the saving directory must exist before the log file is opened)
    log = os.path.join(
        self.saving_path,
        "log_" + time_as_str() + "_" + str(os.getpid()) + ".txt",
    )
    log_file = open(log, "w")
    sys.stdout = MultiFile([sys.stdout, log_file])
    config_logging("info", file=sys.stdout)
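
MultiFile is the fluiddyn tee object that lets the same write reach both the terminal and the log file. A minimal sketch of the behavior the snippet relies on (not the real implementation):

class MultiFile:
    """Duplicate every write to several file-like objects."""

    def __init__(self, files):
        self._files = files

    def write(self, text):
        for file in self._files:
            file.write(text)

    def flush(self):
        for file in self._files:
            file.flush()

After sys.stdout = MultiFile([sys.stdout, log_file]), every print() and every logging handler writing to sys.stdout ends up both on screen and in the log.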
def __init__(self, queues, path_output=None, logging_level="info", nb_max_workers=None):
    if path_output is not None:
        if not os.path.exists(path_output):
            os.makedirs(path_output)
        self.path_output = path_output
        log = os.path.join(
            path_output,
            "log_" + time_as_str() + "_" + str(os.getpid()) + ".txt",
        )
        self._log_file = open(log, "w")

        stdout = sys.stdout
        if isinstance(stdout, MultiFile):
            stdout = sys.__stdout__

        stderr = sys.stderr
        if isinstance(stderr, MultiFile):
            stderr = sys.__stderr__

        sys.stdout = MultiFile([stdout, self._log_file])
        sys.stderr = MultiFile([stderr, self._log_file])

    if logging_level is not None:
        reset_logger()
        config_logging(logging_level, file=sys.stdout)

    if nb_max_workers is None:
        nb_max_workers = _nb_max_workers

    self.nb_max_workers_io = max(int(nb_max_workers * 0.8), 2)
    self.nb_max_launch = max(int(self.nb_max_workers_io), 1)

    if nb_max_workers < 1:
        raise ValueError("nb_max_workers < 1")

    logger.info(f" nb_cpus_allowed = {nb_cores}")
    logger.info(f" nb_max_workers = {nb_max_workers}")
    logger.info(f" nb_max_workers_io = {self.nb_max_workers_io}")

    self.queues = queues
    self.nb_max_workers = nb_max_workers
    self.nb_cores = nb_cores
    self.nb_items_lim = max(2 * nb_max_workers, 2)

    self._has_to_stop = False

    if sys.platform != "win32":

        def handler_signals(signal_number, stack):
            print(
                "signal {} received: set _has_to_stop to True".format(
                    signal_number
                )
            )
            self._has_to_stop = True

        signal.signal(12, handler_signals)
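
On Linux, signal number 12 is SIGUSR2, so a job scheduler or a user can ask a running computation to stop cleanly instead of killing it. A hypothetical session from another process (the pid is an assumption):

# Request a clean stop of a running computation.
import os
import signal

pid = 12345  # hypothetical pid of the process running the topology
os.kill(pid, signal.SIGUSR2)
# The handler above sets _has_to_stop to True; compute() then stops
# launching work, joins its workers and, if has_to_exit is True, exits
# with code 99 (see compute() below).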
def main():
    # Define paths
    sub_path_image = "Images2"
    path_save = "../../image_samples/Karman/{}.results.async/".format(
        sub_path_image
    )
    path = "../../image_samples/Karman/{}/".format(sub_path_image)

    # Managing dir paths
    assert os.listdir(path)
    if not os.path.exists(path_save):
        os.makedirs(path_save)

    # Logger (the saving directory must exist before the log file is opened)
    log = os.path.join(
        path_save, "log_" + time_as_str() + "_" + str(os.getpid()) + ".txt"
    )
    log_file = open(log, "w")
    sys.stdout = MultiFile([sys.stdout, log_file])
    config_logging("info", file=sys.stdout)

    def partition(lst, n):
        """Partition lst evenly into n sublists, prepending the last image
        of each sublist to the head of the next one so that the couples
        spanning two sublists are also computed.

        :param lst: a list
        :param n: number of sublists wanted
        :return: a sliced list
        """
        L = len(lst)
        assert 0 < n <= L
        s, r = divmod(L, n)
        t = s + 1
        lst = [lst[p : p + t] for p in range(0, r * t, t)] + [
            lst[p : p + s] for p in range(r * t, L, s)
        ]
        # add the last image of each sublist to the head of the next one
        for i in range(1, n):
            lst[i].insert(0, lst[i - 1][-1])
        return lst

    nb_process = multiprocessing.cpu_count()

    # splitting the image list
    listdir = os.listdir(path)
    if len(listdir) <= nb_process:
        # fewer couples to compute than CPUs: adapt the number of processes
        nb_process = len(listdir) - 1
    print("nb process: {}".format(nb_process))
    listdir.sort()
    listdir = partition(listdir, nb_process)

    # making and starting processes
    processes = []
    for i in range(nb_process):
        async_piv = AsyncPiv(path, path_save)
        p = multiprocessing.Process(
            target=async_piv.a_process, args=(listdir[i],)
        )
        p.start()
        processes.append(p)  # keep a reference so that the join below works
    for p in processes:
        p.join()
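
The overlap added by partition is what guarantees that the couple straddling two consecutive sublists is still computed. A worked example with 7 images over 3 processes:

# divmod(7, 3) == (2, 1): one sublist of 3 items, then two of 2, and each
# sublist after the first starts with the last item of its predecessor.
>>> partition([0, 1, 2, 3, 4, 5, 6], 3)
[[0, 1, 2], [2, 3, 4], [4, 5, 6]]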
def __init__(
    self,
    path_dir=None,
    path_output=None,
    logging_level="info",
    nb_max_workers=None,
):
    super().__init__(
        logging_level=logging_level, nb_max_workers=nb_max_workers
    )

    if path_dir is None:
        self.path_dir = "../../../image_samples/Karman/Images2"
    else:
        self.path_dir = path_dir

    if path_output is not None:
        if not os.path.exists(path_output):
            os.makedirs(path_output)
        self.path_output = path_output
        log = os.path.join(
            path_output,
            "log_" + time_as_str() + "_" + str(os.getpid()) + ".txt",
        )

        stdout = sys.stdout
        if isinstance(stdout, MultiFile):
            stdout = _stdout_at_import

        stderr = sys.stderr
        if isinstance(stderr, MultiFile):
            stderr = _stderr_at_import

        self._log_file = open(log, "w")
        sys.stdout = MultiFile([stdout, self._log_file])
        sys.stderr = MultiFile([stderr, self._log_file])

    if logging_level is not None:
        # iterate over a copy: removing handlers while iterating over
        # logger.handlers itself would skip every other handler
        for handler in list(logger.handlers):
            logger.removeHandler(handler)
        config_logging(logging_level, file=sys.stdout)

    if hasattr(self, "path_output"):
        logger.info("path results:\n" + self.path_output)

    self.img_counter = 0

    queue_names_img1 = self.add_queue("names img 1")
    queue_names_img2 = self.add_queue("names img 2")
    queue_array_couple = self.add_queue("array couples")
    queue_cpu1 = self.add_queue("queue_cpu1")
    queue_cpu2 = self.add_queue("queue_cpu2")

    self.add_work(
        "fill names",
        func_or_cls=self.fill_names,
        output_queue=(queue_names_img1, queue_names_img2),
        kind=("global", "one shot"),
    )
    self.add_work(
        "make couple",
        func_or_cls=self.make_couple,
        input_queue=(queue_names_img1, queue_names_img2),
        output_queue=queue_array_couple,
        kind=("global", "io"),
    )
    self.add_work(
        "cpu1",
        func_or_cls=self.cpu1,
        input_queue=queue_array_couple,
        output_queue=queue_cpu1,
        kind="server",
    )
    self.add_work(
        "cpu2",
        func_or_cls=self.cpu2,
        params_cls=None,
        input_queue=queue_cpu1,
        output_queue=queue_cpu2,
        kind="server",
    )
    self.add_work(
        "save",
        func_or_cls=self.save,
        params_cls=None,
        input_queue=queue_cpu2,
    )
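
Assuming the class above is called TopologyExample (a name chosen here for illustration), building and running the pipeline could look like:

# Hypothetical usage; the class name and the paths are assumptions.
topology = TopologyExample(
    path_dir="../../../image_samples/Karman/Images2",
    path_output="../../../image_samples/Karman/Images2.results_example",
)
topology.compute()  # runs fill names -> make couple -> cpu1 -> cpu2 -> save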
def _init_log_path(self):
    name = "_".join(("log", time_as_str(), str(os.getpid())))
    self.path_dir_exceptions = self.path_dir_result / name
    self._log_path = self.path_dir_result / (name + ".txt")
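
With pathlib, the two attributes end up side by side under path_dir_result. For hypothetical values:

from pathlib import Path

path_dir_result = Path("/data/run.results")    # hypothetical
name = "log_2024-01-01_12-00-00_4242"          # "log" + time + pid
path_dir_exceptions = path_dir_result / name            # .../log_..._4242
log_path = path_dir_result / (name + ".txt")            # .../log_..._4242.txt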
def _finalize_compute(self):
    log_memory_usage(time_as_str(2) + ": end of `compute`. mem usage")
    self.topology.print_at_exit(time() - self.t_start)
    self._reset_std_as_default()
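
_reset_std_as_default presumably undoes the MultiFile redirection. A minimal sketch, assuming the module saved the original streams at import time (as the _stdout_at_import name used earlier suggests):

import sys

_stdout_at_import = sys.stdout
_stderr_at_import = sys.stderr

def _reset_std_as_default(self):
    # restore the streams captured at import and release the log file
    sys.stdout = _stdout_at_import
    sys.stderr = _stderr_at_import
    self._log_file.close()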
def _init_log_path(self):
    name = "_".join(("log", time_as_str(), str(os.getpid())))
    path_dir_log = self.path_dir_exceptions = self.path_dir_result / name
    path_dir_log.mkdir(exist_ok=True)
    self._log_path = path_dir_log / (name + ".txt")
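
This second variant differs from the first in that the log directory is actually created and the text log lives inside it, alongside any per-worker exception logs. With the same hypothetical values as above:

from pathlib import Path

path_dir_result = Path("/data/run.results")    # hypothetical
name = "log_2024-01-01_12-00-00_4242"
path_dir_log = path_dir_result / name
path_dir_log.mkdir(exist_ok=True)              # also receives exception logs
log_path = path_dir_log / (name + ".txt")      # .../log_..._4242/log_..._4242.txt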
def compute(self, sequential=None, has_to_exit=True):
    """Compute (run all works to be done).

    Parameters
    ----------

    sequential : None

      If bool(sequential) is True, the computations are run sequentially
      (useful for debugging).

    has_to_exit : True

      If bool(has_to_exit) is True and if the computation has to stop
      because of a signal 12 (cluster), a signal 99 is sent at exit.

    """
    if hasattr(self, "path_output"):
        logger.info("path results:\n" + str(self.path_output))
        if hasattr(self, "params"):
            tmp_path_params = str(
                self.path_output
                / ("params_" + time_as_str() + f"_{os.getpid()}")
            )
            if not os.path.exists(tmp_path_params + ".xml"):
                path_params = tmp_path_params + ".xml"
            else:
                i = 1
                while os.path.exists(tmp_path_params + "_" + str(i) + ".xml"):
                    i += 1
                path_params = tmp_path_params + "_" + str(i) + ".xml"
            self.params._save_as_xml(path_params)

    self.t_start = time()
    log_memory_usage(time_as_str(2) + ": starting execution. mem usage")

    self.nb_workers_cpu = 0
    self.nb_workers_io = 0
    workers = []

    class CheckWorksThread(threading.Thread):
        cls_to_be_updated = threading.Thread

        def __init__(self):
            self.has_to_stop = False
            super().__init__()
            self.exitcode = None
            self.daemon = True

        def in_time_loop(self):
            t_tmp = time()
            for worker in workers:
                if (
                    isinstance(worker, self.cls_to_be_updated)
                    and worker.fill_destination()
                ):
                    workers.remove(worker)
            t_tmp = time() - t_tmp
            if t_tmp > 0.2:
                logger.info(
                    "update list of workers with fill_destination "
                    "done in {:.3f} s".format(t_tmp)
                )
            sleep(dt_update)

        def run(self):
            try:
                while not self.has_to_stop:
                    self.in_time_loop()
            except Exception as e:
                print("Exception in UpdateThread")
                self.exitcode = 1
                self.exception = e

    class CheckWorksProcess(CheckWorksThread):
        cls_to_be_updated = Process

        def in_time_loop(self):
            # weird bug subprocessing py3
            for worker in workers:
                if not worker.really_started:
                    # print('check if worker has really started.' +
                    #       worker.key)
                    try:
                        worker.really_started = (
                            worker.comm_started.get_nowait()
                        )
                    except queue.Empty:
                        pass
                    if (
                        not worker.really_started
                        and time() - worker.t_start > 10
                    ):
                        # bug! The worker does not work. We kill it! :-)
                        logger.error(
                            cstring(
                                "Mysterious bug multiprocessing: "
                                "a launched worker has not started. "
                                "We kill it! ({}, key: {}).".format(
                                    worker.work_name, worker.key
                                ),
                                color="FAIL",
                            )
                        )
                        # the case of this worker has been dealt with
                        worker.really_started = True
                        worker.terminate()
            super().in_time_loop()

    self.thread_check_works_t = CheckWorksThread()
    self.thread_check_works_t.start()
    self.thread_check_works_p = CheckWorksProcess()
    self.thread_check_works_p.start()

    while not self._has_to_stop and (
        any([not q.is_empty() for q in self.queues]) or len(workers) > 0
    ):
        # debug
        # if logger.level == 10 and \
        #         all([q.is_empty() for q in self.queues]) and \
        #         len(workers) == 1:
        #     for worker in workers:
        #         try:
        #             is_alive = worker.is_alive()
        #         except AttributeError:
        #             is_alive = None
        #         logger.debug(
        #             str((worker, worker.key, worker.exitcode, is_alive)))
        #         if time() - worker.t_start > 60:
        #             from fluiddyn import ipydebug
        #             ipydebug()

        self.nb_workers = len(workers)

        # slow down this loop...
        sleep(dt_small)
        if self.nb_workers_cpu >= nb_max_workers:
            logger.debug(
                cstring(
                    ("The workers are saturated: "
                     "{}, sleep {} s").format(self.nb_workers_cpu, dt),
                    color="WARNING",
                )
            )
            sleep(dt)

        for q in self.queues:
            if not q.is_empty():
                logger.debug(q)
                logger.debug("check_and_act for work: " + repr(q.work))
                try:
                    new_workers = q.check_and_act(sequential=sequential)
                except OSError:
                    logger.error(
                        cstring(
                            "Memory full: to free some memory, no more "
                            "computing job will be launched while the last "
                            "(saving) waiting queue is not empty.",
                            color="FAIL",
                        )
                    )
                    log_memory_usage(color="FAIL", mode="error")
                    self._clear_save_queue(workers, sequential)
                    logger.info(
                        cstring(
                            "The last waiting queue has been emptied.",
                            color="FAIL",
                        )
                    )
                    log_memory_usage(color="FAIL", mode="info")
                    continue

                if new_workers is not None:
                    for worker in new_workers:
                        workers.append(worker)
                logger.debug("workers: " + repr(workers))

        if self.thread_check_works_t.exitcode:
            raise self.thread_check_works_t.exception

        if self.thread_check_works_p.exitcode:
            raise self.thread_check_works_p.exception

        if len(workers) != self.nb_workers:
            gc.collect()

    if self._has_to_stop:
        logger.info(
            cstring(
                "Will exit because of signal 12. "
                "Waiting for all workers to finish...",
                color="FAIL",
            )
        )
        self._clear_save_queue(workers, sequential)

    self.thread_check_works_t.has_to_stop = True
    self.thread_check_works_p.has_to_stop = True
    self.thread_check_works_t.join()
    self.thread_check_works_p.join()

    self.print_at_exit(time() - self.t_start)
    log_memory_usage(time_as_str(2) + ": end of `compute`. mem usage")

    if self._has_to_stop and has_to_exit:
        logger.info(cstring("Exit with signal 99.", color="FAIL"))
        exit(99)

    self._reset_std_as_default()
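
The main loop polls the queues, launches new workers through check_and_act, and relies on the two daemon "check works" threads to prune finished workers and to kill workers that never actually started. Typical hypothetical calls (the topology name is an assumption):

# Hypothetical usage of compute().
topology.compute()                 # parallel execution
topology.compute(sequential=True)  # run the works one by one, for debugging
# On a cluster, sending signal 12 (SIGUSR2 on Linux) to the process makes
# the loop drain its queues and exit with code 99, which a job script can
# detect in order to resubmit the job.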