Example #1
                async def func(work=work):
                    while True:
                        while (
                            isinstance(work.input_queue, tuple)
                            and all(not q for q in work.input_queue)
                        ) or not work.input_queue:
                            await trio.sleep(self.sleep_time)
                            if self._has_to_stop:
                                return
                        t_start = time.time()
                        log_memory_usage(
                            f"{time.time() - self.t_start:.2f} s. Launch work "
                            + work.name_no_space
                            + f" (?). mem usage"
                        )
                        work.func_or_cls(work.input_queue, work.output_queue)
                        if self._has_to_stop:
                            return
                        await trio.sleep(self.sleep_time)

                        logger.info(
                            f"work {work.name_no_space} "
                            f"done in {time.time() - t_start:.3f} s"
                        )

                        await trio.sleep(self.sleep_time)
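
A minimal runnable sketch of the wait-then-run pattern in example #1, reduced to one standalone trio program (the queue and function names here are hypothetical):

import trio

async def wait_then_work(input_queue, func, sleep_time=0.01):
    # sleep until the queue has items; an empty dict is falsy
    while not input_queue:
        await trio.sleep(sleep_time)  # yield to the other trio tasks
    func(input_queue)

async def main():
    queue = {"key": "obj"}
    await wait_then_work(queue, print)

trio.run(main)
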
Example #2
 def signal_handler(sig, frame):
     del sig, frame  # unused
     logger.info("Ctrl+C signal received...")
     self._has_to_stop = True
     self.nursery.cancel_scope.cancel()
     # raise so the interruption also propagates as KeyboardInterrupt
     raise KeyboardInterrupt
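
The handlers in examples #2 and #7 combine a stop flag with cancellation. The same idea in a minimal standalone script, with plain signal handling and a hypothetical STOP flag instead of trio:

import signal
import time

STOP = False

def handler(sig, frame):
    del sig, frame  # unused, as in the handlers above
    global STOP
    STOP = True

signal.signal(signal.SIGINT, handler)

while not STOP:
    time.sleep(0.1)  # stands in for real work
print("stopped cleanly after Ctrl+C")
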
Example #3
    def fill_queue_paths(self, input_queue, output_queue):
        """Fill the first queue (paths)"""
        assert input_queue is None

        serie = self.serie
        if not serie:
            logger.warning("add 0 image. No image to process.")
            return

        names = serie.get_name_arrays()
        if not names:
            if self.how_saving == "complete":
                logger.warning(
                    'topology in mode "complete" and work already done.')
            else:
                logger.warning("Nothing to do")
            return

        for name in names:
            path_im_output = self.path_dir_result / name
            path_im_input = str(self.path_dir_src / name)
            if self.how_saving == "complete":
                if not path_im_output.exists():
                    output_queue[name] = path_im_input
            else:
                output_queue[name] = path_im_input

        nb_names = len(names)

        logger.info(f"Add {nb_names} images to compute.")
        logger.info(f"First files to process: {names[:4]}")

        logger.debug(f"All files: {names}")
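
The how_saving == "complete" branch above is a resume mechanism: a name is enqueued only if its result file does not exist yet. A minimal sketch with hypothetical directories:

from pathlib import Path

path_dir_result = Path("results")  # hypothetical output directory
path_dir_src = Path("images")      # hypothetical input directory
output_queue = {}

for name in ["im0.png", "im1.png"]:
    # skip images whose result already exists, as in "complete" mode
    if not (path_dir_result / name).exists():
        output_queue[name] = str(path_dir_src / name)
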
Example #4
 def _init_compute_log(self):
     log_memory_usage(time_as_str(2) + ": starting execution. mem usage")
     logger.info(f"  topology: {str_short(type(self.topology))}")
     logger.info(f"  executor: {str_short(type(self))}")
     logger.info(f"  nb_cpus_allowed = {nb_cores}")
     logger.info(f"  nb_max_workers = {self.nb_max_workers}")
     logger.info(f"  path_dir_result = {self.path_dir_result}")
Example #5
    def exec_one_shot_works(self):
        """
        Execute all "one shot" functions.

        """
        for work in self.topology.works:
            if work.kind is not None and "one shot" in work.kind:
                pretty = str_short(work.func_or_cls.__func__)
                logger.info(f'Running "one_shot" job "{work.name}" ({pretty})')
                work.func_or_cls(work.input_queue, work.output_queue)
Example #6
 def in_time_loop(self):
     t_tmp = time()
     # iterate over a copy: removing from the list while looping
     # over it would skip elements
     for worker in list(workers):
         if (isinstance(worker, self.cls_to_be_updated)
                 and worker.fill_destination()):
             workers.remove(worker)
     t_tmp = time() - t_tmp
     if t_tmp > 0.2:
         logger.info("update list of workers with fill_destination "
                     "done in {:.3f} s".format(t_tmp))
     sleep(dt_update)
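
Why the loop above iterates over a copy: removing items from a list while iterating over it makes the iterator skip elements. A short demonstration:

workers = ["a", "b", "c", "d"]
for worker in list(workers):    # iterate over a snapshot
    if worker in ("a", "b"):
        workers.remove(worker)  # safe: the snapshot is unaffected
print(workers)  # ['c', 'd']
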
Example #7
        def signal_handler(sig, frame):
            del sig, frame
            logger.info("Ctrl+C signal received...")

            for worker in self.workers:
                worker.terminate()

            self._has_to_stop = True
            self.nursery.cancel_scope.cancel()
            # we need to raise the exception
            raise KeyboardInterrupt
Example #8
    async def async_run_work_cpu(self, work):
        """Executes the work on an item (key, obj), and add the result on
        work.output_queue.

        Parameters
        ----------

        work :

          A work from the topology

        key : hashable

          The key of the dictionnary item to be process

        obj : object

          The value of the dictionnary item to be process

        """
        self.nb_working_workers_cpu += 1

        try:
            key, obj = work.input_queue.pop_first_item()
        except KeyError:
            self.nb_working_workers_cpu -= 1
            return

        if work.check_exception(key, obj):
            self.nb_working_workers_cpu -= 1
            return

        t_start = time.time()
        log_memory_usage(f"{time.time() - self.t_start:.2f} s. Launch work " +
                         work.name_no_space + f" ({key}). mem usage")
        # pylint: disable=W0703
        try:
            # here we do something very bad from the async point of view:
            # we launch a potentially long blocking function:
            ret = work.func_or_cls(obj)
        except Exception as error:
            self.log_exception(error, work.name_no_space, key)
            if self.stop_if_error:
                raise
            ret = error
        else:
            logger.info(f"work {work.name_no_space} ({key}) "
                        f"done in {time.time() - t_start:.3f} s")

        if work.output_queue is not None:
            work.output_queue[key] = ret
        self.nb_working_workers_cpu -= 1
Example #9
    async def start_async_works(self):
        """Create a trio nursery and start all async functions.

        """
        async with trio.open_nursery() as self.nursery:
            for af in reversed(self.async_funcs.values()):
                self.nursery.start_soon(af)

            self.nursery.start_soon(self.update_has_to_stop)

        logger.info("terminate the servers")
        for worker in self.workers:
            worker.terminate()
Example #10
    async def async_run_work_cpu(self, work):
        """Is destined to be started with a "trio.start_soon".

        Executes the work on an item (key, obj), and add the result on
        work.output_queue.

        Parameters
        ----------

        work :

          A work from the topology

        """
        self.nb_working_workers_cpu += 1

        try:
            key, obj = work.input_queue.pop_first_item()
        except KeyError:
            self.nb_working_workers_cpu -= 1
            return

        if work.check_exception(key, obj):
            self.nb_working_workers_cpu -= 1
            return

        t_start = time.time()
        log_memory_usage(
            f"{time.time() - self.t_start:.2f} s. Launch work "
            + work.name_no_space
            + f" ({key}). mem usage"
        )
        # pylint: disable=W0703
        try:
            ret = await trio.run_sync_in_worker_thread(work.func_or_cls, obj)
        except Exception as error:
            self.log_exception(error, work.name_no_space, key)
            if self.stop_if_error:
                raise
            ret = error
        else:
            logger.info(
                f"work {work.name_no_space} ({key}) "
                f"done in {time.time() - t_start:.3f} s"
            )

        if work.output_queue is not None:
            work.output_queue[key] = ret
        self.nb_working_workers_cpu -= 1
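
A minimal sketch of the thread offloading used above. Note that trio.run_sync_in_worker_thread is the old (pre-0.12) Trio name; in current Trio the equivalent call is trio.to_thread.run_sync:

import time

import trio

def blocking_work(obj):
    time.sleep(0.5)  # stands in for a long blocking function
    return obj * 2

async def main():
    # run the blocking function in a thread so the event loop stays responsive
    result = await trio.to_thread.run_sync(blocking_work, 21)
    print(result)  # 42

trio.run(main)
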
Example #11
    def wait_for_all_processes(self):
        """logging + wait for all processes to finish"""
        logger.info(
            f"logging files: {[log_path.name for log_path in self.log_paths]}")

        # wait until end of all processes

        self.topology.results = results_all = []
        for process in self.processes:
            results = process.connection.recv()

            if results is not None:
                results_all.extend(results)

        for process in self.processes:
            process.join()
Example #12
        def run_process():

            # we do this complicated thing because there may be a strange bug

            def start_process_and_check(index_attempt):
                process = Process(
                    target=exec_work_and_comm,
                    args=(work.func_or_cls, obj, child_conn, event),
                )
                process.daemon = True
                process.start()
                # check whether the process has really started (possible bug!)
                if not event.wait(1):
                    log_debug(
                        f"problem: process {work.name_no_space} ({key}) "
                        f"has not really started... (attempt {index_attempt})"
                    )
                    process.terminate()
                    return False
                return process

            really_started = False
            for index_attempt in range(10):
                process = start_process_and_check(index_attempt)
                if process:
                    really_started = True
                    break

            if not really_started:
                raise Exception(
                    f"A process {work.name_no_space} ({key}) "
                    "has not started after 10 attempts"
                )

            # todo: use parent_conn.poll to implement a timeout

            # log_debug(f"waiting for result ({key})")
            result = parent_conn.recv()
            # log_debug(f"result ({key}) received")

            process.join(10 * self.sleep_time)
            if process.exitcode != 0:
                logger.info(f"process.exitcode: {process.exitcode}")
                process.terminate()

            return result
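
A standalone sketch of the start-and-check trick above: the child process sets an Event as its very first action, and the parent treats a timeout on that Event as the "process has not really started" bug:

from multiprocessing import Event, Pipe, Process

def child(conn, event):
    event.set()           # first action: prove the process is alive
    conn.send("result")

if __name__ == "__main__":
    parent_conn, child_conn = Pipe()
    event = Event()
    process = Process(target=child, args=(child_conn, event), daemon=True)
    process.start()
    if not event.wait(1):      # same 1 s timeout as in the example
        process.terminate()    # assume the startup bug was hit
    else:
        print(parent_conn.recv())
        process.join()
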
Example #13
    def fill_queue_paths(self, input_queue, output_queues):

        assert input_queue is None
        queue_paths = output_queues[0]
        queue_couples_of_names = output_queues[1]

        serie = self.serie
        if len(serie) == 0:
            logger.warning("add 0 image. No image to process.")
            return

        names = serie.get_name_arrays()
        if not names:
            if self.how_saving == "complete":
                logger.warning(
                    'topology in mode "complete" and work already done.')
            else:
                logger.warning("Nothing to do")
            return

        for name in names:
            path_im_output = self.path_dir_result / name
            path_im_input = str(self.path_dir_src / name)
            if self.how_saving == "complete":
                if not path_im_output.exists():
                    queue_paths[name] = path_im_input
            else:
                queue_paths[name] = path_im_input

        nb_names = len(names)
        logger.info(f"Add {nb_names} images to compute.")
        logger.info(f"First files to process: {names[:4]}")

        logger.debug(f"All files: {names}")

        series = self.series
        if not series:
            logger.warning("Added 0 couples. No phase to correct.")
            return

        nb_series = len(series)
        logger.info(f"Add {nb_series} phases to correct.")

        for iserie, serie in enumerate(series):
            if iserie > 1:
                break
            logger.info(
                f"Files of serie {iserie}: {serie.get_name_arrays()}")
        # for the first corrected angle: corrected_angle = angle
        ind_serie, serie = next(iter(series.items()))
        name = serie.get_name_arrays()[0]
        queue_couples_of_names[ind_serie - 1] = (name, name)
        for ind_serie, serie in series.items():
            queue_couples_of_names[ind_serie] = serie.get_name_arrays()
Example #14
File: piv.py Project: hbcbh1999/fluidimage
    def fill_couples_of_names_and_paths(self, input_queue, output_queues):
        """Fill the two first queues"""
        assert input_queue is None
        queue_couples_of_names = output_queues[0]
        queue_paths = output_queues[1]

        series = self.series
        if not series:
            logger.warning("add 0 couple. No PIV to compute.")
            return
        if self.how_saving == "complete":
            index_series = []
            for ind_serie, serie in self.series.items():
                name_piv = get_name_piv(serie, prefix="piv")
                if not (self.path_dir_result / name_piv).exists():
                    index_series.append(ind_serie)

            if not index_series:
                logger.warning(
                    'topology in mode "complete" and work already done.')
                return

            series.set_index_series(index_series)

            if logger.isEnabledFor(DEBUG):
                logger.debug(
                    repr([serie.get_name_arrays() for serie in series]))

        nb_series = len(series)
        logger.info(f"Add {nb_series} PIV fields to compute.")

        for iserie, serie in enumerate(series):
            if iserie > 1:
                break
            logger.info("Files of serie {}: {}".format(
                iserie, serie.get_name_arrays()))

        for ind_serie, serie in series.items():
            queue_couples_of_names[ind_serie] = serie.get_name_arrays()
            for name, path in serie.get_name_path_arrays():
                queue_paths[name] = path
Example #15
    def _run_works(self):

        while any(len(queue) != 0 for queue in self.topology.queues):

            for work in self.works:

                # global functions
                if work.kind is not None and "global" in work.kind:
                    if len(work.output_queue) > self.nb_items_queue_max:
                        continue

                    work.func_or_cls(work.input_queue, work.output_queue)

                else:
                    if not work.input_queue:
                        continue

                    key, obj = work.input_queue.pop_first_item()

                    if work.check_exception(key, obj):
                        continue

                    t_start = time.time()
                    log_memory_usage(
                        f"{time.time() - self.t_start:.2f} s. Launch work " +
                        work.name_no_space + f" ({key}). mem usage")
                    # pylint: disable=W0703
                    try:
                        ret = work.func_or_cls(obj)
                    except Exception as error:
                        self.log_exception(error, work.name_no_space, key)
                        if self.stop_if_error:
                            raise
                        ret = error
                    else:
                        logger.info(f"work {work.name_no_space} ({key}) "
                                    f"done in {time.time() - t_start:.3f} s")

                    if work.output_queue is not None:
                        work.output_queue[key] = ret
Example #16
    def __init__(self,
                 queues,
                 path_output=None,
                 logging_level="info",
                 nb_max_workers=None):

        if path_output is not None:
            if not os.path.exists(path_output):
                os.makedirs(path_output)
            self.path_output = path_output
            log = os.path.join(
                path_output,
                "log_" + time_as_str() + "_" + str(os.getpid()) + ".txt",
            )
            self._log_file = open(log, "w")

            stdout = sys.stdout
            if isinstance(stdout, MultiFile):
                stdout = sys.__stdout__

            stderr = sys.stderr
            if isinstance(stderr, MultiFile):
                stderr = sys.__stderr__

            sys.stdout = MultiFile([stdout, self._log_file])
            sys.stderr = MultiFile([stderr, self._log_file])

        if logging_level is not None:
            reset_logger()
            config_logging(logging_level, file=sys.stdout)

        if nb_max_workers is None:
            nb_max_workers = _nb_max_workers

        if nb_max_workers < 1:
            raise ValueError("nb_max_workers < 1")

        self.nb_max_workers_io = max(int(nb_max_workers * 0.8), 2)
        self.nb_max_launch = max(self.nb_max_workers_io, 1)

        logger.info(f"  nb_cpus_allowed = {nb_cores}")
        logger.info(f"  nb_max_workers = {nb_max_workers}")
        logger.info(f"  nb_max_workers_io = {self.nb_max_workers_io}")

        self.queues = queues
        self.nb_max_workers = nb_max_workers
        self.nb_cores = nb_cores
        self.nb_items_lim = max(2 * nb_max_workers, 2)

        self._has_to_stop = False

        if sys.platform != "win32":

            def handler_signals(signal_number, stack):
                print("signal {} received: set _has_to_stop to True".format(
                    signal_number))
                self._has_to_stop = True

            signal.signal(12, handler_signals)
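
MultiFile comes from the surrounding project (fluiddyn), not from the standard library; a minimal sketch of such a "tee" file object and of the redirection done in the constructor above:

import sys

class MultiFile:
    """Duplicate writes to several file-like objects."""

    def __init__(self, files):
        self.files = files

    def write(self, text):
        for file in self.files:
            file.write(text)

    def flush(self):
        for file in self.files:
            file.flush()

with open("log.txt", "w") as log_file:
    sys.stdout = MultiFile([sys.__stdout__, log_file])
    print("written to both the console and log.txt")
    sys.stdout = sys.__stdout__  # restore, as _reset_std_as_default does in example #19
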
Example #17
    def view(self, path, title=None, hide_crosshair=True):
        """
        ImageView, a high-level widget for displaying and analyzing 2D and 3D
        data. ImageView provides:

          1. A zoomable region (ViewBox) for displaying the image
          2. A combination histogram and gradient editor (HistogramLUTItem)
             for controlling the visual appearance of the image
          3. A timeline for selecting the currently displayed frame (for 3D
             data only).
          4. Tools for very basic analysis of image data (see ROI and Norm
             buttons)

        """
        imv = pg.ImageView()
        win = self._win(title)
        self._add_gfx_item(win, imv)

        if not isinstance(path, str):
            data = []
            for p in path:
                logger.info(f"Viewing {p}")
                data.append(imread(p).transpose())

            data = np.array(data)
            imv.setImage(data, xvals=np.linspace(0, len(path), data.shape[0]))
        elif Path(path).is_dir():
            raise ValueError("Expected files not directory.")
        else:
            logger.info(f"Viewing {path}")
            try:
                data = imread(path).transpose()
            except AttributeError as error:
                raise ValueError(f"Is {path} an image?") from error
            imv.setImage(data)

        vb = imv.imageItem.getViewBox()
        self._add_crosshair(win, imv, vb, hide_lines=hide_crosshair)
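
A minimal standalone script showing the same pyqtgraph ImageView usage with random data instead of image files (pg.mkQApp, ImageView and setImage are standard pyqtgraph APIs):

import numpy as np
import pyqtgraph as pg

app = pg.mkQApp()
imv = pg.ImageView()
# a (time, x, y) stack: 3D data makes ImageView show its timeline slider
imv.setImage(np.random.normal(size=(10, 64, 64)))
imv.show()
app.exec_()  # spelled app.exec() with Qt 6 bindings
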
Example #18
    def init_series(self) -> None:
        """Initializes the SeriesOfArrays object `self.series` based on input
        parameters."""
        series = self.series
        if not series:
            logger.warning(
                "encountered empty series. No images to preprocess.")
            return

        if self.how_saving == "complete":
            index_subsets = []
            for ind_subset, subset in self.series.items():
                names_serie = subset.get_name_arrays()
                name_preproc = get_name_preproc(
                    subset,
                    names_serie,
                    ind_subset,
                    series.nb_series,
                    self.params.saving.format,
                )
                if not (self.path_dir_result / name_preproc).exists():
                    index_subsets.append(ind_subset)
            series.set_index_series(index_subsets)
            if logger.isEnabledFor(DEBUG):
                logger.debug(
                    repr([subset.get_name_arrays() for subset in series]))

        nb_subsets = len(series)
        if nb_subsets == 0:
            logger.warning(
                'topology in mode "complete" and work already done.')
            return
        elif nb_subsets == 1:
            plural = ""
        else:
            plural = "s"

        logger.info(f"Add {nb_subsets} image serie{plural} to compute.")
Example #19
    def compute(self, sequential=None, has_to_exit=True):
        """Compute (run all works to be done).

        Parameters
        ----------

        sequential : None

          If bool(sequential) is True, the computations are run in sequential
          (useful for debugging).

        has_to_exit : True

          If bool(has_to_exit) is True and if the computation has to stop
          because of a signal 12 (cluster), a signal 99 is sent at exit.

        """
        if hasattr(self, "path_output"):
            logger.info("path results:\n" + str(self.path_output))
            if hasattr(self, "params"):
                tmp_path_params = str(
                    self.path_output /
                    ("params_" + time_as_str() + f"_{os.getpid()}"))

                if not os.path.exists(tmp_path_params + ".xml"):
                    path_params = tmp_path_params + ".xml"
                else:
                    i = 1
                    while os.path.exists(tmp_path_params + "_" + str(i) +
                                         ".xml"):
                        i += 1
                    path_params = tmp_path_params + "_" + str(i) + ".xml"
                self.params._save_as_xml(path_params)

        self.t_start = time()

        log_memory_usage(time_as_str(2) + ": starting execution. mem usage")

        self.nb_workers_cpu = 0
        self.nb_workers_io = 0
        workers = []

        class CheckWorksThread(threading.Thread):
            cls_to_be_updated = threading.Thread

            def __init__(self):
                self.has_to_stop = False
                super().__init__()
                self.exitcode = None
                self.daemon = True

            def in_time_loop(self):
                t_tmp = time()
                # iterate over a copy: removing from the list while
                # looping over it would skip elements
                for worker in list(workers):
                    if (isinstance(worker, self.cls_to_be_updated)
                            and worker.fill_destination()):
                        workers.remove(worker)
                t_tmp = time() - t_tmp
                if t_tmp > 0.2:
                    logger.info("update list of workers with fill_destination "
                                "done in {:.3f} s".format(t_tmp))
                sleep(dt_update)

            def run(self):
                try:
                    while not self.has_to_stop:
                        self.in_time_loop()
                except Exception as e:
                    print("Exception in UpdateThread")
                    self.exitcode = 1
                    self.exception = e

        class CheckWorksProcess(CheckWorksThread):
            cls_to_be_updated = Process

            def in_time_loop(self):
                # weird bug subprocessing py3
                for worker in workers:
                    if not worker.really_started:
                        # print('check if worker has really started.' +
                        #       worker.key)
                        try:
                            worker.really_started = (
                                worker.comm_started.get_nowait())
                        except queue.Empty:
                            pass
                        if (not worker.really_started
                                and time() - worker.t_start > 10):
                            # bug! The worker does not work. We kill it! :-)
                            logger.error(
                                cstring(
                                    "Mysterious bug multiprocessing: "
                                    "a launched worker has not started. "
                                    "We kill it! ({}, key: {}).".format(
                                        worker.work_name, worker.key),
                                    color="FAIL",
                                ))
                            # the case of this worker has been handled
                            worker.really_started = True
                            worker.terminate()

                super().in_time_loop()

        self.thread_check_works_t = CheckWorksThread()
        self.thread_check_works_t.start()

        self.thread_check_works_p = CheckWorksProcess()
        self.thread_check_works_p.start()

        while not self._has_to_stop and (
            any(not q.is_empty() for q in self.queues) or len(workers) > 0
        ):
            # debug
            # if logger.level == 10 and \
            #    all([q.is_empty() for q in self.queues]) and len(workers) == 1:
            #     for worker in workers:
            #         try:
            #             is_alive = worker.is_alive()
            #         except AttributeError:
            #             is_alive = None

            #         logger.debug(
            #             str((worker, worker.key, worker.exitcode, is_alive)))

            #         if time() - worker.t_start > 60:
            #             from fluiddyn import ipydebug
            #             ipydebug()

            self.nb_workers = len(workers)

            # slow down this loop...
            sleep(dt_small)
            if self.nb_workers_cpu >= nb_max_workers:
                logger.debug(
                    cstring(
                        ("The workers are saturated: "
                         "{}, sleep {} s").format(self.nb_workers_cpu, dt),
                        color="WARNING",
                    ))
                sleep(dt)

            for q in self.queues:
                if not q.is_empty():
                    logger.debug(q)
                    logger.debug("check_and_act for work: " + repr(q.work))
                    try:
                        new_workers = q.check_and_act(sequential=sequential)
                    except OSError:
                        logger.error(
                            cstring(
                                "Memory full: to free some memory, no more "
                                "computing job will be launched while the last "
                                "(saving) waiting queue is not empty.",
                                color="FAIL",
                            ))
                        log_memory_usage(color="FAIL", mode="error")
                        self._clear_save_queue(workers, sequential)
                        logger.info(
                            cstring(
                                "The last waiting queue has been emptied.",
                                color="FAIL",
                            ))
                        log_memory_usage(color="FAIL", mode="info")
                        continue

                    if new_workers is not None:
                        for worker in new_workers:
                            workers.append(worker)
                    logger.debug("workers: " + repr(workers))

            if self.thread_check_works_t.exitcode:
                raise self.thread_check_works_t.exception

            if self.thread_check_works_p.exitcode:
                raise self.thread_check_works_p.exception

            if len(workers) != self.nb_workers:
                gc.collect()

        if self._has_to_stop:
            logger.info(
                cstring(
                    "Will exit because of signal 12. "
                    "Waiting for all workers to finish...",
                    color="FAIL",
                ))
            self._clear_save_queue(workers, sequential)

        self.thread_check_works_t.has_to_stop = True
        self.thread_check_works_p.has_to_stop = True
        self.thread_check_works_t.join()
        self.thread_check_works_p.join()

        self.print_at_exit(time() - self.t_start)
        log_memory_usage(time_as_str(2) + ": end of `compute`. mem usage")

        if self._has_to_stop and has_to_exit:
            logger.info(cstring("Exit with signal 99.", color="FAIL"))
            exit(99)

        self._reset_std_as_default()
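
The CheckWorksThread/CheckWorksProcess machinery above boils down to a daemon watchdog thread whose exception is stored and then re-raised by the main loop. A minimal sketch of that pattern:

import threading
import time

class Watchdog(threading.Thread):
    def __init__(self):
        super().__init__()
        self.daemon = True
        self.has_to_stop = False
        self.exitcode = None
        self.exception = None

    def run(self):
        try:
            while not self.has_to_stop:
                time.sleep(0.05)  # stands in for in_time_loop()
        except Exception as error:
            self.exitcode = 1       # the main loop polls this flag...
            self.exception = error  # ...and re-raises the stored exception

watchdog = Watchdog()
watchdog.start()
if watchdog.exitcode:  # as in: raise self.thread_check_works_t.exception
    raise watchdog.exception
watchdog.has_to_stop = True
watchdog.join()
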
Example #20
    async def async_run_work_cpu(self, work):
        """Is destined to be started with a "trio.start_soon".

        Executes the work on an item (key, obj), and add the result on
        work.output_queue.

        Parameters
        ----------

        work :

          A work from the topology

        """
        self.nb_working_workers_cpu += 1

        try:
            key, obj = work.input_queue.pop_first_item()
        except KeyError:
            self.nb_working_workers_cpu -= 1
            return

        if work.check_exception(key, obj):
            self.nb_working_workers_cpu -= 1
            return

        t_start = time.time()
        log_memory_usage(
            f"{time.time() - self.t_start:.2f} s. Launch work "
            + work.name_no_space
            + f" ({key}). mem usage"
        )

        def exec_work_and_comm(func, obj, child_conn, event):
            # log_debug(f"process ({key}) started")
            event.set()
            # pylint: disable=W0703
            try:
                result = func(obj)
            except Exception as error:
                result = error

            # log_debug(f"in process, send result ({key}): {result}")
            child_conn.send(result)

        parent_conn, child_conn = Pipe()
        event = Event()

        def run_process():

            # we do this complicated thing because there may be a strange bug

            def start_process_and_check(index_attempt):
                process = Process(
                    target=exec_work_and_comm,
                    args=(work.func_or_cls, obj, child_conn, event),
                )
                process.daemon = True
                process.start()
                # check whether the process has really started (possible bug!)
                if not event.wait(1):
                    log_debug(
                        f"problem: process {work.name_no_space} ({key}) "
                        f"has not really started... (attempt {index_attempt})"
                    )
                    process.terminate()
                    return False
                return process

            really_started = False
            for index_attempt in range(10):
                process = start_process_and_check(index_attempt)
                if process:
                    really_started = True
                    break

            if not really_started:
                raise Exception(
                    f"A process {work.name_no_space} ({key}) "
                    "has not started after 10 attempts"
                )

            # todo: use parent_conn.poll to implement a timeout

            # log_debug(f"waiting for result ({key})")
            result = parent_conn.recv()
            # log_debug(f"result ({key}) received")

            process.join(10 * self.sleep_time)
            if process.exitcode != 0:
                logger.info(f"process.exitcode: {process.exitcode}")
                process.terminate()

            return result

        ret = await trio.run_sync_in_worker_thread(run_process)

        if isinstance(ret, Exception):
            self.log_exception(ret, work.name_no_space, key)
            if self.stop_if_error:
                raise ret
        else:
            logger.info(
                f"work {work.name_no_space} ({key}) "
                f"done in {time.time() - t_start:.3f} s"
            )

        if work.output_queue is not None:
            work.output_queue[key] = ret
        self.nb_working_workers_cpu -= 1