コード例 #1
0
class Restarter(FileSystemEventHandler):
    """Filesystem event handler that remembers whether a ``.py`` file changed.

    The instance registers itself as the handler on its own
    ``PollingObserver`` for every path it is given.  Callers poll
    :meth:`restart_required` to find out whether a change was seen since the
    previous poll.
    """

    __slots__ = ("_observer", "_changed")

    PATHS = Token("RESTARTER_PATHS")

    def __init__(self, paths: PATHS = None):
        """Create the observer and start watching ``paths`` recursively.

        Args:
            paths: Iterable of paths to watch.  When it is ``None`` or empty
                nothing is scheduled and the observer is never started.
        """
        self._changed = False
        self._observer = PollingObserver()

        if paths:
            for p in paths:  # type: ignore
                # Wrap in a 1-tuple so a tuple-valued path is not unpacked as
                # several format arguments.
                print("watching for changes %r" % (p,))
                self._observer.schedule(self, str(p), recursive=True)
            self._observer.start()

    def restart_required(self) -> bool:
        """Return whether a change was recorded, and reset the flag."""
        changed = self._changed
        self._changed = False
        return changed

    def on_any_event(self, event: FileSystemEvent):
        """Record the event if it concerns a non-directory ``.py`` path."""
        if self._changed:
            # Already flagged; nothing more to learn from this event.
            return
        if not event.is_directory and event.src_path.endswith(".py"):
            self._changed = True

    def stop(self):
        """Remove all watches and shut the observer thread down.

        The observer is only started when paths were supplied, so the
        stop/join is guarded: joining a thread that was never started raises
        ``RuntimeError``.
        """
        self._observer.unschedule_all()
        if self._observer.is_alive():
            self._observer.stop()
            self._observer.join()
コード例 #2
0
class AutoOcrScheduler(object):
    """Watch an input directory and queue an ``OcrTask`` for each touched file.

    File events arrive through an ``AutoOcrWatchdogHandler`` wired to
    :meth:`on_file_touched` and :meth:`on_file_deleted`.  Tasks are handed the
    ``submit`` method of a three-worker ``ThreadPoolExecutor``.  The scheduler
    can be used as a context manager; leaving the ``with`` block calls
    :meth:`shutdown`.
    """

    SINGLE_FOLDER = 'single_folder'
    MIRROR_TREE = 'mirror_tree'

    OUTPUT_MODES = [SINGLE_FOLDER, MIRROR_TREE]

    def __init__(
            self,
            config_dir,
            input_dir,
            output_dir,
            output_mode,
            success_action=OcrTask.ON_SUCCESS_DO_NOTHING,
            archive_dir=None,
            notify_url='',
            process_existing_files=False,
            run_scheduler=True,
            polling_observer=False,
        ):
        """Validate the configuration and start watching ``input_dir``.

        Args:
            config_dir: Directory holding the fallback ``ocr.config``.
            input_dir: Directory to watch (recursively) for input files.
            output_dir: Directory that receives output; must differ from
                ``input_dir``.
            output_mode: One of ``OUTPUT_MODES`` (case-insensitive).
            success_action: One of ``OcrTask.SUCCESS_ACTIONS``
                (case-insensitive).
            archive_dir: Archive directory; required when ``success_action``
                is ``OcrTask.ON_SUCCESS_ARCHIVE``.
            notify_url: Passed through to every ``OcrTask``.
            process_existing_files: When true, submit
                :meth:`walk_existing_files` to the thread pool.
            run_scheduler: When false, no filesystem observer is created.
            polling_observer: Use ``PollingObserver`` instead of ``Observer``.

        Raises:
            AutoOcrSchedulerError: If any of the settings above is invalid.
        """
        self.logger = logger.getChild('scheduler')

        self.config_dir = local.path(config_dir)
        self.input_dir = local.path(input_dir)
        self.output_dir = local.path(output_dir)
        if self.input_dir == self.output_dir:
            raise AutoOcrSchedulerError('Invalid configuration. Input and output directories must not be the same to avoid recursive OCR invocation!')
        self.output_mode = output_mode.lower()
        if self.output_mode not in AutoOcrScheduler.OUTPUT_MODES:
            raise AutoOcrSchedulerError('Invalid output mode: {}. Must be one of: {}'.format(self.output_mode, ', '.join(AutoOcrScheduler.OUTPUT_MODES)))
        self.success_action = success_action.lower()
        if self.success_action not in OcrTask.SUCCESS_ACTIONS:
            raise AutoOcrSchedulerError('Invalid success action: {}. Must be one of {}'.format(self.success_action, ', '.join(OcrTask.SUCCESS_ACTIONS)))
        self.archive_dir = local.path(archive_dir) if archive_dir else None
        if self.success_action == OcrTask.ON_SUCCESS_ARCHIVE and not self.archive_dir:
            raise AutoOcrSchedulerError('Archive directory required for success action {}'.format(self.success_action))

        self.notify_url = notify_url
        # Maps input path -> OcrTask for every task that has not finished yet.
        self.current_tasks = {}
        self.walk_existing_task = None
        # Output paths reserved by in-flight tasks (used for de-duplication).
        self.current_outputs = set()

        # Create a Threadpool to run OCR tasks on
        self.threadpool = ThreadPoolExecutor(max_workers=3)

        # Wire up an AutoOcrWatchdogHandler
        watchdog_handler = AutoOcrWatchdogHandler(self.on_file_touched, self.on_file_deleted)

        # Schedule watchdog to observe the input directory
        if run_scheduler:
            self.observer = PollingObserver() if polling_observer else Observer()
            self.observer.schedule(watchdog_handler, self.input_dir, recursive=True)
            self.observer.start()
            self.logger.warning('Watching %s', self.input_dir)
        else:
            self.observer = None
            self.logger.warning('Not watching %s', self.input_dir)

        # Process existing files in input directory, if requested
        if process_existing_files:
            self.walk_existing_task = self.threadpool.submit(self.walk_existing_files)

    def shutdown(self):
        """Stop the observer, cancel outstanding work and release the pool."""
        # Shut down the feed of incoming watchdog events
        if self.observer:
            self.logger.debug('Shutting down filesystem watchdog...')
            self.observer.unschedule_all()
            self.observer.stop()

        # Cancel all outstanding cancelable tasks.  The attribute is read once
        # because walk_existing_files() resets it to None from a worker thread.
        walk_task = self.walk_existing_task
        if walk_task:
            self.logger.debug('Canceling walk existing files task...')
            walk_task.cancel()
        self.logger.debug('Canceling all %d in-flight tasks...', len(self.current_tasks))
        # Snapshot the tasks: on_task_done() removes entries from the dict.
        for task in list(self.current_tasks.values()):
            task.cancel()

        # Wait for the threadpool to clean up
        if self.threadpool:
            self.logger.debug('Shutting down threadpool...')
            self.threadpool.shutdown()
            self.threadpool = None

        # Wait for the watchdog to clean up
        if self.observer:
            self.logger.debug('Cleaning up filesystem watchdog...')
            self.observer.join()
            self.observer = None

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # Returning False lets any exception from the with-body propagate.
        self.shutdown()
        return False

    def _map_output_path(self, input_path):
        """Return the output path to use for ``input_path``.

        In ``MIRROR_TREE`` mode the path relative to the input directory is
        re-rooted under the output directory.  In ``SINGLE_FOLDER`` mode the
        file name is placed directly in the output directory; if that path
        already exists or is reserved by an in-flight task, a date stamp and
        an increasing counter are inserted before the extension until the
        path is free.
        """
        if self.output_mode == AutoOcrScheduler.MIRROR_TREE:
            return self.output_dir / (input_path - self.input_dir)
        else:
            assert self.output_mode == AutoOcrScheduler.SINGLE_FOLDER
            output_path = self.output_dir / (input_path.name)
            unique = 1
            if output_path.exists() or output_path in self.current_outputs:
                suffix = '.{}.{}{}'.format(datetime.now().strftime('%Y%m%d'), unique, output_path.suffix)
                output_path = output_path.with_suffix(suffix)

            while output_path.exists() or output_path in self.current_outputs:
                unique = unique + 1
                # depth=2 presumably swaps the previous counter together with
                # the extension - confirm against plumbum's with_suffix.
                output_path = output_path.with_suffix('.{}{}'.format(unique, output_path.suffix), depth=2)
            return output_path

    def _map_archive_path(self, input_path):
        """Return the archive location for ``input_path``.

        Returns ``None`` when no archive directory is configured, instead of
        failing on ``None / relative_path``.
        """
        if self.archive_dir is None:
            return None
        return self.archive_dir / (input_path - self.input_dir)

    def _get_config_path(self, input_path):
        """Find the ``ocr.config`` that applies to ``input_path``.

        Searches the file's own directory and then each parent up to and
        including the input directory, then falls back to the config
        directory.  Returns ``None`` when no config file exists.
        """
        assert (input_path - self.input_dir)[0] != '..'
        config_path = input_path.parent / 'ocr.config'
        while True:
            if config_path.exists():
                return config_path
            if config_path.parent == self.input_dir:
                break
            config_path = config_path.parent.parent / 'ocr.config'

        config_path = self.config_dir / 'ocr.config'
        if config_path.exists():
            return config_path
        return None

    def queue_path(self, path):
        """Create an ``OcrTask`` for ``path`` and register it as in-flight."""
        output_path = self._map_output_path(path)
        config_file = self._get_config_path(path)
        # None when no archive directory is configured.
        # NOTE(review): assumes OcrTask accepts archive_path=None - confirm.
        archive_file = self._map_archive_path(path)
        task = OcrTask(path,
                       output_path,
                       self.threadpool.submit,
                       self.on_task_done,
                       config_file=config_file,
                       success_action=self.success_action,
                       archive_path=archive_file,
                       notify_url=self.notify_url)
        self.current_tasks[path] = task
        self.current_outputs.add(output_path)

    def walk_existing_files(self):
        """Feed every matching file already in the input tree to the scheduler."""
        self.logger.debug('Enumerating existing input files...')
        def keep_file(file):
            return any(fnmatch.fnmatch(file, pattern) for pattern in AutoOcrWatchdogHandler.MATCH_PATTERNS)
        for file in self.input_dir.walk(filter=keep_file):
            self.on_file_touched(file)
        self.walk_existing_task = None

    def on_file_touched(self, path):
        """Touch the existing task for ``path``, or queue a new one."""
        if path in self.current_tasks:
            self.current_tasks[path].touch()
        else:
            self.queue_path(path)

    def on_file_deleted(self, path):
        """Cancel the in-flight task for ``path``, if there is one."""
        if path in self.current_tasks:
            self.current_tasks[path].cancel()

    def on_task_done(self, task):
        """Release the bookkeeping entries held for a finished task."""
        self.current_outputs.remove(task.output_path)
        del self.current_tasks[task.input_path]

    def wait_for_idle(self):
        """Block until the existing-files walk and all queued tasks finish."""
        # Read the attribute once: the walk itself resets it to None when it
        # finishes, which could otherwise happen between the test and the use.
        walk_task = self.walk_existing_task
        if walk_task:
            self.logger.debug('Waiting for walk existing files to complete...')
            concurrent.futures.wait([walk_task])
        while self.current_tasks:
            self.logger.debug('Waiting for %d tasks to complete...', len(self.current_tasks))
            # Snapshot first; finished tasks remove themselves from the dict.
            pending = list(self.current_tasks.values())
            concurrent.futures.wait([task.future for task in pending])
コード例 #3
0
ファイル: minFQ.py プロジェクト: LooseLab/minotourcli
def _stop_monitoring_and_restore_terminal(args, minknow_connection, observer,
                                          event_handler, stdscr):
    """Stop the monitoring components and undo the curses terminal setup."""
    if not args.no_minknow:
        minknow_connection.stop_monitoring()
    observer.stop()
    observer.join()
    event_handler.stopt()
    curses.nocbreak()
    stdscr.keypad(False)
    curses.echo()
    curses.endwin()


def start_minknow_and_basecalled_monitoring(sequencing_statistics, args,
                                            header, minotour_api, stdscr,
                                            screen, log_win):
    """
    Start the minKnow monitoring and basecalled data monitoring in accordance with arguments passed by user

    Runs the display/monitoring loop until an error is flagged on
    ``sequencing_statistics`` or an exception (including CTRL-C) occurs, then
    shuts monitoring down and exits the process with ``sys.exit(0)``.
    Messages are written through the module-level ``log`` logger.

    Parameters
    ----------
    sequencing_statistics: minFQ.utils.SequencingStatistics
        Tracker class for files being monitored, and the metrics about upload
    args: argparse.NameSpace
        The command line arguments that were passed to the script
    header: dict
        The dictionary with headers for the requests, including authentication
    minotour_api: minFQ.minotourapi.MinotourAPI
        The minotourAPI class
    stdscr: _curses.window
        Curses window for printing out
    screen: _curses.window
        The main curses screen
    log_win: _curses.window
        The logging window we write to

    Returns
    -------
    None
        Does not return normally; every exit path from the loop ends in
        ``sys.exit(0)``.
    """
    sequencing_statistics.read_count = 0
    runs_being_monitored_dict = {}
    # Fastq monitoring: seed the watch list with the user supplied directory.
    if not args.no_fastq and args.watch_dir is not None:
        sequencing_statistics.to_watch_directory_list.append(args.watch_dir)
    # Stays None when minKNOW monitoring is disabled; only used when enabled.
    minknow_connection = None
    if not args.no_minknow:
        # this block is going to handle the running of minknow monitoring by the client.
        stdscr.addstr("Connecting to minknow instance at {}".format(args.ip))
        refresh_pad(screen, stdscr)
        minknow_connection = MinionManager(
            args=args,
            header=header,
            sequencing_statistics=sequencing_statistics)
    curses.napms(2000)
    stdscr.clear()
    event_handler = FastqHandler(args, header, runs_being_monitored_dict,
                                 sequencing_statistics, minotour_api)
    observer = Observer()
    observer.start()
    stats = True
    try:
        while True:
            try:
                c = stdscr.getch()
                if c == ord("l"):
                    stats = False
                elif c == ord("s"):
                    stats = True
            except curses.error:
                # curses.error is the exception class; curses.ERR is only the
                # integer error return value and cannot be used in `except`.
                stats = True
            # todo these should be abstracted into one function as they are verrrry similar
            if stats:
                stdscr.addstr(
                    0, 0,
                    "To stop minFQ use CTRL-C. To see the logs, Press l. To return to stats, Press s.",
                    curses.color_pair(4))
                stdscr.addstr(1, 0, "Fetching Data...")
                write_out_minfq_info(stdscr, sequencing_statistics)
                ascii_minotour(stdscr)
                write_out_minknow_info(stdscr, sequencing_statistics)
                write_out_fastq_info(stdscr, sequencing_statistics)
                refresh_pad(screen, stdscr)
                stdscr.overwrite(screen)
            else:
                log_win.overwrite(screen)
                refresh_pad(screen, log_win)
            screen.refresh()
            if not args.no_fastq and sequencing_statistics.to_watch_directory_list:
                # Iterate over a copy: entries are removed from the live list
                # below, and removing while iterating it would skip folders.
                for folder in list(sequencing_statistics.to_watch_directory_list):
                    log.warning(
                        "Checking folder {} that is in our to watch directory".
                        format(folder))
                    if folder and folder not in sequencing_statistics.watched_directory_set:
                        # check that the folder exists, before adding it to be scheduled
                        if os.path.exists(folder):
                            # We have a new folder that hasn't been added.
                            # We need to add this to our list to schedule and catalogue the files.
                            # TODO bug here where we never remove directories from the already watching set - eventually this would lead to a massive set of strings in this set if minFQ is never quit
                            sequencing_statistics.watched_directory_set.add(
                                folder)
                            event_handler.addfolder(folder)
                            log.info(
                                "FastQ Monitoring added for {}".format(folder))
                            sequencing_statistics.to_watch_directory_list.remove(
                                folder)
                            sequencing_statistics.update = True
                        else:
                            log.warning(
                                "Waiting for minKNOW to create folder {} before updating watchdog."
                                .format(folder))
                log.warning("Updating observers is {}".format(
                    sequencing_statistics.update))
                if sequencing_statistics.update:
                    # Rebuild the full set of watches from scratch.
                    observer.unschedule_all()
                    for folder in sequencing_statistics.watched_directory_set:
                        if folder and os.path.exists(folder):
                            log.info(
                                "Scheduling observer for {}, which does exist".
                                format(folder))
                            observer.schedule(event_handler,
                                              path=folder,
                                              recursive=True)
                        else:
                            log.warning(
                                "Tried to add {}, but folder does not exist".
                                format(folder))
                    sequencing_statistics.update = False
            # check if we need to remove any fastq info
            sequencing_statistics.check_fastq_info()
            time.sleep(1)

            if sequencing_statistics.errored:
                log.error(
                    "Errored - Will take a few seconds to clean clear_lines!")
                log.error(sequencing_statistics.error_message)
                _stop_monitoring_and_restore_terminal(
                    args, minknow_connection, observer, event_handler, stdscr)
                print(repr(sequencing_statistics.error_message))
                print(
                    "Exiting after error - Will take a few seconds to close threads."
                )
                # NOTE(review): exit status is 0 even on error; kept as-is in
                # case callers rely on it.  SystemExit is not caught below.
                sys.exit(0)

    except (KeyboardInterrupt, Exception) as e:
        stdscr.addstr("Exiting - Will take a few seconds to close threads.")
        _stop_monitoring_and_restore_terminal(
            args, minknow_connection, observer, event_handler, stdscr)
        print(repr(e))
        print("Exiting - Will take a few seconds to close threads.")
        sys.exit(0)