def start(self):
        """Launch the filterbank watcher and its queue worker on daemon threads.

        Builds a watcher over ``self.watch_dir_fil`` for the ``.fil`` pattern
        that is handed ``self.queue_fil``, plus a queue worker on the same
        queue that dispatches to ``self.filterbank_handler``. The two threads
        are recorded in ``self.watcher_threads`` and ``self.worker_threads``
        respectively before being started.
        """

        def spawn(thread_name, entry_point, registry):
            # Build a daemon thread, remember it, then set it running.
            thread = threading.Thread(
                name=thread_name, target=entry_point, daemon=True
            )
            registry.append(thread)
            thread.start()

        # Watcher: filterbank data -> filterbank queue
        watcher_options = dict(
            path=self.watch_dir_fil,
            q=self.queue_fil,
            pattern=".fil",
            log=self.logger,
            mode=self.mwax_mover_mode,
            recursive=False,
        )
        self.watcher_fil = mwax_watcher.Watcher(**watcher_options)

        # Queue worker: services the filterbank queue indefinitely
        worker_options = dict(
            label="Filterbank Archive",
            q=self.queue_fil,
            executable_path=None,
            event_handler=self.filterbank_handler,
            log=self.logger,
            exit_once_queue_empty=False,
        )
        self.queue_worker_fil = mwax_queue_worker.QueueWorker(**worker_options)

        # Filesystem watching thread first, then the item processing thread
        spawn("watch_fil", self.watcher_fil.start, self.watcher_threads)
        spawn("work_fil", self.queue_worker_fil.start, self.worker_threads)
Beispiel #2
0
    def start(self):
        """Start watching for incoming subfiles and processing them.

        A watcher is created over ``self.subfile_incoming_path`` using
        ``self.ext_to_watch_for`` as its pattern and ``self.subfile_queue`` as
        its queue; a queue worker on the same queue dispatches to
        ``self.handler`` and is configured not to requeue failed items. Both
        run on daemon threads, which are appended to ``self.watcher_threads``
        and ``self.worker_threads`` before being started.
        """
        incoming_queue = self.subfile_queue

        # Watcher for the subfiles
        self.subfile_watcher = mwax_watcher.Watcher(
            path=self.subfile_incoming_path,
            q=incoming_queue,
            pattern=f"{self.ext_to_watch_for}",
            log=self.logger,
            mode=self.mwax_mover_mode,
            recursive=False,
        )

        # Queue worker for the subfile input queue
        self.subfile_queue_worker = mwax_queue_worker.QueueWorker(
            label="Subfile Input Queue",
            q=incoming_queue,
            executable_path=None,
            event_handler=self.handler,
            log=self.logger,
            requeue_to_eoq_on_failure=False,
            exit_once_queue_empty=False,
        )

        # (thread name, thread target, list that keeps track of the thread);
        # the filesystem watcher is launched before the item processor.
        launch_plan = (
            ("watch_sub", self.subfile_watcher.start, self.watcher_threads),
            ("work_sub", self.subfile_queue_worker.start, self.worker_threads),
        )
        for thread_name, entry_point, registry in launch_plan:
            thread = threading.Thread(
                name=thread_name, target=entry_point, daemon=True
            )
            registry.append(thread)
            thread.start()
    def start(self):
        """Start the health thread, purge stale partial files and launch
        every watcher and archive worker.

        Steps, in order:

        1. Start a daemon thread running ``self.health_handler``.
        2. For each directory in ``self.watch_dirs``, delete ``*.part*`` files
           whose modification time is more than an hour old, then create a
           watcher for that directory (partial files are excluded from the
           watcher via ``exclude_pattern``).
        3. Create ``self.concurrent_archive_workers`` Ceph queue workers that
           all share ``self.queue`` and dispatch to ``self.archive_handler``.
        4. Start one daemon thread per watcher, then one per worker, keeping
           them in ``self.watcher_threads`` / ``self.worker_threads``.

        A partial file that disappears between being listed and being
        inspected or removed is skipped instead of aborting start-up.
        """
        # Partial files modified more recently than this may still be in
        # flight (we could be starting up while mwax is sending us a new
        # file), so only older ones are definitely safe to delete.
        MIN_PARTIAL_PURGE_AGE_SECS = 3600

        # create a health thread
        self.logger.info("Starting health_thread...")
        health_thread = threading.Thread(name="health_thread",
                                         target=self.health_handler,
                                         daemon=True)
        health_thread.start()

        self.logger.info("Creating watchers...")
        for watch_dir in self.watch_dirs:
            #
            # Remove any partial files first if they are old
            #
            for partial_file in glob(os.path.join(watch_dir, "*.part*")):
                try:
                    age_secs = time.time() - os.path.getmtime(partial_file)
                except FileNotFoundError:
                    # The file was renamed or removed after glob() listed
                    # it, so there is nothing left to purge.
                    self.logger.info(
                        f"Partial file {partial_file} no longer exists;"
                        " skipping")
                    continue

                if age_secs > MIN_PARTIAL_PURGE_AGE_SECS:
                    self.logger.warning(
                        f"Partial file {partial_file} is older than"
                        f" {MIN_PARTIAL_PURGE_AGE_SECS} seconds and will be"
                        " removed...")
                    try:
                        os.remove(partial_file)
                    except FileNotFoundError:
                        # Something else removed it first; same end result.
                        self.logger.info(
                            f"Partial file {partial_file} no longer exists;"
                            " skipping")
                        continue
                    self.logger.warning(f"Partial file {partial_file} deleted")
                else:
                    self.logger.warning(
                        f"Partial file {partial_file} is newer than"
                        f" {MIN_PARTIAL_PURGE_AGE_SECS} seconds so will NOT be"
                        " removed this time")

            # Create watcher for each data path queue
            new_watcher = mwax_watcher.Watcher(
                path=watch_dir,
                q=self.queue,
                pattern=".*",
                log=self.logger,
                mode=self.mwax_mover_mode,
                recursive=self.recursive,
                exclude_pattern=".part*",
            )
            self.watchers.append(new_watcher)

        # Create queueworker archive queue
        self.logger.info("Creating workers...")

        for n in range(self.concurrent_archive_workers):
            new_worker = mwax_ceph_queue_worker.CephQueueWorker(
                label=f"Archiver{n}",
                q=self.queue,
                executable_path=None,
                event_handler=self.archive_handler,
                log=self.logger,
                requeue_to_eoq_on_failure=True,
                exit_once_queue_empty=False,
                backoff_initial_seconds=20,
                backoff_factor=2,
                backoff_limit_seconds=40,
                ceph_endpoint=self.acacia_ceph_endpoint,
                ceph_profile=self.acacia_profile,
            )
            self.queue_workers.append(new_worker)

        self.logger.info("Starting watchers...")
        # Setup thread for watching filesystem
        for i, watcher in enumerate(self.watchers):
            watcher_thread = threading.Thread(name=f"watch_thread{i}",
                                              target=watcher.start,
                                              daemon=True)
            self.watcher_threads.append(watcher_thread)
            watcher_thread.start()

        self.logger.info("Starting workers...")
        # Setup thread for processing items
        for i, worker in enumerate(self.queue_workers):
            queue_worker_thread = threading.Thread(name=f"worker_thread{i}",
                                                   target=worker.start,
                                                   daemon=True)
            self.worker_threads.append(queue_worker_thread)
            queue_worker_thread.start()

        self.logger.info("Started...")
Beispiel #4
0
    def initialise(self):
        """Parse and validate the command line, then build the processor.

        On success the following attributes are set: ``watch_dir``,
        ``watch_ext``, ``executable``, ``mode``, ``q``, ``watch``,
        ``queueworker`` and ``running`` (``True``). ``recursive`` is set only
        when recursion was requested on the command line; otherwise any
        existing value is left alone.

        ``--recursive`` may be given bare (``-r``) or with an explicit value
        (``-r true`` / ``-r false``), and the resulting setting is passed on
        to the watcher.

        Raises:
            SystemExit: with code 1 when ``--watchdir`` is not a directory or
                ``--watchext`` does not start with a ``.``; argparse also
                exits on a malformed command line.
        """

        def parse_flag(text):
            # Only the usual "off" spellings count as false, so any value
            # that was previously truthy still enables recursion.
            return text.strip().lower() not in ("", "0", "false", "no", "off")

        # Get command line args
        parser = argparse.ArgumentParser()
        parser.description = (
            "mwax_mover: a command line tool which is part of the mwax"
            " correlator for the MWA.\n"
        )
        parser.add_argument(
            "-w",
            "--watchdir",
            required=True,
            help="Directory to watch for files with watchext extension",
        )
        parser.add_argument(
            "-x",
            "--watchext",
            required=True,
            help="Extension to watch for e.g. .sub",
        )
        parser.add_argument(
            "-e",
            "--executablepath",
            required=True,
            help=(
                "Absolute path to executable to launch."
                f" {FILE_REPLACEMENT_TOKEN} will be substituted with the abs"
                " path of the filename being"
                f" processed.{FILENOEXT_REPLACEMENT_TOKEN} will be replaced"
                " with the filename but not extenson."
            ),
        )
        parser.add_argument(
            "-m",
            "--mode",
            required=True,
            default=None,
            choices=[
                MODE_WATCH_DIR_FOR_NEW,
                MODE_WATCH_DIR_FOR_RENAME,
                MODE_WATCH_DIR_FOR_RENAME_OR_NEW,
                MODE_PROCESS_DIR,
            ],
            help=(
                f"Mode to run:\n{MODE_WATCH_DIR_FOR_NEW}: Watch watchdir for"
                " new files forever. Launch"
                f" executable.\n{MODE_WATCH_DIR_FOR_RENAME}: Watch watchdir"
                " for renamed files forever. Launch"
                f" executable.\n{MODE_WATCH_DIR_FOR_RENAME_OR_NEW}: Watch"
                " watchdir for new OR renamed files forever. Launch"
                f" executable.\n{MODE_PROCESS_DIR}: For each file in watchdir,"
                " launch executable. Exit.\n"
            ),
        )
        parser.add_argument(
            "-r",
            "--recursive",
            required=False,
            # nargs="?" lets a bare "-r" mean True (via const) while still
            # accepting the older "-r <value>" form.
            nargs="?",
            const=True,
            default=False,
            type=parse_flag,
            help=(
                "Recurse subdirectories of the watchdir. Omitting this option"
                " is the default and only the watchdir will be monitored."
            ),
        )
        args = vars(parser.parse_args())

        # Check args
        self.watch_dir = args["watchdir"]
        self.watch_ext = args["watchext"]
        self.executable = args["executablepath"]
        self.mode = args["mode"]

        # Only the one-shot "process dir" mode stops once the queue drains.
        exit_once_queue_empty = self.mode == MODE_PROCESS_DIR

        if args["recursive"]:
            self.recursive = args["recursive"]

        if not os.path.isdir(self.watch_dir):
            print(
                f"Error: --watchdir '{self.watch_dir}' does not exist or you"
                " don't have permission"
            )
            raise SystemExit(1)

        # startswith() also copes with an empty extension, which indexing
        # the first character would not.
        if not self.watch_ext.startswith("."):
            print(
                f"Error: --watchext '{self.watch_ext}' should start with a '.'"
                " e.g. '.sub'"
            )
            raise SystemExit(1)

        # start logging
        self.logger.setLevel(logging.DEBUG)
        self.logger.propagate = False
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(
            logging.Formatter("%(asctime)s, %(levelname)s, %(message)s")
        )
        self.logger.addHandler(ch)

        self.logger.info("Starting mwax_mover processor...")

        # Create a queue for dealing with files
        self.q = queue.Queue()

        # Create watcher. Falls back to non-recursive when the attribute was
        # never set (recursion not requested and no prior value on self).
        self.watch = mwax_watcher.Watcher(
            path=self.watch_dir,
            q=self.q,
            pattern=f"{self.watch_ext}",
            log=self.logger,
            mode=self.mode,
            recursive=getattr(self, "recursive", False),
        )

        # Create queueworker
        self.queueworker = mwax_queue_worker.QueueWorker(
            label="queue",
            q=self.q,
            executable_path=self.executable,
            exit_once_queue_empty=exit_once_queue_empty,
            log=self.logger,
            event_handler=None,
        )

        self.running = True
        self.logger.info("Processor Initialised...")
    def start(self):
        """Create the watchers and queue workers and start their threads.

        With ``self.archive_destination_enabled`` set, this builds:

        * incoming voltage (``.sub``) and visibility (``.fits``) watchers on
          the checksum+db queue, with one worker on that queue;
        * a watcher/worker pair for visibility processing stats;
        * watcher/worker pairs for outgoing voltage, outgoing visibility and
          outgoing calibration data.

        Otherwise (archiving disabled) every watched directory must contain
        no files; if any does, an error is logged and the process exits with
        code -2. Only the two incoming watchers are then created, both
        feeding a single worker that dispatches to
        ``self.dont_archive_handler``.

        In both cases all watcher/worker objects are created first, then the
        watcher threads are started, then the worker threads. Threads are
        daemons and are appended to ``self.watcher_threads`` or
        ``self.worker_threads`` before being started.

        Raises:
            SystemExit: archiving is disabled and a watched directory
                already contains files.
        """

        def new_watcher(path, q, pattern, mode):
            # Every watcher here is non-recursive and logs to our logger.
            return mwax_watcher.Watcher(
                path=path,
                q=q,
                pattern=pattern,
                log=self.logger,
                mode=mode,
                recursive=False,
            )

        def new_worker(label, q, event_handler):
            # Every worker here runs a Python handler (no executable) and
            # keeps running when its queue is empty.
            return mwax_queue_worker.QueueWorker(
                label=label,
                q=q,
                executable_path=None,
                event_handler=event_handler,
                log=self.logger,
                exit_once_queue_empty=False,
            )

        def launch(name, target, registry):
            # Create a daemon thread, record it, then start it.
            thread = threading.Thread(name=name, target=target, daemon=True)
            registry.append(thread)
            thread.start()

        def contains_files(directory):
            # os.walk() yields nothing for a directory it cannot list, so
            # give next() a default instead of letting StopIteration escape;
            # such a directory is treated as holding no files.
            _, _, filenames = next(os.walk(directory), (directory, [], []))
            return bool(filenames)

        if self.archive_destination_enabled:
            # Create watcher for voltage data -> checksum+db queue
            self.watcher_incoming_volt = new_watcher(
                self.watch_dir_incoming_volt,
                self.queue_checksum_and_db,
                ".sub",
                mwax_mover.MODE_WATCH_DIR_FOR_NEW,
            )

            # Create watcher for visibility data -> checksum+db queue
            # This will watch for mwax visibilities being renamed OR
            # fits files being created
            # (e.g. metafits ppd files being copied into /visdata).
            self.watcher_incoming_vis = new_watcher(
                self.watch_dir_incoming_vis,
                self.queue_checksum_and_db,
                ".fits",
                mwax_mover.MODE_WATCH_DIR_FOR_RENAME_OR_NEW,
            )

            # Create queueworker for the checksum and db queue
            self.queue_worker_checksum_and_db = new_worker(
                "checksum and database worker",
                self.queue_checksum_and_db,
                self.checksum_and_db_handler,
            )

            # Create watcher for visibility processing stats
            self.watcher_processing_stats_vis = new_watcher(
                self.watch_dir_processing_stats_vis,
                self.queue_processing_stats_vis,
                ".fits",
                mwax_mover.MODE_WATCH_DIR_FOR_RENAME,
            )

            # worker for visibility processing stats
            self.queue_worker_processing_stats_vis = new_worker(
                "processing stats vis worker",
                self.queue_processing_stats_vis,
                self.stats_handler,
            )

            # Create watcher for archiving outgoing voltage data
            self.watcher_outgoing_volt = new_watcher(
                self.watch_dir_outgoing_volt,
                self.queue_outgoing_volt,
                ".sub",
                mwax_mover.MODE_WATCH_DIR_FOR_RENAME,
            )

            # Create queueworker for voltage outgoing queue
            self.queue_worker_outgoing_volt = new_worker(
                "outgoing volt worker",
                self.queue_outgoing_volt,
                self.archive_handler,
            )

            # Create watcher for archiving outgoing visibility data
            self.watcher_outgoing_vis = new_watcher(
                self.watch_dir_outgoing_vis,
                self.queue_outgoing_vis,
                ".fits",
                mwax_mover.MODE_WATCH_DIR_FOR_RENAME,
            )

            # Create queueworker for visibility outgoing queue
            self.queue_worker_outgoing_vis = new_worker(
                "outgoing vis worker",
                self.queue_outgoing_vis,
                self.archive_handler,
            )

            # Create watcher for sending calibration visibility data
            # for processing
            self.watcher_outgoing_cal = new_watcher(
                self.watch_dir_outgoing_cal,
                self.queue_outgoing_cal,
                ".fits",
                mwax_mover.MODE_WATCH_DIR_FOR_RENAME,
            )

            # Create queueworker for sending calibration visibility data
            # for processing
            self.queue_worker_outgoing_cal = new_worker(
                "outgoing cal vis worker",
                self.queue_outgoing_cal,
                self.cal_handler,
            )

            # Threads watching the filesystem, in start order
            watcher_plan = (
                ("watch_volt_incoming", self.watcher_incoming_volt),
                ("watch_vis_incoming", self.watcher_incoming_vis),
                (
                    "watch_vis_processing_stats",
                    self.watcher_processing_stats_vis,
                ),
                ("watch_volt_outgoing", self.watcher_outgoing_volt),
                ("watch_vis_outgoing", self.watcher_outgoing_vis),
                ("watch_cal_outgoing", self.watcher_outgoing_cal),
            )

            # Threads processing queued items, in start order
            worker_plan = (
                ("work_checksum_and_db", self.queue_worker_checksum_and_db),
                (
                    "work_vis_processing_stats",
                    self.queue_worker_processing_stats_vis,
                ),
                ("work_volt_outgoing", self.queue_worker_outgoing_volt),
                ("work_vis_outgoing", self.queue_worker_outgoing_vis),
                # This one serves the outgoing cal queue
                ("work_cal_outgoing", self.queue_worker_outgoing_cal),
            )
        else:
            # We have disabled archiving, so use a different
            # handler for incoming data
            # which just moves the files elsewhere

            # First check to ensure there are no existing unarchived files on
            # our watching dirs
            guarded_dirs = (
                self.watch_dir_incoming_volt,
                self.watch_dir_incoming_vis,
                self.watch_dir_outgoing_volt,
                self.watch_dir_outgoing_vis,
                self.watch_dir_outgoing_cal,
                self.watch_dir_processing_stats_vis,
            )
            if any(contains_files(directory) for directory in guarded_dirs):
                self.logger.error(
                    "Error- voltage incoming/outgoing and/or visibility "
                    "incoming/processing/outgoing/cal dirs are not empty! "
                    "Watched paths must be empty before starting with  "
                    "archiving disabled to prevent inadvertent data loss. "
                    "Exiting."
                )
                # Raise directly rather than calling the site-provided
                # exit() helper, which is meant for interactive use.
                raise SystemExit(-2)

            # Create watcher for voltage data -> dont_archive queue
            self.watcher_incoming_volt = new_watcher(
                self.watch_dir_incoming_volt,
                self.queue_dont_archive,
                ".sub",
                mwax_mover.MODE_WATCH_DIR_FOR_NEW,
            )

            # Create watcher for visibility data -> dont_archive queue
            # This will watch for mwax visibilities being renamed OR
            # fits files being created (e.g. metafits ppd files being copied
            # into /visdata).
            self.watcher_incoming_vis = new_watcher(
                self.watch_dir_incoming_vis,
                self.queue_dont_archive,
                ".fits",
                mwax_mover.MODE_WATCH_DIR_FOR_RENAME_OR_NEW,
            )

            # Create queueworker for the don't archive queue
            self.queue_worker_dont_archive = new_worker(
                "dont archive worker",
                self.queue_dont_archive,
                self.dont_archive_handler,
            )

            watcher_plan = (
                ("watch_volt_incoming", self.watcher_incoming_volt),
                ("watch_vis_incoming", self.watcher_incoming_vis),
            )
            worker_plan = (
                ("work_dont_archive", self.queue_worker_dont_archive),
            )

        #
        # Start watcher threads
        #
        for thread_name, watcher in watcher_plan:
            launch(thread_name, watcher.start, self.watcher_threads)

        #
        # Start queue worker threads
        #
        for thread_name, worker in worker_plan:
            launch(thread_name, worker.start, self.worker_threads)