Example #1
0
def lasif_process_data(parser, args):
    """
    Launch data processing.

    This function works with MPI. Don't use too many cores, I/O quickly
    becomes the limiting factor. It also works without MPI but then only one
    core actually does any work.
    """
    # Positional argument: zero or more event names.
    parser.add_argument(
        "events",
        nargs="*",
        help="One or more events. If none given, all will be done.",
    )
    # Optional argument: forwarded unchanged as ``iteration``.
    parser.add_argument(
        "--iteration",
        default=None,
        help="Take all events used in iteration",
    )

    parsed = parser.parse_args(args)

    # An empty positional list is passed on as None rather than as [].
    selected_events = parsed.events or None

    api.process_data(
        lasif_root=".",
        events=selected_events,
        iteration=parsed.iteration,
    )
    def process_data(self, event: str):
        """
        Run the Lasif data processing for a single event.

        Nothing is done when ``self._already_processed(event)`` reports
        that the event has been handled before.

        :param event: Name of event to be processed
        :type event: str
        """
        needs_processing = not self._already_processed(event)
        if needs_processing:
            # The Lasif API takes a list of events, so wrap the single name.
            lapi.process_data(self.lasif_comm, events=[event])
Example #3
0
    def process_data(self, event: str):
        """
        Process the data for the periods specified in Lasif project.

        Events for which ``self._already_processed`` returns True are
        skipped. When the project has ``remote_data_processing`` enabled,
        the remote site is checked for an existing processed file for this
        event; if it exists it is downloaded into the local
        ``PROCESSED_DATA/EARTHQUAKES/<event>`` folder and no local
        processing is done. In every other case the event is processed
        locally through the Lasif API.

        :param event: Name of event to be processed
        :type event: str
        """
        if self._already_processed(event):
            return

        project = self.comm.project
        if project.remote_data_processing:
            # Name of the processed file as it is stored locally.
            proc_filename = (
                f"preprocessed_{int(project.min_period)}s_"
                f"to_{int(project.max_period)}s.h5")
            local_proc_folder = os.path.join(
                project.lasif_root, "PROCESSED_DATA", "EARTHQUAKES", event)
            local_proc_file = os.path.join(local_proc_folder, proc_filename)

            # makedirs also creates missing parent folders and does not
            # fail when the folder already exists (os.mkdir does neither).
            os.makedirs(local_proc_folder, exist_ok=True)

            # On the remote side all events share one folder, so the file
            # name is prefixed with the event name.
            remote_proc_file_name = f"{event}_{proc_filename}"
            hpc_cluster = get_site(project.site_name)

            remote_processed_dir = os.path.join(
                project.remote_inversionson_dir, "PROCESSED_DATA")
            remote_proc_path = os.path.join(remote_processed_dir,
                                            remote_proc_file_name)

            # Download under a temporary name and rename afterwards,
            # presumably so that an interrupted transfer never leaves a
            # partial file under the final name.
            tmp_local_path = local_proc_file + "_tmp"
            if hpc_cluster.remote_exists(remote_proc_path):
                hpc_cluster.remote_get(remote_proc_path, tmp_local_path)
                os.rename(tmp_local_path, local_proc_file)
                return  # Return if it got it and got it there.

        # No remote copy available (or remote processing disabled).
        lapi.process_data(self.lasif_comm, events=[event])
Example #4
0
    def process_random_unprocessed_event(self) -> bool:
        """
        Instead of sleeping when we queue for the HPC, we can also process a
        random unprocessed event. That is what this function does.

        It first tries to process a high priority event, from
        the validation dataset or the current iteration, otherwise
        it tries to process any other event that may be used in the future.

        Leaves the function as soon as one event was processed or
        if there was nothing to process.

        ``self.everything_processed`` and ``self.validation_data_processed``
        are used as cached results: once a full scan finds no unprocessed
        event the corresponding flag stays True and later calls skip the
        scan. Whenever an event is processed the flags are reset so the
        next call scans again.

        :return: Returns True if an event was processed, otherwise False
        :rtype: bool
        """

        events_in_iteration = self.comm.project.events_in_iteration
        events = self.comm.lasif.list_events()
        validation_events = self.comm.project.validation_dataset
        msg = ("Seems like there is nothing to do now. "
               "I might as well process some random event.")
        if not self.everything_processed:
            # Assume nothing is left; reset below as soon as work is found.
            self.everything_processed = True
            # First give the most urgent events a try.
            if not self.validation_data_processed:
                self.validation_data_processed = True
                for event in validation_events:
                    if self._already_processed(event):
                        continue
                    self.print(msg)
                    self.print(f"Processing validation {event}...")
                    self.validation_data_processed = False
                    # More events may remain, so the global flag must be
                    # reset too; otherwise every later call would return
                    # False without looking at any further event.
                    self.everything_processed = False
                    lapi.process_data(self.lasif_comm, events=[event])
                    return True
            for event in events_in_iteration:
                if self._already_processed(event):
                    continue
                self.print(msg)
                self.print(f"Processing {event} from current iteration...")
                self.everything_processed = False
                lapi.process_data(self.lasif_comm, events=[event])
                return True
            for event in events:
                if self._already_processed(event):
                    continue
                self.print(msg)
                self.print(f"Processing random other {event}...")
                self.everything_processed = False
                lapi.process_data(self.lasif_comm, events=[event])
                return True
        return False