def pysiral_l2proc_time_range_job(args):
    """
    A Level-2 Processor job for a given time range.

    :param args: Parsed command line argument object. Attributes used:
        run_tag, l2_settings_file, l2_output, overwrite_protection,
        start, stop, exclude_month, l1b_version, remove_old.
    :return: None (results are written by the Level-2 processor / output handlers)
    """

    # Get start time of processor run
    # NOTE: time.clock() was removed in Python 3.8 (deprecated since 3.3);
    #       perf_counter() is the documented replacement for elapsed-time measurement.
    t0 = time.perf_counter()

    # Get the product definition
    product_def = Level2ProductDefinition(args.run_tag, args.l2_settings_file)
    mission_id = product_def.l2def.mission.id
    hemisphere = product_def.l2def.hemisphere

    # Specifically add an output handler
    product_def.add_output_definition(
        args.l2_output, overwrite_protection=args.overwrite_protection)

    # --- Get the period for the Level-2 Processor ---
    # Evaluate the input arguments
    period = DatePeriod(args.start, args.stop)

    # Clip the time range to the valid time range of the target platform
    period = period.intersect(psrlcfg.get_platform_period(mission_id))

    # The Level-2 processor operates in monthly iterations
    # -> Break down the full period into monthly segments and
    #    filter specific month that should not be processed
    period_segments = period.get_segments("month", crop_to_period=True)
    if args.exclude_month is not None:
        period_segments.filter_month(args.exclude_month)

    # Prepare DataHandler
    l1b_data_handler = DefaultL1bDataHandler(
        mission_id, hemisphere, version=args.l1b_version)

    # Processor Initialization
    l2proc = Level2Processor(product_def)

    # Now loop over the month
    for time_range in period_segments:

        # Do some extra logging
        l2proc.log.info("Processing period: %s" % time_range.label)

        # Product Data Management: remove previous output for this period
        if args.remove_old:
            for output_handler in product_def.output_handler:
                output_handler.remove_old(time_range)

        # Get input files
        l1b_files = l1b_data_handler.get_files_from_time_range(time_range)
        l2proc.log.info("Found %g files in %s" % (
            len(l1b_files), l1b_data_handler.last_directory))

        # Process the orbits
        l2proc.process_l1b_files(l1b_files)

    # All done, log total processor run time
    t1 = time.perf_counter()
    seconds = int(t1 - t0)
    l2proc.log.info("Run completed in %s" % str(timedelta(seconds=seconds)))
def pysiral_l3proc():
    """
    Entry point of the Level-3 processor: grids Level-2 intermediate (l2i)
    products over the requested period segments and writes Level-3 output.

    Command line arguments are parsed internally (Level3ProcArgParser);
    takes no parameters and returns None.
    """

    # parse command line arguments
    args = Level3ProcArgParser()
    args.parse_command_line_arguments()

    # Get start time of processor run
    # NOTE: time.clock() was removed in Python 3.8; use perf_counter() instead.
    t0 = time.perf_counter()

    # --- Get the period segments for the Level-3 processor ---
    # NOTE: These depend on the chosen total time range and the duration
    #       period for the grid.
    period = DatePeriod(args.start, args.stop)
    if args.period == "custom":
        # A single custom segment covering the full requested range
        period_segments = [period]
        n_periods = 1
    else:
        period_segments = period.get_segments(args.period)
        n_periods = period_segments.n_periods

    # Get the output grid
    grid = Level3GridDefinition(args.l3_griddef)

    # Initialize the interface to the l2i products
    l2i_handler = L2iDataHandler(args.l2i_product_directory)

    # Initialize the output handler
    # Currently the overwrite protection is disabled per default
    output = []
    for l3_output_file in args.l3_output_file:
        output_handler = Level3OutputHandler(
            output_def=l3_output_file,
            base_directory=args.l3_product_basedir,
            period=args.period,
            doi=args.doi,
            data_record_type=args.data_record_type,
            overwrite_protection=False)
        output.append(output_handler)

    # Compile the product def
    product_def = Level3ProductDefinition(
        args.l3_settings_file, grid, output, period)

    # Initialize the Processor
    l3proc = Level3Processor(product_def)

    # Loop over all iterations
    for i, time_range in enumerate(period_segments):

        # Report processing period
        msg = "# Processing %s period (%g of %g): %s"
        msg = msg % (args.period, i + 1, n_periods, time_range.date_label)
        logger.info(msg)

        # Retrieve files
        l2i_files = l2i_handler.get_files_from_time_range(time_range)
        logger.info("Num l2i files: %g" % len(l2i_files))
        if len(l2i_files) == 0:
            logger.info("Skip data period")
            continue

        # Start the Level-3 processing
        l3proc.process_l2i_files(l2i_files, time_range)

    # Final reporting
    t1 = time.perf_counter()
    seconds = int(t1 - t0)
    logger.info("Run completed in %s" % str(timedelta(seconds=seconds)))
def pysiral_l2preproc():
    """
    Caller for converting Level-2 Intermediate (l2i) into Level-2
    Pre-Processed (l2p) data products.

    NOTE: At the moment that only means summary of valid
    freeboard/thickness data points into daily summary files.

    Command line arguments are parsed internally (Level2PreProcArgParser);
    takes no parameters and returns None.
    """

    # Collect job settings from pysiral configuration data and
    # command line arguments
    args = Level2PreProcArgParser()

    # Parse and validate the command line arguments
    args.parse_command_line_arguments()

    # Get confirmation for critical choices (if necessary)
    args.critical_prompt_confirmation()

    # Start the level-2 pre-processor
    # Get start time of processor run
    # NOTE: time.clock() was removed in Python 3.8; use perf_counter() instead.
    t0 = time.perf_counter()

    # Get the product definition
    product_def = Level2PreProcProductDefinition()

    # Specifically add an output handler
    product_def.add_output_definition(
        args.l2i_product_dir,
        args.l2p_output,
        period="daily",
        doi=args.doi,
        overwrite_protection=args.overwrite_protection)

    # Prepare DataHandler
    # The l2 pre-processor requires l2i input files
    l2i_handler = L2iDataHandler(args.l2i_product_dir)

    # Get list of days for processing
    # start and/or stop can be omitted. In this case fall back to the
    # start and/or stop of l2i product availability
    start = args.start if args.start is not None else l2i_handler.start_month
    stop = args.stop if args.stop is not None else l2i_handler.stop_month
    period = DatePeriod(start, stop)
    days = period.get_segments("day")
    if args.exclude_month is not None:
        days.filter_month(args.exclude_month)

    # Processor Initialization
    # NOTE: This is only for later cases. Not much is done here at this point
    l2preproc = Level2PreProcessor(product_def)

    # Loop over iterations (one per day)
    for day in days:

        # Do some extra logging
        logger.info("Processing Day [%s]" % day.label)

        # XXX: This needs a bit more thought
        # Product Data Management
        # if args.remove_old:
        #     for output_handler in product_def.output_handler:
        #         output_handler.remove_old(day)

        # Get input files
        l2i_daily_files = l2i_handler.get_files_for_day(day.tcs.dt)
        if len(l2i_daily_files) == 0:
            logger.info("- no l2i products, skip day")
            continue
        logger.info("- Found %g l2i product files" % len(l2i_daily_files))

        # Process the orbits
        l2preproc.process_l2i_files(l2i_daily_files, day)

    # All done, log processor time
    t1 = time.perf_counter()
    seconds = int(t1 - t0)
    logger.info("Run completed in %s" % str(timedelta(seconds=seconds)))
class Level1PreProcJobDef(DefaultLoggingClass):
    """
    A class that contains the information for the Level-1 pre-processor
    JOB (not the pre-processor class!): requested time range, target
    platform/hemisphere, and the parsed l1p processor definition.
    """

    def __init__(self, l1p_settings_id_or_file, tcs, tce, exclude_month=None,
                 hemisphere="global", platform=None, output_handler_cfg=None,
                 source_repo_id=None):
        """
        The settings for the Level-1 pre-processor job

        :param l1p_settings_id_or_file: An id of an proc/l1 processor config file
            (filename excluding the .yaml extension) or an full filepath to a
            yaml config file
        :param tcs: [int list] Time coverage start (YYYY MM [DD])
        :param tce: [int list] Time coverage end (YYYY MM [DD]) [int list]
        :param exclude_month: [int list] A list of month that will be ignored
            NOTE(review): this argument is accepted but never stored or used in
            this constructor — confirm whether month filtering happens elsewhere
        :param hemisphere: [str] The target hemisphere (`north`, `south`,
            `global`:default).
        :param platform: [str] The target platform (pysiral id). Required if
            l1p settings files is valid for multiple platforms (e.g. ERS-1/2, ...)
        :param output_handler_cfg: [dict] An optional dictionary with options of
            the output handler (`overwrite_protection`: [True, False],
            `remove_old`: [True, False])
        :param source_repo_id: [str] The tag in local_machine_def.yaml
            (l1b_repository.<platform>.<source_repo_id>) -> Overwrites the
            default source repo in the l1p settings
            (input_handler.options.local_machine_def_tag &
            output_handler.options.local_machine_def_tag)
        """

        super(Level1PreProcJobDef, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus()

        # Get pysiral configuration
        # TODO: Move to global
        self._cfg = psrlcfg

        # Store command line options
        self._hemisphere = hemisphere
        self._platform = platform
        self._source_repo_id = source_repo_id

        # Parse the l1p settings file
        # NOTE: may terminate the process (error.raise_on_error / sys.exit)
        #       if the settings are invalid or the platform is ambiguous
        self.set_l1p_processor_def(l1p_settings_id_or_file)

        # Get full requested time range
        self._time_range = DatePeriod(tcs, tce)
        logger.info("Requested time range is %s" % self.time_range.label)

        # Store the data handler options
        if output_handler_cfg is None:
            output_handler_cfg = {}
        self._output_handler_cfg = output_handler_cfg

        # Measure execution time
        self.stopwatch = StopWatch()

    @classmethod
    def from_args(cls, args):
        """
        Init the Processor Definition from the pysiral-l1preproc
        command line argument object
        """

        # Optional Keywords
        kwargs = {}
        if args.exclude_month is not None:
            kwargs["exclude_month"] = args.exclude_month
        # Options forwarded to the output handler configuration dictionary
        data_handler_cfg = dict()
        data_handler_cfg["overwrite_protection"] = args.overwrite_protection
        data_handler_cfg["remove_old"] = args.remove_old
        if args.source_repo_id is not None:
            data_handler_cfg["local_machine_def_tag"] = args.source_repo_id
        kwargs["output_handler_cfg"] = data_handler_cfg
        kwargs["hemisphere"] = args.hemisphere
        kwargs["platform"] = args.platform
        kwargs["source_repo_id"] = args.source_repo_id

        # Return the initialized class
        return cls(args.l1p_settings, args.start_date, args.stop_date, **kwargs)

    def set_l1p_processor_def(self, l1p_settings_id_or_file):
        """ Parse the content of the processor definition file """

        # 1. Resolve the absolute file path
        procdef_file_path = self.get_l1p_proc_def_filename(l1p_settings_id_or_file)

        # 2. Read the content
        logger.info("Parsing L1P processor definition file: %s" % procdef_file_path)
        self._l1pprocdef = get_yaml_config(procdef_file_path)
        self._check_if_unambiguous_platform()

        # 3. Expand info (input data lookup directories)
        self._get_local_input_directory()

        # 4. update hemisphere for input adapter
        self._l1pprocdef.level1_preprocessor.options.polar_ocean.target_hemisphere = self.target_hemisphere

    def get_l1p_proc_def_filename(self, l1p_settings_id_or_file):
        """
        Query pysiral config to obtain filename for processor
        definition file

        :param l1p_settings_id_or_file: settings id or full file path
        :return: resolved file path (terminates via raise_on_error if invalid)
        """

        # A. Check if already filename
        if Path(l1p_settings_id_or_file).is_file():
            return l1p_settings_id_or_file

        # B. Not a file, try to resolve filename via pysiral config
        filename = self.pysiral_cfg.get_settings_file("proc", "l1", l1p_settings_id_or_file)
        if filename is None:
            # Build an error message listing all known settings ids
            # NOTE(review): loop variable `id` shadows the builtin of the same name
            msg = "Invalid Level-1 pre-processor definition filename or id: %s\n" % l1p_settings_id_or_file
            msg = msg + " \nRecognized Level-1 pre-processor definitions ids:\n"
            ids = self.pysiral_cfg.get_setting_ids("proc", "l1")
            for id in ids:
                msg = msg + " - " + id + "\n"
            self.error.add_error("invalid-l1p-outputdef", msg)
            self.error.raise_on_error()
        return filename

    def _get_local_input_directory(self):
        """ Replace the tag for local machine def with the actual path info """

        input_handler_cfg = self.l1pprocdef.input_handler.options
        local_machine_def_tag = input_handler_cfg.local_machine_def_tag
        primary_input_def = self.pysiral_cfg.local_machine.l1b_repository
        platform, tag = self.platform, local_machine_def_tag

        # Overwrite the tag if specifically supplied
        if self._source_repo_id is not None:
            tag = self._source_repo_id

        # Get the value
        expected_branch_name = "root.l1b_repository.%s.%s" % (platform, tag)
        try:
            branch = AttrDict(primary_input_def[platform][tag])
        except KeyError:
            # NOTE: raise_on_error is expected to terminate here; otherwise
            #       `branch` would be unbound below
            msg = "Missing definition in `local_machine_def.yaml`. Expected branch: %s"
            msg = msg % expected_branch_name
            self.error.add_error("local-machine-def-missing-tag", msg)
            self.error.raise_on_error()

        # Sanity Checks
        # TODO: Obsolete?
        if branch is None:
            msg = "Missing definition in `local_machine_def.yaml`. Expected branch: %s"
            msg = msg % expected_branch_name
            self.error.add_error("local-machine-def-missing-tag", msg)
            self.error.raise_on_error()

        # Validity checks
        # TODO: These checks are probably better located in a separate method?
        for key in ["source", "l1p"]:

            # 1. Branch must have specific keys for input and output
            if not key in branch:
                msg = "Missing definition in `local_machine_def.yaml`. Expected value: %s.%s"
                msg = msg % (expected_branch_name, key)
                self.error.add_error("local-machine-def-missing-tag", msg)
                self.error.raise_on_error()

            # 2. The value of each branch must be a valid directory or a
            #    attr (e.g. for different radar modes) with a list of directories
            directory_or_attrdict = branch[key]
            try:
                directories = directory_or_attrdict.values()
            except AttributeError:
                # Plain value -> treat as a single directory
                directories = [directory_or_attrdict]

            for directory in directories:
                if not Path(directory).is_dir():
                    msg = "Invalid directory in `local_machine_def.yaml`: %s is not a valid directory"
                    msg = msg % directory
                    self.error.add_error("local-machine-def-invalid-dir", msg)
                    self.error.raise_on_error()

        # Update the lookup dir parameter
        self.l1pprocdef.input_handler["options"]["lookup_dir"] = branch.source

    def _check_if_unambiguous_platform(self):
        """
        Checks if the platform is unique, since some l1 processor definitions
        are valid for a series of platforms, such as ERS-1/2, Sentinel-3A/B,
        etc. The indicator is that the platform tag in the l1 preprocessor
        settings is comma separated list.

        For the location of the source data, it is however necessary that
        the exact platform is known. It must therefore be specified
        explicitly by the -platform argument
        """

        settings_is_ambigous = "," in self._l1pprocdef.platform
        platform_is_known = self.platform is not None

        # Test if platform is given if the settings file is valid for
        # more than 1 platform
        if settings_is_ambigous and not platform_is_known:
            msg = "Error: platform in l1p settings is ambiguous (%s), but no platform has been given (-platform)"
            msg = msg % self._l1pprocdef.platform
            sys.exit(msg)

        # Test if platform provided matches the platform list in the
        # settings file
        if settings_is_ambigous and platform_is_known:
            if not self.platform in str(self._l1pprocdef.platform):
                msg = "Error: platform in l1p settings (%s) and given platform (%s) do not match"
                msg = msg % (self._l1pprocdef.platform, self.platform)
                sys.exit(msg)

        # If platform in settings is unambigous, but not provided
        # -> get platform from settings
        if not settings_is_ambigous and not platform_is_known:
            self._platform = self._l1pprocdef.platform
            logger.info("- get platform from l1p settings -> %s" % self.platform)

    @property
    def hemisphere(self):
        # Target hemisphere id as given at construction time
        return self._hemisphere

    @property
    def target_hemisphere(self):
        # Map the hemisphere id to the list of hemispheres to process
        values = {"north": ["north"], "south": ["south"], "global": ["north", "south"]}
        return values[self.hemisphere]

    @property
    def pysiral_cfg(self):
        # The pysiral package configuration object
        return self._cfg

    @property
    def l1pprocdef(self):
        # Parsed content of the l1p processor definition file
        return self._l1pprocdef

    @property
    def time_range(self):
        # Full requested time range (DatePeriod)
        return self._time_range

    @property
    def period_segments(self):
        # Monthly segments of the requested time range, cropped to the period
        segments = self._time_range.get_segments("month", crop_to_period=True)
        return segments

    @property
    def output_handler_cfg(self):
        # Dictionary of output handler options (may be empty)
        return self._output_handler_cfg

    @property
    def platform(self):
        # Target platform id (may have been inferred from the settings file)
        return self._platform