Exemple #1
0
def pysiral_l2proc_time_range_job(args):
    """ This is a Level-2 Processor job for a given time range """

    # Get start time of processor run
    # NOTE: time.clock() was removed in Python 3.8 -> use perf_counter()
    t0 = time.perf_counter()

    # Get the product definition
    product_def = Level2ProductDefinition(args.run_tag, args.l2_settings_file)
    mission_id = product_def.l2def.mission.id
    hemisphere = product_def.l2def.hemisphere

    # Specifically add an output handler
    product_def.add_output_definition(
        args.l2_output, overwrite_protection=args.overwrite_protection)

    # --- Get the period for the Level-2 Processor ---
    # Evaluate the input arguments
    period = DatePeriod(args.start, args.stop)

    # Clip the time range to the valid time range of the target platform
    period = period.intersect(psrlcfg.get_platform_period(mission_id))

    # The Level-2 processor operates in monthly iterations
    # -> Break down the full period into monthly segments and
    #    filter specific month that should not be processed
    period_segments = period.get_segments("month", crop_to_period=True)
    if args.exclude_month is not None:
        period_segments.filter_month(args.exclude_month)

    # Prepare DataHandler
    l1b_data_handler = DefaultL1bDataHandler(mission_id,
                                             hemisphere,
                                             version=args.l1b_version)

    # Processor Initialization
    l2proc = Level2Processor(product_def)

    # Now loop over the month
    for time_range in period_segments:

        # Do some extra logging
        l2proc.log.info("Processing period: %s" % time_range.label)

        # Product Data Management
        if args.remove_old:
            for output_handler in product_def.output_handler:
                output_handler.remove_old(time_range)

        # Get input files
        l1b_files = l1b_data_handler.get_files_from_time_range(time_range)
        l2proc.log.info("Found %g files in %s" %
                        (len(l1b_files), l1b_data_handler.last_directory))

        # Process the orbits
        l2proc.process_l1b_files(l1b_files)

    # All done
    t1 = time.perf_counter()
    seconds = int(t1 - t0)
    l2proc.log.info("Run completed in %s" % str(timedelta(seconds=seconds)))
Exemple #2
0
    def __init__(self,
                 l1p_settings_id_or_file,
                 tcs,
                 tce,
                 exclude_month=None,
                 hemisphere="global",
                 platform=None,
                 output_handler_cfg=None,
                 source_repo_id=None):
        """
        Create the job definition for a Level-1 pre-processor run.
        :param l1p_settings_id_or_file: Either the id of a proc/l1 processor config file (filename without
                                        the .yaml extension) or the full filepath of a yaml config file
        :param tcs: [int list] Time coverage start (YYYY MM [DD])
        :param tce: [int list] Time coverage end (YYYY MM [DD])
        :param exclude_month: [int list] Month numbers that will be skipped
        :param hemisphere: [str] Target hemisphere (`north`, `south`, `global`:default)
        :param platform: [str] Target platform (pysiral id). Required when the l1p settings file
                               is valid for several platforms (e.g. ERS-1/2, ...)
        :param output_handler_cfg: [dict] Optional output handler options
                                   (`overwrite_protection`: [True, False], `remove_old`: [True, False])
        :param source_repo_id: [str] Tag in local_machine_def.yaml (l1b_repository.<platform>.<source_repo_id>)
                                  -> Overwrites the default source repo in the l1p settings
                                     (input_handler.options.local_machine_def_tag &
                                      output_handler.options.local_machine_def_tag)
        """

        super(Level1PreProcJobDef, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus()

        # pysiral package configuration
        # TODO: Move to global
        self._cfg = psrlcfg

        # Keep the command line choices
        self._hemisphere = hemisphere
        self._platform = platform
        self._source_repo_id = source_repo_id

        # Read and validate the l1p processor definition file
        self.set_l1p_processor_def(l1p_settings_id_or_file)

        # Full requested processing period
        self._time_range = DatePeriod(tcs, tce)
        logger.info("Requested time range is %s" % self.time_range.label)

        # Output handler options (an empty dict means: use defaults)
        self._output_handler_cfg = {} if output_handler_cfg is None else output_handler_cfg

        # Timer for execution time reporting
        self.stopwatch = StopWatch()
Exemple #3
0
 def get_platform_period(self, platform_id):
     """
     Construct the full sensing period for a given platform ID from its
     known time coverage.
     :param platform_id: pysiral platform id
     :return: dateperiods.DatePeriod
     """
     coverage_start, coverage_end = self.platforms.get_time_coverage(platform_id)
     return DatePeriod(coverage_start, coverage_end)
Exemple #4
0
    def _l2_processing_of_orbit_files(self):
        """ Orbit-wise level2 processing """

        # TODO: Evaluate parallelization
        logger.info("Start Orbit Processing")

        n_files = len(self._l1b_files)

        # loop over l1bdata preprocessed orbits
        for i, l1b_file in enumerate(self._l1b_files):

            # Log the current position in the file stack
            logger.info("+ [ %g of %g ] (%.2f%%)" %
                        (i + 1, n_files, float(i + 1) / float(n_files) * 100.))

            # Read the the level 1b file (l1bdata netCDF is required)
            l1b = self._read_l1b_file(l1b_file)
            source_primary_filename = Path(l1b_file).parts[-1]

            # Initialize the orbit level-2 data container
            # TODO: replace by proper product metadata transfer
            try:
                period = DatePeriod(l1b.info.start_time, l1b.info.stop_time)
            except SystemExit:
                # NOTE: Bugfix - `msg` was previously passed as a stray second
                #       positional argument to logger.warning() and thus never
                #       appeared in the log output
                msg = "Computation of data period caused exception"
                logger.warning("[invalid-l1b] %s" % msg)
                continue

            # Init the Level-2 data object
            l2 = Level2Data(l1b.info, l1b.time_orbit, period=period)

            # Overwrite the timeliness value of the l1p input data
            # (requires settings of --force-l2def-record-type option in pysiral-l2proc)
            if self._l2def.force_l2def_record_type:
                l2.info.timeliness = self._l2def.record_type

            # Get auxiliary data from all registered auxdata handlers
            error_status, error_codes = self.get_auxiliary_data(l2)
            if True in error_status:
                self._discard_l1b_procedure(error_codes, l1b_file)
                continue

            # Execute all Level-2 processor steps
            self.execute_l2_processor_steps(l1b, l2)

            # Create output files
            l2.set_metadata(auxdata_source_dict=self.l2_auxdata_source_dict,
                            source_primary_filename=source_primary_filename,
                            l2_algorithm_id=self.l2def.label,
                            l2_version_tag=self.l2def.file_version_tag)
            self._create_l2_outputs(l2)

            # Add data to orbit stack
            self._add_to_orbit_collection(l2)
Exemple #5
0
    def get_month_products(self,
                           month_num,
                           exclude_years=None,
                           platform="all"):
        """Returns a list all products that have coverage for a given month

        Arguments:
            month_num {int} -- month number (1:Jan, ..., 12:Dec)

        Keyword Arguments:
            exclude_years {int list} -- years to skip (default: None -> none skipped)
            platform {str} -- pysiral platform id or "all" (default: "all")

        Returns:
            product_ids {list} -- ids of all products overlapping the month
        """

        # Query time range
        product_ids = []

        platforms = self.platforms if platform == "all" else [platform]
        # NOTE: Bugfix naming - this list holds the years to be *excluded*
        #       from the query (the loop below skips them)
        years_to_exclude = [] if exclude_years is None else exclude_years
        n_products = 0
        for year in self.years:
            if year in years_to_exclude:
                continue
            time_range = DatePeriod([year, month_num], [year, month_num])
            tcs, tce = time_range.tcs.dt, time_range.tce.dt
            ids = [
                prd.id for prd in self.product_list
                if prd.has_overlap(tcs, tce) and prd.platform in platforms
            ]
            n_products += len(ids)
            product_ids.extend(ids)

        # Reporting
        msg = "Found %g %s files for %s"
        month_name = datetime.datetime(2000, month_num, 1).strftime("%B")
        msg %= (n_products, self.processing_level, month_name)
        logger.info(msg)

        return product_ids
Exemple #6
0
    def _l2_processing_of_orbit_files(self):
        """ Orbit-wise level2 processing """

        # TODO: Evaluate parallelization
        self.log.info("Start Orbit Processing")

        n_files = len(self._l1b_files)

        # loop over l1bdata preprocessed orbits
        for i, l1b_file in enumerate(self._l1b_files):

            # Log the current position in the file stack
            self.log.info(
                "+ [ %g of %g ] (%.2f%%)" %
                (i + 1, n_files, float(i + 1) / float(n_files) * 100.))

            # Read the the level 1b file (l1bdata netCDF is required)
            l1b = self._read_l1b_file(l1b_file)
            source_primary_filename = Path(l1b_file).parts[-1]

            # Apply the geophysical range corrections on the waveform range
            # bins in the l1b data container
            # TODO: move to level1bData class
            self._apply_range_corrections(l1b)

            # Apply a pre-filter of the l1b data (can be none)
            self._apply_l1b_prefilter(l1b)

            # Initialize the orbit level-2 data container
            # TODO: replace by proper product metadata transfer
            try:
                period = DatePeriod(l1b.info.start_time, l1b.info.stop_time)
            except SystemExit:
                # NOTE: Bugfix - `msg` was previously passed as a stray second
                #       positional argument to log.warning() and thus never
                #       appeared in the log output
                msg = "Computation of data period caused exception"
                self.log.warning("[invalid-l1b] %s" % msg)
                continue
            l2 = Level2Data(l1b.info, l1b.time_orbit, period=period)

            # Transfer l1p parameter to the l2 data object (if applicable)
            # NOTE: This is only necessary, if parameters from the l1p files (classifiers) should
            #       be present in the l2i product
            self._transfer_l1p_vars(l1b, l2)

            # Get auxiliary data from all registered auxdata handlers
            error_status, error_codes = self._get_auxiliary_data(l2)
            if True in error_status:
                self._discard_l1b_procedure(error_codes, l1b_file)
                continue

            # Surface type classification (ocean, ice, lead, ...)
            # (ice type classification comes later)
            self._classify_surface_types(l1b, l2)

            # Validate surface type classification
            # yes/no decision on continuing with orbit
            error_status, error_codes = self._validate_surface_types(l2)
            if error_status:
                self._discard_l1b_procedure(error_codes, l1b_file)
                continue

            # Get elevation by retracking of different surface types
            # adds parameter elevation to l2
            error_status, error_codes = self._waveform_retracking(l1b, l2)
            if error_status:
                self._discard_l1b_procedure(error_codes, l1b_file)
                continue

            # Compute the sea surface anomaly (from mss and lead tie points)
            # adds parameter ssh, ssa, afrb to l2
            self._estimate_sea_surface_height(l2)

            # Compute the radar freeboard and its uncertainty
            self._get_altimeter_freeboard(l1b, l2)

            # get radar(-derived) from altimeter freeboard
            self._get_freeboard_from_radar_freeboard(l1b, l2)

            # Apply freeboard filter
            self._apply_freeboard_filter(l2)

            # Convert to thickness
            self._convert_freeboard_to_thickness(l2)

            # Filter thickness
            self._apply_thickness_filter(l2)

            # Post processing
            self._post_processing_items(l2)

            # Create output files
            l2.set_metadata(auxdata_source_dict=self.l2_auxdata_source_dict,
                            source_primary_filename=source_primary_filename,
                            l2_algorithm_id=self.l2def.id,
                            l2_version_tag=self.l2def.version_tag)
            self._create_l2_outputs(l2)

            # Add data to orbit stack
            self._add_to_orbit_collection(l2)
Exemple #7
0
def pysiral_l2preproc():
    """ Caller for converting Level-2 Intermediate (l2i) into
    Level-2 Pre-Processed (l2p) data products.
    NOTE: At the moment that only means summary of valid freeboard/thickness
          data points into daily summary files. """

    # Collect job settings from pysiral configuration data and
    # command line arguments
    args = Level2PreProcArgParser()

    # Parse and validate the command line arguments
    args.parse_command_line_arguments()

    # Get confirmation for critical choices (if necessary)
    args.critical_prompt_confirmation()

    # Start the level-2 pre-processor
    # Get start time of processor run
    # NOTE: time.clock() was removed in Python 3.8 -> use perf_counter()
    t0 = time.perf_counter()

    # Get the product definition
    product_def = Level2PreProcProductDefinition()

    # Specifically add an output handler
    product_def.add_output_definition(
        args.l2i_product_dir,
        args.l2p_output,
        period="daily",
        doi=args.doi,
        overwrite_protection=args.overwrite_protection)

    # Prepare DataHandler
    # The l2 pre-processor requires l2i input files
    l2i_handler = L2iDataHandler(args.l2i_product_dir)

    # Get list of days for processing
    # start and/or stop can be omitted. In this case fall back to the
    # start and/or stop of l2i product availability
    start = args.start if args.start is not None else l2i_handler.start_month
    stop = args.stop if args.stop is not None else l2i_handler.stop_month
    period = DatePeriod(start, stop)
    days = period.get_segments("day")
    if args.exclude_month is not None:
        days.filter_month(args.exclude_month)

    # Processor Initialization
    # NOTE: This is only for later cases. Not much is done here at this
    #       point
    l2preproc = Level2PreProcessor(product_def)

    # Loop over iterations (one per day)
    for day in days:

        # Do some extra logging
        logger.info("Processing Day [%s]" % day.label)

        # XXX: This needs a bit more thought
        # Product Data Management
        # if args.remove_old:
        #     for output_handler in product_def.output_handler:
        #         output_handler.remove_old(day)

        # Get input files
        l2i_daily_files = l2i_handler.get_files_for_day(day.tcs.dt)
        if len(l2i_daily_files) == 0:
            logger.info("- no l2i products, skip day")
            continue
        logger.info("- Found %g l2i product files" % len(l2i_daily_files))

        # Process the orbits
        l2preproc.process_l2i_files(l2i_daily_files, day)

    # All done, log processor time
    t1 = time.perf_counter()
    seconds = int(t1 - t0)
    logger.info("Run completed in %s" % str(timedelta(seconds=seconds)))
Exemple #8
0
def pysiral_l3proc():
    """ Caller for the Level-3 processor: grids l2i products over the
    requested period segments and writes the gridded output files """

    # parse command line arguments
    args = Level3ProcArgParser()
    args.parse_command_line_arguments()

    # Get start time of processor run
    # NOTE: time.clock() was removed in Python 3.8 -> use perf_counter()
    t0 = time.perf_counter()

    # --- Get the period segments for the Level-3 processor ---
    # NOTE: These depend on the chosen total time range and the duration period for the grid.
    period = DatePeriod(args.start, args.stop)
    if args.period == "custom":
        period_segments = [period]
        n_periods = 1
    else:
        period_segments = period.get_segments(args.period)
        n_periods = period_segments.n_periods

    # Get the output grid
    grid = Level3GridDefinition(args.l3_griddef)

    # Initialize the interface to the l2i products
    l2i_handler = L2iDataHandler(args.l2i_product_directory)

    # Initialize the output handler
    # Currently the overwrite protection is disabled per default
    output = []
    for l3_output_file in args.l3_output_file:
        output_handler = Level3OutputHandler(output_def=l3_output_file,
                                             base_directory=args.l3_product_basedir,
                                             period=args.period,
                                             doi=args.doi,
                                             data_record_type=args.data_record_type,
                                             overwrite_protection=False)
        output.append(output_handler)

    # Compile the product def
    product_def = Level3ProductDefinition(args.l3_settings_file, grid, output, period)

    # Initialize the Processor
    l3proc = Level3Processor(product_def)

    # Loop over all iterations
    for i, time_range in enumerate(period_segments):

        # Report processing period
        msg = "# Processing %s period (%g of %g): %s"
        msg = msg % (args.period, i+1, n_periods, time_range.date_label)
        logger.info(msg)

        # Retrieve files
        l2i_files = l2i_handler.get_files_from_time_range(time_range)
        logger.info("Num l2i files: %g" % len(l2i_files))
        if len(l2i_files) == 0:
            logger.info("Skip data period")
            continue

        # Start the Level-3 processing
        l3proc.process_l2i_files(l2i_files, time_range)

    # Final reporting
    t1 = time.perf_counter()
    seconds = int(t1 - t0)
    logger.info("Run completed in %s" % str(timedelta(seconds=seconds)))
Exemple #9
0
 def get_time_range_ids(self, tcs, tce):
     """ Return the ids of all products that overlap the given
     time coverage start/end definition """
     period = DatePeriod(tcs, tce)
     return self.query_overlap(period.tcs.dt,
                               period.tce.dt,
                               return_value="ids")
Exemple #10
0
 def get_winter_time_range(start_year):
     """ Return the winter season (October of `start_year` through
     April of the following year) as a DatePeriod """
     season_start = [start_year, 10]
     season_end = [start_year + 1, 4]
     return DatePeriod(season_start, season_end)
Exemple #11
0
class Level1PreProcJobDef(DefaultLoggingClass):
    """ A class that contains the information for the Level-1 pre-processor JOB (not the pre-processor class!) """
    def __init__(self,
                 l1p_settings_id_or_file,
                 tcs,
                 tce,
                 exclude_month=None,
                 hemisphere="global",
                 platform=None,
                 output_handler_cfg=None,
                 source_repo_id=None):
        """
        The settings for the Level-1 pre-processor job
        :param l1p_settings_id_or_file: An id of an proc/l1 processor config file (filename excluding the .yaml
                                        extension) or an full filepath to a yaml config file
        :param tcs: [int list] Time coverage start (YYYY MM [DD])
        :param tce: [int list] Time coverage end (YYYY MM [DD]) [int list]
        :param exclude_month: [int list] A list of month that will be ignored
        :param hemisphere: [str] The target hemisphere (`north`, `south`, `global`:default).
        :param platform: [str] The target platform (pysiral id). Required if l1p settings files is valid for
                               multiple platforms (e.g. ERS-1/2, ...)
        :param output_handler_cfg: [dict] An optional dictionary with options of the output handler
                                   (`overwrite_protection`: [True, False], `remove_old`: [True, False])
        :param source_repo_id: [str] The tag in local_machine_def.yaml (l1b_repository.<platform>.<source_repo_id>)
                                  -> Overwrites the default source repo in the l1p settings
                                     (input_handler.options.local_machine_def_tag &
                                      output_handler.options.local_machine_def_tag)
        """

        super(Level1PreProcJobDef, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus()

        # Get pysiral configuration
        # TODO: Move to global
        self._cfg = psrlcfg

        # Store command line options
        self._hemisphere = hemisphere
        self._platform = platform
        self._source_repo_id = source_repo_id

        # Parse the l1p settings file
        self.set_l1p_processor_def(l1p_settings_id_or_file)

        # Get full requested time range
        self._time_range = DatePeriod(tcs, tce)
        logger.info("Requested time range is %s" % self.time_range.label)

        # Store the data handler options
        if output_handler_cfg is None:
            output_handler_cfg = {}
        self._output_handler_cfg = output_handler_cfg

        # Measure execution time
        self.stopwatch = StopWatch()

    @classmethod
    def from_args(cls, args):
        """ Init the Processor Definition from the pysiral-l1preproc command line argument object """

        # Optional Keywords
        kwargs = {}
        if args.exclude_month is not None:
            kwargs["exclude_month"] = args.exclude_month
        data_handler_cfg = dict()
        data_handler_cfg["overwrite_protection"] = args.overwrite_protection
        data_handler_cfg["remove_old"] = args.remove_old
        if args.source_repo_id is not None:
            data_handler_cfg["local_machine_def_tag"] = args.source_repo_id
        kwargs["output_handler_cfg"] = data_handler_cfg
        kwargs["hemisphere"] = args.hemisphere
        kwargs["platform"] = args.platform
        kwargs["source_repo_id"] = args.source_repo_id

        # Return the initialized class
        return cls(args.l1p_settings, args.start_date, args.stop_date,
                   **kwargs)

    def set_l1p_processor_def(self, l1p_settings_id_or_file):
        """ Parse the content of the processor definition file """

        # 1. Resolve the absolute file path
        procdef_file_path = self.get_l1p_proc_def_filename(
            l1p_settings_id_or_file)

        # 2. Read the content
        logger.info("Parsing L1P processor definition file: %s" %
                    procdef_file_path)
        self._l1pprocdef = get_yaml_config(procdef_file_path)
        self._check_if_unambiguous_platform()

        # 3. Expand info (input data lookup directories)
        self._get_local_input_directory()

        # 4. update hemisphere for input adapter
        self._l1pprocdef.level1_preprocessor.options.polar_ocean.target_hemisphere = self.target_hemisphere

    def get_l1p_proc_def_filename(self, l1p_settings_id_or_file):
        """ Query pysiral config to obtain filename for processor definition file """

        # A. Check if already filename
        if Path(l1p_settings_id_or_file).is_file():
            return l1p_settings_id_or_file

        # B. Not a file, try to resolve filename via pysiral config
        filename = self.pysiral_cfg.get_settings_file("proc", "l1",
                                                      l1p_settings_id_or_file)
        if filename is None:
            msg = "Invalid Level-1 pre-processor definition filename or id: %s\n" % l1p_settings_id_or_file
            msg = msg + " \nRecognized Level-1 pre-processor definitions ids:\n"
            ids = self.pysiral_cfg.get_setting_ids("proc", "l1")
            # NOTE: loop variable renamed (was `id`, shadowing the builtin)
            for settings_id in ids:
                msg = msg + "    - " + settings_id + "\n"
            self.error.add_error("invalid-l1p-outputdef", msg)
            self.error.raise_on_error()
        return filename

    def _get_local_input_directory(self):
        """ Replace the tag for local machine def with the actual path info """

        input_handler_cfg = self.l1pprocdef.input_handler.options
        local_machine_def_tag = input_handler_cfg.local_machine_def_tag
        primary_input_def = self.pysiral_cfg.local_machine.l1b_repository
        platform, tag = self.platform, local_machine_def_tag

        # Overwrite the tag if specifically supplied
        if self._source_repo_id is not None:
            tag = self._source_repo_id

        # Get the value
        expected_branch_name = "root.l1b_repository.%s.%s" % (platform, tag)
        # NOTE: Initialize to None so `branch` is always bound - previously a
        #       KeyError left it undefined and the sanity check below would
        #       raise a NameError if raise_on_error() did not terminate
        branch = None
        try:
            branch = AttrDict(primary_input_def[platform][tag])
        except KeyError:
            msg = "Missing definition in `local_machine_def.yaml`. Expected branch: %s"
            msg = msg % expected_branch_name
            self.error.add_error("local-machine-def-missing-tag", msg)
            self.error.raise_on_error()

        # Sanity Checks
        if branch is None:
            msg = "Missing definition in `local_machine_def.yaml`. Expected branch: %s"
            msg = msg % expected_branch_name
            self.error.add_error("local-machine-def-missing-tag", msg)
            self.error.raise_on_error()

        # Validity checks
        # TODO: These checks are probably better located in a separate method?
        for key in ["source", "l1p"]:

            # 1. Branch must have specific keys for input and output
            if key not in branch:
                msg = "Missing definition in `local_machine_def.yaml`. Expected value: %s.%s"
                msg = msg % (expected_branch_name, key)
                self.error.add_error("local-machine-def-missing-tag", msg)
                self.error.raise_on_error()

            # 2. The value of each branch must be a valid directory or a
            #    attr (e.g. for different radar modes) with a list of directories
            directory_or_attrdict = branch[key]
            try:
                directories = directory_or_attrdict.values()
            except AttributeError:
                directories = [directory_or_attrdict]

            for directory in directories:
                if not Path(directory).is_dir():
                    msg = "Invalid directory in `local_machine_def.yaml`: %s is not a valid directory"
                    msg = msg % directory
                    self.error.add_error("local-machine-def-invalid-dir", msg)
                    self.error.raise_on_error()

        # Update the lookup dir parameter
        self.l1pprocdef.input_handler["options"]["lookup_dir"] = branch.source

    def _check_if_unambiguous_platform(self):
        """ Checks if the platform is unique, since some l1 processor definitions are valid for a series of
        platforms, such as ERS-1/2, Sentinel-3A/B, etc. The indicator is that the platform tag in the
        l1 preprocessor settings is comma separated list.

        For the location of the source data, it is however necessary that the exact platform is known. It must
        therefore be specified explicitly by the -platform argument """

        settings_is_ambigous = "," in self._l1pprocdef.platform
        platform_is_known = self.platform is not None

        # Test if platform is given if the settings file is valid for more than 1 platform
        if settings_is_ambigous and not platform_is_known:
            msg = "Error: platform in l1p settings is ambiguous (%s), but no platform has been given (-platform)"
            msg = msg % self._l1pprocdef.platform
            sys.exit(msg)

        # Test if platform provided matches the platform list in the settings file
        if settings_is_ambigous and platform_is_known:
            if self.platform not in str(self._l1pprocdef.platform):
                msg = "Error: platform in l1p settings (%s) and given platform (%s) do not match"
                msg = msg % (self._l1pprocdef.platform, self.platform)
                sys.exit(msg)

        # If platform in settings is unambigous, but not provided -> get platform from settings
        if not settings_is_ambigous and not platform_is_known:
            self._platform = self._l1pprocdef.platform
            logger.info("- get platform from l1p settings -> %s" %
                        self.platform)

    @property
    def hemisphere(self):
        """ [str] The hemisphere choice passed to the constructor """
        return self._hemisphere

    @property
    def target_hemisphere(self):
        """ [str list] Hemisphere choice expanded to a list of hemispheres """
        values = {
            "north": ["north"],
            "south": ["south"],
            "global": ["north", "south"]
        }
        return values[self.hemisphere]

    @property
    def pysiral_cfg(self):
        """ The pysiral package configuration object """
        return self._cfg

    @property
    def l1pprocdef(self):
        """ The parsed l1p processor definition (yaml content) """
        return self._l1pprocdef

    @property
    def time_range(self):
        """ [DatePeriod] The full requested processing period """
        return self._time_range

    @property
    def period_segments(self):
        """ Monthly segments of the requested period (cropped to the period) """
        segments = self._time_range.get_segments("month", crop_to_period=True)
        return segments

    @property
    def output_handler_cfg(self):
        """ [dict] Options for the output handler """
        return self._output_handler_cfg

    @property
    def platform(self):
        """ [str] The pysiral platform id (may be set from l1p settings) """
        return self._platform