Example #1
    def run_all_times(self):
        cur_filename = sys._getframe().f_code.co_filename
        cur_function = sys._getframe().f_code.co_name
        init_time = datetime.datetime.strptime(self.init_beg, "%Y%m%d")
        end_time = datetime.datetime.strptime(self.init_end, "%Y%m%d")
        end_time = end_time + datetime.timedelta(hours=self.init_hour_end)

        # This is functionally equivalent to the while loop below.
        # Get the desired YYYYMMDD_HH init increment list
        # init_list = util.gen_init_list(
        #    self.init_beg, self.init_end, self.init_hour_inc,
        #    str(self.init_hour_end))
        # for init_time in init_list:
        #     self.run_at_time(init_time)

        # Loop from begYYYYMMDD to endYYYYMMDD incrementing by HH
        # and ending on the endYYYYMMDD_HH End Hour.
        while init_time <= end_time:
            self.run_at_time(init_time.strftime("%Y%m%d_%H"))
            init_time = init_time + datetime.timedelta(
                hours=self.init_hour_inc)

        # Remove any empty files and directories in the extract_tiles output
        # directory
        util.prune_empty(self.filtered_out_dir, self.logger)

        msg = ("INFO|[" + cur_function + ":" + cur_filename +
               "] | Finished extract tiles")
        self.logger.info(msg)
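
The loop above walks initialization times from a begin date to an end date in fixed-hour steps. Below is a minimal standalone sketch of just that logic; the dates and hour values are hypothetical stand-ins for the wrapper's init_beg, init_end, init_hour_end, and init_hour_inc config values.

    import datetime

    init_beg = "20170601"       # hypothetical begin date (YYYYMMDD)
    init_end = "20170602"       # hypothetical end date (YYYYMMDD)
    init_hour_end = 18          # last init hour on the end date
    init_hour_inc = 6           # hours between init times

    init_time = datetime.datetime.strptime(init_beg, "%Y%m%d")
    end_time = (datetime.datetime.strptime(init_end, "%Y%m%d") +
                datetime.timedelta(hours=init_hour_end))

    # Yields 20170601_00, 20170601_06, ..., 20170602_18
    while init_time <= end_time:
        print(init_time.strftime("%Y%m%d_%H"))
        init_time += datetime.timedelta(hours=init_hour_inc)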
Example #2
    def run_at_time(self, cur_init):
        """!Get TC-paris data then regrid tiles centered on the storm.

        Get TC-pairs track data and GFS model data, do any necessary
        processing then regrid the forecast and analysis files to a
        30 x 30 degree tile centered on the storm.
        Args:

        Returns:

            None: invokes regrid_data_plane to create a netCDF file from two
                    extratropical storm track files.
        """
        # pylint:disable=protected-access
        # Need to call sys._getframe() to get the filename and method/func
        # name for logging information.
        cur_filename = sys._getframe().f_code.co_filename
        cur_function = sys._getframe().f_code.co_name

        # get the process id to be used to identify the output
        # amongst different users and runs.
        cur_pid = str(os.getpid())
        tmp_dir = os.path.join(self.config.getdir('TMP_DIR'), cur_pid)
        msg = ("INFO|[" + cur_filename + ":" + cur_function + "]"
               "|Begin extract tiles")
        self.logger.info(msg)

        # Check that there are tc_pairs data which are used as input
        if util.is_dir_empty(self.tc_pairs_dir):
            msg = ("ERROR|[" + cur_filename + ":" + cur_function + "]"
                   "|No tc pairs data found at " + self.tc_pairs_dir +
                   "Exiting...")
            self.logger.error(msg)
            sys.exit(1)

        # Logging output format:
        # TIME UTC |TYPE (DEBUG, INFO, WARNING, etc.)|[File : function]|Message
        # e.g. logger.info("INFO|[" + cur_filename + ":" + cur_function +
        #                  "]|BEGIN extract_tiles")

        # Begin processing TC pairs for this initialization time, cur_init.
        year_month = util.extract_year_month(cur_init, self.logger)

        # Create the name of the filter file we need to find.  If
        # the file doesn't exist, then run TC_STAT
        filter_filename = "filter_" + cur_init + ".tcst"
        filter_name = os.path.join(self.filtered_out_dir, cur_init,
                                   filter_filename)

        if util.file_exists(filter_name) and not self.overwrite_flag:
            msg = ("DEBUG| [" + cur_filename + ":" + cur_function +
                   " ] | Filter file exists, using Track data file: " +
                   filter_name)
            self.logger.debug(msg)
        else:
            # Create the storm track by applying the
            # filter options defined in the config/param file.
            tile_dir = os.path.join(self.tc_pairs_dir, year_month)
            # Use TcStatWrapper to build up the tc_stat command and invoke
            # the MET tool tc_stat to perform the filtering.
            tcs = TcStatWrapper(self.config)
            tcs.build_tc_stat(self.filtered_out_dir, cur_init,
                              tile_dir, self.addl_filter_opts)

            # Remove any empty files and directories that can occur
            # from filtering.
            util.prune_empty(filter_name, self.logger)

        # Now get unique storm ids from the filter file,
        # filter_yyyymmdd_hh.tcst
        sorted_storm_ids = util.get_storm_ids(filter_name, self.logger)

        # Check for empty sorted_storm_ids, if empty,
        # continue to the next time.
        if not sorted_storm_ids:
            # No storms found for init time, cur_init
            msg = ("DEBUG|[" + cur_filename + ":" + cur_function + " ]|" +
                   "No storms were found for " + cur_init +
                   "...continue to next in list")
            self.logger.debug(msg)
            return

        # Process each storm in the sorted_storm_ids list
        # Iterate over each filter file in the output directory and
        # search for the presence of the storm id.  Store this
        # corresponding row of data into a temporary file in the
        # /tmp/<pid> directory.
        for cur_storm in sorted_storm_ids:
            storm_output_dir = os.path.join(self.filtered_out_dir,
                                            cur_init, cur_storm)
            with open(filter_name, "r") as filter_file:
                header = filter_file.readline()
            util.mkdir_p(storm_output_dir)
            util.mkdir_p(tmp_dir)
            tmp_filename = "filter_" + cur_init + "_" + cur_storm
            full_tmp_filename = os.path.join(tmp_dir, tmp_filename)

            storm_match_list = util.grep(cur_storm, filter_name)
            with open(full_tmp_filename, "a+") as tmp_file:
                # copy over header information
                tmp_file.write(header)
                for storm_match in storm_match_list:
                    tmp_file.write(storm_match)

            # Perform regridding of the forecast and analysis files
            # to an n X n degree tile centered on the storm (dimensions
            # are indicated in the config/param file).
            util.retrieve_and_regrid(full_tmp_filename, cur_init,
                                     cur_storm, self.filtered_out_dir,
                                     self.logger, self.config)

        # end of for cur_storm

        # Remove any empty files and directories in the extract_tiles output
        # directory
        util.prune_empty(self.filtered_out_dir, self.logger)

        # Clean up the tmp directory if it exists
        if os.path.isdir(tmp_dir):
            util.rmtree(tmp_dir)

        msg = ("INFO|[" + cur_filename + ":" + cur_function + "]"
               "| Finished extract tiles")
        self.logger.info(msg)
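
The per-storm block in run_at_time follows a grep-into-temp-file pattern: keep the .tcst header line, then append every row that mentions the storm id. A standalone sketch of that pattern, with hypothetical file names and without the util helpers, might look like this:

    import os
    import re

    def write_storm_rows(filter_name, cur_storm, tmp_dir):
        """Copy the header plus every row matching cur_storm to a temp file."""
        with open(filter_name, "r") as src:
            header = src.readline()
            rows = src.readlines()
        os.makedirs(tmp_dir, exist_ok=True)
        out_path = os.path.join(tmp_dir, "filter_" + cur_storm)
        with open(out_path, "w") as out:
            out.write(header)
            for row in rows:
                if re.search(cur_storm, row):   # same idea as util.grep
                    out.write(row)
        return out_path

    # e.g. write_storm_rows("filter_20170601_12.tcst", "ML1200942014", "/tmp/123")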
Example #3
    def run_at_time(self, input_dict):
        """!Get TC-paris data then regrid tiles centered on the storm.

        Get TC-pairs track data and GFS model data, do any necessary
        processing then regrid the forecast and analysis files to a
        30 x 30 degree tile centered on the storm.
        Args:
            input_dict:  Time dictionary
        Returns:

            None: invokes regrid_data_plane to create a netCDF file from two
                    extratropical storm track files.
        """
        time_info = time_util.ti_calculate(input_dict)
        init_time = time_info['init_fmt']

        # get the process id to be used to identify the output
        # amongst different users and runs.
        cur_pid = str(os.getpid())
        tmp_dir = os.path.join(self.config.getdir('TMP_DIR'), cur_pid)
        self.logger.info("Begin extract tiles")

        cur_init = init_time[0:8] + "_" + init_time[8:10]

        # Check that there are tc_pairs data which are used as input
        if util.is_dir_empty(self.tc_pairs_dir):
            self.logger.error("No tc pairs data found at {}"\
                              .format(self.tc_pairs_dir))
            sys.exit(1)

        # Create the name of the filter file we need to find.  If
        # the file doesn't exist, then run TC_STAT
        filter_filename = "filter_" + cur_init + ".tcst"
        filter_name = os.path.join(self.filtered_out_dir, cur_init,
                                   filter_filename)

        if util.file_exists(filter_name) and not self.overwrite_flag:
            self.logger.debug("Filter file exists, using Track data file: {}"\
                              .format(filter_name))
        else:
            # Create the storm track by applying the
            # filter options defined in the config/param file.
            # Use TcStatWrapper to build up the tc_stat command and invoke
            # the MET tool tc_stat to perform the filtering.
            tiles_list = util.get_files(self.tc_pairs_dir, ".*tcst",
                                        self.logger)
            tiles_list_str = ' '.join(tiles_list)

            tcs = TcStatWrapper(self.config, self.logger)
            tcs.build_tc_stat(self.filtered_out_dir, cur_init, tiles_list_str,
                              self.addl_filter_opts)

            # Remove any empty files and directories that can occur
            # from filtering.
            util.prune_empty(filter_name, self.logger)

        # Now get unique storm ids from the filter file,
        # filter_yyyymmdd_hh.tcst
        sorted_storm_ids = util.get_storm_ids(filter_name, self.logger)

        # Check for empty sorted_storm_ids, if empty,
        # continue to the next time.
        if not sorted_storm_ids:
            # No storms found for init time, cur_init
            msg = "No storms were found for {} ...continue to next in list"\
              .format(cur_init)
            self.logger.debug(msg)
            return

        # Process each storm in the sorted_storm_ids list
        # Iterate over each filter file in the output directory and
        # search for the presence of the storm id.  Store this
        # corresponding row of data into a temporary file in the
        # /tmp/<pid> directory.
        for cur_storm in sorted_storm_ids:
            storm_output_dir = os.path.join(self.filtered_out_dir, cur_init,
                                            cur_storm)
            with open(filter_name, "r") as filter_file:
                header = filter_file.readline()
            util.mkdir_p(storm_output_dir)
            util.mkdir_p(tmp_dir)
            tmp_filename = "filter_" + cur_init + "_" + cur_storm
            full_tmp_filename = os.path.join(tmp_dir, tmp_filename)

            storm_match_list = util.grep(cur_storm, filter_name)
            with open(full_tmp_filename, "a+") as tmp_file:
                # copy over header information
                tmp_file.write(header)
                for storm_match in storm_match_list:
                    tmp_file.write(storm_match)

            # Perform regridding of the forecast and analysis files
            # to an n X n degree tile centered on the storm (dimensions
            # are indicated in the config/param file).
            feature_util.retrieve_and_regrid(full_tmp_filename, cur_init,
                                             cur_storm, self.filtered_out_dir,
                                             self.config)

        # end of for cur_storm

        # Remove any empty files and directories in the extract_tiles output
        # directory
        util.prune_empty(self.filtered_out_dir, self.logger)

        # Clean up the tmp directory if it exists
        if os.path.isdir(tmp_dir):
            util.rmtree(tmp_dir)
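
This version derives cur_init from the formatted init time rather than receiving it directly. A tiny sketch of that string reshaping, using a hypothetical init value:

    init_time = "2017060112"    # hypothetical init_fmt value (%Y%m%d%H)
    cur_init = init_time[0:8] + "_" + init_time[8:10]
    assert cur_init == "20170601_12"
    filter_filename = "filter_" + cur_init + ".tcst"
    print(filter_filename)      # filter_20170601_12.tcst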
Example #4
    def create_fcst_anly_to_ascii_file(self, fcst_anly_grid_files, cur_init,
                                       cur_storm, fcst_anly_filename_base):
        """! Create ASCII file for either the FCST or ANLY files that are
             aggregated based on init time and storm id.

        Args:
            fcst_anly_grid_files:       A list of the FCST or ANLY gridded
                                        files under consideration.

            cur_init:                  The initialization time of interest

            cur_storm:                 The storm id of interest

            fcst_anly_filename_base:   The base name of the ASCII file
                                        (either ANLY_ASCII_FILES_ or
                                        FCST_ASCII_FILES_) which will be
                                        appended with the storm id.

        Returns:
            None:                      Creates an ASCII file containing a list
                                        of either FCST or ANLY files based on
                                        init time and storm id.
        """

        # pylint:disable=protected-access
        # Need to call sys._getframe() to get the filename and method/func
        # for logging information.

        # For logging
        cur_filename = sys._getframe().f_code.co_filename
        cur_function = sys._getframe().f_code.co_name

        # Create an ASCII file containing a list of all
        # the fcst or analysis tiles.
        fcst_anly_ascii_fname_parts = [fcst_anly_filename_base, cur_storm]
        fcst_anly_ascii_fname = ''.join(fcst_anly_ascii_fname_parts)
        fcst_anly_ascii_dir = os.path.join(self.series_out_dir, cur_init,
                                           cur_storm)
        util.mkdir_p(fcst_anly_ascii_dir)
        fcst_anly_ascii = os.path.join(fcst_anly_ascii_dir,
                                       fcst_anly_ascii_fname)

        # Sort the files in the fcst_anly_grid_files list.
        sorted_fcst_anly_grid_files = sorted(fcst_anly_grid_files)
        tmp_param = ''
        for cur_fcst_anly in sorted_fcst_anly_grid_files:
            # Write out the files that pertain to this storm and
            # don't write if already in tmp_param.
            if cur_fcst_anly not in tmp_param and cur_storm in cur_fcst_anly:
                tmp_param += cur_fcst_anly
                tmp_param += '\n'
        # Now create the fcst or analysis ASCII file
        try:
            with open(fcst_anly_ascii, 'a') as filehandle:
                filehandle.write(tmp_param)
        except IOError:
            msg = ("Could not create requested ASCII file:  " +
                   fcst_anly_ascii)
            self.logger.error(msg)

        if os.path.exists(fcst_anly_ascii) and \
                os.stat(fcst_anly_ascii).st_size == 0:
            # Just in case there are any empty fcst ASCII or anly ASCII files
            # at this point,
            # explicitly remove them (and any resulting empty directories)
            #  so they don't cause any problems with further processing
            # steps.
            util.prune_empty(fcst_anly_ascii_dir, self.logger)
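
The tmp_param loop above collects each matching file once, in sorted order. An equivalent standalone sketch that uses a set for the de-duplication test instead of substring checks against the accumulated string; the file names and storm id are hypothetical:

    import os
    import tempfile

    def write_file_list(grid_files, cur_storm, ascii_path):
        """Append the sorted, de-duplicated files matching cur_storm."""
        seen = set()
        with open(ascii_path, "a") as handle:
            for path in sorted(grid_files):
                if cur_storm in path and path not in seen:
                    seen.add(path)
                    handle.write(path + "\n")

    demo_files = ["/tiles/FCST_TILE_F006_ML1200942014.nc",   # hypothetical
                  "/tiles/FCST_TILE_F006_ML1200942014.nc",   # duplicate
                  "/tiles/FCST_TILE_F012_ML1200972014.nc"]   # other storm
    out_path = os.path.join(tempfile.gettempdir(),
                            "FCST_ASCII_FILES_ML1200942014")
    write_file_list(demo_files, "ML1200942014", out_path)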
Example #5
    def run_all_times(self):
        """! Invoke the series analysis script based on
            the init time in the format YYYYMMDD_hh

            Args:

            Returns:
                None:  Creates graphical plots of storm tracks
        """
        # pylint:disable=protected-access
        # Need to call sys._getframe() to get the filename and method/func
        # for logging information.

        # Used for logging.
        cur_filename = sys._getframe().f_code.co_filename
        cur_function = sys._getframe().f_code.co_name
        self.logger.info("Starting series analysis by init time")

        # Set up the environment variable to be used in the Series Analysis
        #   Config file (SERIES_ANALYSIS_BY_INIT_CONFIG_FILE)
        # Used to set cnt  value in output_stats in
        # "SERIES_ANALYSIS_BY_INIT_CONFIG_FILE"
        # Need to do some pre-processing so that Python will use " and not '
        #  because currently MET doesn't support single-quotes
        tmp_stat_string = str(self.stat_list)
        tmp_stat_string = tmp_stat_string.replace("\'", "\"")

        # For example, we want tmp_stat_string to look like
        #   '["TOTAL","FBAR"]', NOT "['TOTAL','FBAR']"
        os.environ['STAT_LIST'] = tmp_stat_string
        self.add_env_var('STAT_LIST', tmp_stat_string)

        series_filter_opts = \
            self.config.getstr('config', 'SERIES_ANALYSIS_FILTER_OPTS')

        if self.regrid_with_met_tool:
            # Regridding via MET Tool regrid_data_plane.
            fcst_tile_regex = self.config.getstr('regex_pattern',
                                                 'FCST_NC_TILE_REGEX')
            anly_tile_regex = self.config.getstr('regex_pattern',
                                                 'ANLY_NC_TILE_REGEX')
        else:
            # Regridding via wgrib2 tool.
            fcst_tile_regex = self.config.getstr('regex_pattern',
                                                 'FCST_TILE_REGEX')
            anly_tile_regex = self.config.getstr('regex_pattern',
                                                 'ANLY_TILE_REGEX')
        # Initialize the tile_dir to point to the extract_tiles_dir.
        # And retrieve a list of init times based on the data available in
        # the extract tiles directory.
        tile_dir = self.extract_tiles_dir
        init_times = util.get_updated_init_times(tile_dir, self.logger)

        # Check for input tile data.
        try:
            util.check_for_tiles(tile_dir, fcst_tile_regex, anly_tile_regex,
                                 self.logger)
        except OSError:
            msg = ("Missing n x m tile files.  " +
                   "Extract tiles needs to be run")
            self.logger.error(msg)

        # If applicable, apply any filtering via tc_stat, as indicated in the
        # parameter/config file.
        tmp_dir = os.path.join(self.config.getdir('TMP_DIR'), str(os.getpid()))
        if series_filter_opts:
            self.apply_series_filters(tile_dir, init_times,
                                      self.series_filtered_out_dir,
                                      self.filter_opts, tmp_dir)

            # Clean up any empty files and directories that could arise as
            # a result of filtering
            util.prune_empty(self.series_filtered_out_dir, self.logger)

            # Get the list of all the files that were created as a result
            # of applying the filter options.
            # First, make sure that the series filtered output
            # directory isn't empty.  If it is, then no files fell within the
            # filter criteria.
            if os.listdir(self.series_filtered_out_dir):
                # The series filter directory has data, use this directory as
                # input for series analysis.
                tile_dir = self.series_filtered_out_dir

                # Generate the tmp_anly and tmp_fcst files used to validate
                # filtering and for troubleshooting
                # The tmp_fcst and tmp_anly ASCII files contain the
                # list of files that meet the filter criteria.
                filtered_dirs_list = util.get_files(tile_dir, ".*.",
                                                    self.logger)
                util.create_filter_tmp_files(filtered_dirs_list,
                                             self.series_filtered_out_dir,
                                             self.logger)

            else:
                # Applying the filter produced no results.  Rather than
                # stopping, continue by using the files from extract_
                # tiles as input.
                msg = ("Applied series filter options, no results..." +
                       "using extract tiles data for series analysis input.")
                self.logger.debug(msg)
                tile_dir = self.extract_tiles_dir

        else:
            # No additional filtering was requested.
            # Use the data in the extract tiles directory as the
            # source of input tile data for series analysis.
            tile_dir = self.extract_tiles_dir

        # Create FCST and ANLY ASCII files based on init time and storm id.
        # These are arguments to the
        # -fcst and -obs arguments to the MET Tool series_analysis.
        # First, get an updated list of init times,
        # since filtering can reduce the amount of init times.
        sorted_filter_init = self.get_ascii_storm_files_list(tile_dir)

        # Clean up any remaining empty files and dirs
        util.prune_empty(self.series_out_dir, self.logger)
        self.logger.debug("Finished creating FCST and ANLY ASCII files, and " +
                          "cleaning empty files and dirs")

        # Build up the arguments to and then run the MET tool series_analysis.
        self.build_and_run_series_request(sorted_filter_init, tile_dir)

        # Generate plots.
        # Check for .nc files in output_dir first; if these are absent,
        # then there is a problem.
        if self.is_netcdf_created():
            self.generate_plots(sorted_filter_init, tile_dir)
        else:
            self.logger.error("No NetCDF files were created by"
                              " series_analysis, exiting...")
            sys.exit(errno.ENODATA)
        self.logger.info("Finished series analysis by init time")
Example #6
    def get_ascii_storm_files_list(self, tile_dir):
        """! Creates the list of ASCII files that contain the storm id and init
             times.  The list is used to create an ASCII file which will be
             used as the option to the -obs or -fcst flag to the MET
             series_analysis tool.
             Args:
                   @param tile_dir:  The directory where input files reside.
             Returns:
                   sorted_filter_init:  A list of the sorted directories
                                        corresponding to the init times after
                                        filtering has been applied.  If
                                        filtering produced no results, this
                                        is the list of files created from
                                        running extract_tiles.
        """

        # pylint:disable=protected-access
        # Need to call sys._getframe() to get the filename and method/func
        # for logging information.
        # For logging
        cur_filename = sys._getframe().f_code.co_filename
        cur_function = sys._getframe().f_code.co_name

        filter_init_times = util.get_updated_init_times(tile_dir, self.logger)
        sorted_filter_init = sorted(filter_init_times)

        for cur_init in sorted_filter_init:
            # Get all the storm ids for storm track pairs that
            # correspond to this init time.
            storm_list = self.get_storms_for_init(cur_init, tile_dir)
            if not storm_list:
                # No storms for this init time,
                # check next init time in list
                continue
            else:
                for cur_storm in storm_list:
                    # First get the filenames for the gridded forecast and
                    # analysis (n deg x m deg tiles that were created by
                    # extract_tiles). These files are aggregated by
                    # init time and storm id.
                    anly_grid_regex = ".*ANLY_TILE_F.*grb2"
                    fcst_grid_regex = ".*FCST_TILE_F.*grb2"

                    if self.regrid_with_met_tool:
                        anly_grid_regex = ".*ANLY_TILE_F.*nc"
                        fcst_grid_regex = ".*FCST_TILE_F.*nc"

                    anly_grid_files = util.get_files(tile_dir, anly_grid_regex,
                                                     self.logger)
                    fcst_grid_files = util.get_files(tile_dir, fcst_grid_regex,
                                                     self.logger)

                    # Now do some checking to make sure we aren't
                    # missing either the forecast or
                    # analysis files, if so log the error and proceed to next
                    # storm in the list.
                    if not anly_grid_files or not fcst_grid_files:
                        # No gridded analysis or forecast
                        # files found, continue
                        self.logger.info("no gridded analysis or forecast " +
                                         "file found, continue to next storm")
                        continue

                    # Now create the FCST and ANLY ASCII files based on
                    # cur_init and cur_storm:
                    self.create_fcst_anly_to_ascii_file(
                        fcst_grid_files, cur_init, cur_storm,
                        self.fcst_ascii_file_prefix)
                    self.create_fcst_anly_to_ascii_file(
                        anly_grid_files, cur_init, cur_storm,
                        self.anly_ascii_file_prefix)
                    util.prune_empty(self.series_out_dir, self.logger)
        return sorted_filter_init
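
Selecting tile files above hinges on choosing the right regex for the regridding path (netCDF from regrid_data_plane vs. GRIB2 from wgrib2) and then matching candidate files against it. A standalone sketch with hypothetical paths:

    import re

    regrid_with_met_tool = True
    fcst_grid_regex = (".*FCST_TILE_F.*nc" if regrid_with_met_tool
                       else ".*FCST_TILE_F.*grb2")

    candidate_files = ["/tiles/20170601_12/FCST_TILE_F006.nc",   # hypothetical
                       "/tiles/20170601_12/ANLY_TILE_F006.nc"]
    fcst_grid_files = [f for f in candidate_files
                       if re.match(fcst_grid_regex, f)]
    print(fcst_grid_files)   # only the FCST tile matches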
Example #7
    def apply_series_filters(self, tile_dir, init_times, series_output_dir,
                             filter_opts, temporary_dir):
        """! Apply filter options, as specified in the
            param/config file.
            Args:
               @param tile_dir:  Directory where input data files reside.
                                 e.g. data which we will be applying our filter
                                 criteria.
               @param init_times:  List of init times that define the
                                   input data.
               @param series_output_dir:  The directory where the filter results
                                          will be stored.
               @param filter_opts:  The filter options to apply
               @param temporary_dir:  The temporary directory where intermediate
                                      files are saved.
            Returns:
                None
        """
        # pylint: disable=too-many-arguments
        # Seven input arguments are needed to perform filtering.

        # pylint:disable=protected-access
        # Need to call sys._getframe() to get the filename and method/func
        # for logging information.

        # Useful for logging
        cur_filename = sys._getframe().f_code.co_filename
        cur_function = sys._getframe().f_code.co_name

        # Create temporary directory where intermediate files are saved.
        cur_pid = str(os.getpid())
        tmp_dir = os.path.join(temporary_dir, cur_pid)
        self.logger.debug("creating tmp dir: " + tmp_dir)

        for cur_init in init_times:
            # Call the tc_stat wrapper to build up the command and invoke
            # the MET tool tc_stat.
            filter_file = "filter_" + cur_init + ".tcst"
            filter_filename = os.path.join(series_output_dir, cur_init,
                                           filter_file)

            tcs = TcStatWrapper(self.config, self.logger)
            tcs.build_tc_stat(series_output_dir, cur_init, tile_dir,
                              filter_opts)

            # Check that the filter .tcst file exists and isn't empty.
            # If it is missing or empty, skip this init time and
            # continue with the next one.
            if not util.file_exists(filter_filename):
                msg = ("Filter file does not exist; it was never created " +
                       "by the MET tool tc_stat.")
                self.logger.debug(msg)
                continue
            elif os.stat(filter_filename).st_size == 0:
                msg = ("Empty filter file; the filter options yielded " +
                       "no results.")
                self.logger.debug(msg)
                continue
            else:
                # Now retrieve the files corresponding to these
                # storm ids that resulted from filtering.
                sorted_storm_ids = util.get_storm_ids(filter_filename,
                                                      self.logger)

                # Retrieve the header from filter_filename to be used in
                # creating the temporary files.
                with open(filter_filename, 'r') as filter_handle:
                    header = filter_handle.readline()

                for cur_storm in sorted_storm_ids:
                    msg = ("Processing storm: " + cur_storm + " for file: " +
                           filter_filename)
                    self.logger.debug(msg)
                    storm_output_dir = os.path.join(series_output_dir,
                                                    cur_init, cur_storm)
                    util.mkdir_p(storm_output_dir)
                    util.mkdir_p(tmp_dir)
                    tmp_file = "filter_" + cur_init + "_" + cur_storm
                    tmp_filename = os.path.join(tmp_dir, tmp_file)
                    storm_match_list = util.grep(cur_storm, filter_filename)
                    with open(tmp_filename, "a+") as tmp_file:
                        tmp_file.write(header)
                        for storm_match in storm_match_list:
                            tmp_file.write(storm_match)

                    # Create the analysis and forecast files based
                    # on the storms (defined in the tmp_filename created above)
                    # Store the analysis and forecast files in the
                    # series_output_dir.
                    feature_util.retrieve_and_regrid(tmp_filename, cur_init,
                                                     cur_storm,
                                                     series_output_dir,
                                                     self.config)

        # Check for any empty files and directories and remove them to avoid
        # any errors or performance degradation when performing
        # series analysis.
        util.prune_empty(series_output_dir, self.logger)

        # Clean up the tmp dir if it exists
        if os.path.isdir(tmp_dir):
            util.rmtree(tmp_dir)
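
The missing/empty checks on the filter file above reduce to a small predicate; pulled out on its own it might look like the sketch below (the path in the usage comment is hypothetical):

    import os

    def filter_file_usable(filter_filename):
        """Return True when tc_stat produced a non-empty filter file."""
        if not os.path.exists(filter_filename):
            return False    # never created by tc_stat
        if os.stat(filter_filename).st_size == 0:
            return False    # filter options matched nothing
        return True

    # e.g. filter_file_usable("/series/20170601_12/filter_20170601_12.tcst")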