def get_one_field_info(self, v_name, v_level, v_extra, v_thresh, d_type):
    """! Builds the FCST_FIELD or OBS_FIELD items that are sent to the mode
          config file. Overrides get_one_field_info in
          compare_gridded_wrappers.py
          Args:
            @param v_name var_info name
            @param v_level var_info level
            @param v_extra var_info extra arguments
            @param v_thresh probability threshold
            @param d_type type of data (FCST or OBS)
            @return returns a string with field info
    """
    level_type, level = util.split_level(v_level)

    if self.c_dict[d_type + '_IS_PROB']:
        # build the thresh_lo/thresh_hi fragment from the comparison
        # operator of the probability threshold
        comparison = util.get_comparison_from_threshold(v_thresh)
        number = util.get_number_from_threshold(v_thresh)
        if comparison in ("gt", "ge", ">", ">="):
            thresh_str = "thresh_lo=" + str(number) + ";"
        elif comparison in ("lt", "le", "<", "<="):
            thresh_str = "thresh_hi=" + str(number) + ";"
        else:
            thresh_str = ""

        # NetCDF/GEMPAK probability data is referenced directly with
        # prob=TRUE; other data (e.g. GRIB) wraps the field in a prob={}
        # dictionary keyed by the PROB record
        if self.c_dict[d_type + '_INPUT_DATATYPE'] in ("NETCDF", "GEMPAK"):
            field = '{{ name="{}"; level="{}"; prob=TRUE; '.format(v_name,
                                                                   level)
        else:
            field = ('{{ name="PROB"; level="{}{}"; '
                     'prob={{ name="{}"; {}}} ').format(level_type,
                                                        level.zfill(2),
                                                        v_name,
                                                        thresh_str)
    elif self.config.getbool('config', d_type + '_PCP_COMBINE_RUN', False):
        # pcp_combine output stores data as <name>_<level> at level (*,*)
        field = '{{ name="{}_{}"; level="(*,*)"; '.format(v_name, level)
    else:
        field = '{{ name="{}"; level="{}"; '.format(v_name, v_level)

    # append any extra field options and close the dictionary
    return field + v_extra + " }"
def process_fields_one_thresh(self, time_info, var_info, model_path, obs_path):
    """! For each threshold, set up environment variables and run mode
          Args:
            @param time_info dictionary containing timing information
            @param var_info object containing variable information
            @param model_path forecast file
            @param obs_path observation file
    """
    # if no thresholds are specified, run once with a placeholder threshold
    fcst_thresh_list = [None]
    obs_thresh_list = [None]
    if var_info['fcst_thresh']:
        fcst_thresh_list = var_info['fcst_thresh']
        obs_thresh_list = var_info['obs_thresh']
    elif self.c_dict['FCST_IS_PROB']:
        # probabilistic input cannot be processed without a threshold
        self.logger.error('Must specify field threshold value to '
                          'process probabilistic forecast')
        return

    for fthresh, othresh in zip(fcst_thresh_list, obs_thresh_list):
        self.param = self.c_dict['CONFIG_FILE']
        self.create_and_set_output_dir(time_info)
        self.infiles.extend([model_path, obs_path])
        self.add_merge_config_file()

        fcst_field = self.get_one_field_info(var_info['fcst_name'],
                                             var_info['fcst_level'],
                                             var_info['fcst_extra'],
                                             fthresh, 'FCST')
        obs_field = self.get_one_field_info(var_info['obs_name'],
                                            var_info['obs_level'],
                                            var_info['obs_extra'],
                                            othresh, 'OBS')

        if self.c_dict['QUILT']:
            quilt = 'TRUE'
        else:
            quilt = 'FALSE'

        # environment variables read by the MET config file, listed in the
        # order they are reported by the debug logging below
        env_list = [
            ("MODEL", self.c_dict['MODEL']),
            ("FCST_VAR", var_info['fcst_name']),
            ("OBS_VAR", var_info['obs_name']),
            ("LEVEL", util.split_level(var_info['fcst_level'])[1]),
            ("OBTYPE", self.c_dict['OBTYPE']),
            ("CONFIG_DIR", self.c_dict['CONFIG_DIR']),
            ("FCST_FIELD", fcst_field),
            ("OBS_FIELD", obs_field),
            ("QUILT", quilt),
            ("MET_VALID_HHMM", time_info['valid_fmt'][4:8]),
            ("FCST_CONV_RADIUS", self.c_dict["FCST_CONV_RADIUS"]),
            ("FCST_CONV_THRESH", self.c_dict["FCST_CONV_THRESH"]),
            ("OBS_CONV_RADIUS", self.c_dict["OBS_CONV_RADIUS"]),
            ("OBS_CONV_THRESH", self.c_dict["OBS_CONV_THRESH"]),
            ("FCST_MERGE_THRESH", self.c_dict["FCST_MERGE_THRESH"]),
            ("FCST_MERGE_FLAG", self.c_dict["FCST_MERGE_FLAG"]),
            ("OBS_MERGE_THRESH", self.c_dict["OBS_MERGE_THRESH"]),
            ("OBS_MERGE_FLAG", self.c_dict["OBS_MERGE_FLAG"]),
        ]
        for env_name, env_value in env_list:
            self.add_env_var(env_name, env_value)
        print_list = [env_name for env_name, _ in env_list]

        # add additional env vars if they are specified
        if self.c_dict['VERIFICATION_MASK'] != '':
            self.add_env_var('VERIF_MASK',
                             self.c_dict['VERIFICATION_MASK'])
            print_list.append('VERIF_MASK')

        self.logger.debug("ENVIRONMENT FOR NEXT COMMAND: ")
        self.print_user_env_items()
        for env_name in print_list:
            self.print_env_item(env_name)
        self.logger.debug("COPYABLE ENVIRONMENT FOR NEXT COMMAND: ")
        self.print_env_copy(print_list)

        cmd = self.get_command()
        if cmd is None:
            self.logger.error("Could not generate command")
            return
        self.build()
        self.clear()
def process_fields_one_thresh(self, time_info, var_info, model_path, obs_path):
    """! For each threshold, set up environment variables and run mode
          Args:
            @param time_info dictionary containing timing information
            @param var_info object containing variable information
            @param model_path forecast file list path
            @param obs_path observation file list path
    """
    # if no thresholds are specified, run once
    fcst_thresh_list = [0]
    obs_thresh_list = [0]
    if var_info['fcst_thresh']:
        fcst_thresh_list = var_info['fcst_thresh']
        obs_thresh_list = var_info['obs_thresh']

    for fthresh, othresh in zip(fcst_thresh_list, obs_thresh_list):
        self.param = self.c_dict['CONFIG_FILE']
        self.create_and_set_output_dir(time_info)

        # env vars reported by the debug logging below
        print_list = ['MIN_VOLUME', 'MODEL', 'FCST_VAR', 'OBTYPE',
                      'OBS_VAR', 'LEVEL', 'CONFIG_DIR', 'MET_VALID_HHMM',
                      'FCST_FIELD', 'OBS_FIELD',
                      'FCST_CONV_RADIUS', 'FCST_CONV_THRESH',
                      'OBS_CONV_RADIUS', 'OBS_CONV_THRESH']

        self.add_env_var("MIN_VOLUME", self.c_dict["MIN_VOLUME"])
        self.add_env_var("MODEL", self.c_dict['MODEL'])
        self.add_env_var("FCST_VAR", var_info['fcst_name'])
        self.add_env_var("OBTYPE", self.c_dict['OBTYPE'])
        self.add_env_var("OBS_VAR", var_info['obs_name'])
        self.add_env_var("LEVEL",
                         util.split_level(var_info['fcst_level'])[1])
        self.add_env_var("CONFIG_DIR", self.c_dict['CONFIG_DIR'])
        self.add_env_var("MET_VALID_HHMM", time_info['valid_fmt'][4:8])

        if self.c_dict['SINGLE_RUN']:
            # single mode: one data source supplies both the "fcst" and
            # "obs" sides, so the same field info and convolution settings
            # are used for both
            if self.c_dict['SINGLE_DATA_SRC'] == 'OBS':
                self.set_fcst_file(obs_path)
                single_field = self.get_one_field_info(var_info['obs_name'],
                                                       var_info['obs_level'],
                                                       var_info['obs_extra'],
                                                       othresh, 'OBS')
                conv_radius = self.c_dict["OBS_CONV_RADIUS"]
                conv_thresh = self.c_dict["OBS_CONV_THRESH"]
            else:
                self.set_fcst_file(model_path)
                single_field = self.get_one_field_info(var_info['fcst_name'],
                                                       var_info['fcst_level'],
                                                       var_info['fcst_extra'],
                                                       fthresh, 'FCST')
                conv_radius = self.c_dict["FCST_CONV_RADIUS"]
                conv_thresh = self.c_dict["FCST_CONV_THRESH"]

            self.add_env_var("FCST_FIELD", single_field)
            self.add_env_var("OBS_FIELD", single_field)
            self.add_env_var("FCST_CONV_RADIUS", conv_radius)
            self.add_env_var("OBS_CONV_RADIUS", conv_radius)
            self.add_env_var("FCST_CONV_THRESH", conv_thresh)
            self.add_env_var("OBS_CONV_THRESH", conv_thresh)
        else:
            # normal mode: separate forecast and observation inputs
            self.set_fcst_file(model_path)
            self.set_obs_file(obs_path)
            self.add_env_var("FCST_CONV_RADIUS",
                             self.c_dict["FCST_CONV_RADIUS"])
            self.add_env_var("FCST_CONV_THRESH",
                             self.c_dict["FCST_CONV_THRESH"])
            self.add_env_var("OBS_CONV_RADIUS",
                             self.c_dict["OBS_CONV_RADIUS"])
            self.add_env_var("OBS_CONV_THRESH",
                             self.c_dict["OBS_CONV_THRESH"])
            self.add_env_var("FCST_FIELD",
                             self.get_one_field_info(var_info['fcst_name'],
                                                     var_info['fcst_level'],
                                                     var_info['fcst_extra'],
                                                     fthresh, 'FCST'))
            self.add_env_var("OBS_FIELD",
                             self.get_one_field_info(var_info['obs_name'],
                                                     var_info['obs_level'],
                                                     var_info['obs_extra'],
                                                     othresh, 'OBS'))

        self.logger.debug("ENVIRONMENT FOR NEXT COMMAND: ")
        self.print_user_env_items()
        for env_name in print_list:
            self.print_env_item(env_name)
        self.logger.debug("COPYABLE ENVIRONMENT FOR NEXT COMMAND: ")
        self.print_env_copy(print_list)

        cmd = self.get_command()
        if cmd is None:
            self.logger.error(self.app_name +
                              " could not generate command")
            return
        self.build()
        self.clear()
def setup_subtract_method(self, time_info, var_info, rl):
    """!Setup pcp_combine to subtract two files to build desired accumulation
        Args:
          @param time_info dictionary containing timing information
          @param var_info object containing variable information
          @param rl data type (FCST or OBS)
          @rtype string
          @return path to output file, or None if an input file is missing"""
    self.clear()
    in_dir, in_template = self.get_dir_and_template(rl, 'INPUT')
    out_dir, out_template = self.get_dir_and_template(rl, 'OUTPUT')

    # the level of the requested data determines the accumulation to build
    if rl == 'FCST':
        accum = var_info.fcst_level
    else:
        accum = var_info.obs_level

    level_type, accum = util.split_level(accum)

    lead = time_info['lead_hours']
    # the second input is valid <accum> hours earlier than the first
    lead2 = lead - int(accum)

    # fill in the output file template
    outSts = sts.StringSub(self.logger,
                           out_template,
                           level=(int(accum) * 3600),
                           **time_info)
    out_file = outSts.doStringSub()
    self.outfile = out_file
    self.outdir = out_dir

    self.set_method("SUBTRACT")

    # find the input file at the full lead time
    pcpSts1 = sts.StringSub(self.logger,
                            in_template,
                            level=(int(accum) * 3600),
                            **time_info)
    file1 = os.path.join(in_dir, pcpSts1.doStringSub())
    file1 = util.preprocess_file(file1, self.c_dict[rl+'_INPUT_DATATYPE'],
                                 self.config)

    if file1 is None:
        self.logger.error("Could not find file in {} for init time {} and lead {}"
                          .format(in_dir, time_info['init_fmt'], lead))
        return None

    # if level type is A (accum) and second lead is 0, then
    # run PcpCombine in -add mode with just the first file
    if lead2 == 0 and level_type == 'A':
        self.set_method("ADD")
        self.add_input_file(file1, lead)
        return self.get_command()

    # set time info for second lead
    input_dict2 = {'init': time_info['init'],
                   'lead_hours': lead2}
    time_info2 = time_util.ti_calculate(input_dict2)
    pcpSts2 = sts.StringSub(self.logger,
                            in_template,
                            level=(int(accum) * 3600),
                            **time_info2)
    file2 = os.path.join(in_dir, pcpSts2.doStringSub())
    file2 = util.preprocess_file(file2, self.c_dict[rl+'_INPUT_DATATYPE'],
                                 self.config)

    if file2 is None:
        # bug fix: this path previously referenced an undefined name
        # 'init_time', raising NameError instead of logging the error
        self.logger.error("Could not find file in {} for init time {} and lead {}"
                          .format(in_dir, time_info['init_fmt'], lead2))
        return None

    self.add_input_file(file1, lead)
    self.add_input_file(file2, lead2)
    return self.get_command()
def find_data(self, time_info, var_info, data_type):
    """! Finds the data file to compare
          Args:
            @param time_info dictionary containing timing information
            @param var_info object containing variable information
            @param data_type type of data to find (FCST or OBS)
            @rtype string
            @return Returns the path to an observation file
    """
    def to_epoch_seconds(time_str):
        """!Convert a YYYYMMDDHHMM string to seconds since the epoch.
            Uses datetime.timestamp() (local-time interpretation of a
            naive datetime) instead of strftime("%s"), which is a
            glibc-only extension and is not portable."""
        return int(datetime.strptime(time_str, "%Y%m%d%H%M").timestamp())

    # get time info
    valid_time = time_info['valid_fmt']

    if var_info is not None:
        # set level based on input data type
        if data_type.startswith("OBS"):
            v_level = var_info['obs_level']
        else:
            v_level = var_info['fcst_level']

        # separate character from beginning of numeric level value if applicable
        level = util.split_level(v_level)[1]

        # set level to 0 character if it is not a number
        if not level.isdigit():
            level = '0'
    else:
        level = '0'

    template = self.c_dict[data_type + '_INPUT_TEMPLATE']
    data_dir = self.c_dict[data_type + '_INPUT_DIR']

    # if looking for a file with an exact time match:
    if self.c_dict[data_type + '_FILE_WINDOW_BEGIN'] == 0 and \
            self.c_dict[data_type + '_FILE_WINDOW_END'] == 0:
        # perform string substitution
        dsts = sts.StringSub(self.logger,
                             template,
                             level=(int(level.split('-')[0]) * 3600),
                             **time_info)
        filename = dsts.do_string_sub()

        # build full path with data directory and filename
        path = os.path.join(data_dir, filename)

        # check if desired data file exists and if it needs to be preprocessed
        path = util.preprocess_file(
            path,
            self.c_dict[data_type + '_INPUT_DATATYPE'],
            self.config)
        return path

    # if looking for a file within a time window:
    # convert valid_time to unix time
    valid_seconds = to_epoch_seconds(valid_time)

    # get time of each file, compare to valid time, save best within range
    closest_files = []
    closest_time = 9999999

    # get range of times that will be considered
    valid_range_lower = self.c_dict[data_type + '_FILE_WINDOW_BEGIN']
    valid_range_upper = self.c_dict[data_type + '_FILE_WINDOW_END']
    lower_limit = to_epoch_seconds(
        util.shift_time_seconds(valid_time, valid_range_lower))
    upper_limit = to_epoch_seconds(
        util.shift_time_seconds(valid_time, valid_range_upper))

    # step through all files under input directory in sorted order
    # pylint:disable=unused-variable
    # os.walk returns a tuple. Not all returned values are needed.
    for dirpath, dirnames, all_files in os.walk(data_dir):
        for filename in sorted(all_files):
            fullpath = os.path.join(dirpath, filename)

            # remove input data directory to get relative path
            rel_path = fullpath.replace(data_dir + "/", "")

            # extract time information from relative path using template
            file_time_info = util.get_time_from_file(self.logger,
                                                     rel_path,
                                                     template)
            if file_time_info is None:
                continue

            # get valid time and check if it is within the time range
            file_valid_time = file_time_info['valid'].strftime("%Y%m%d%H%M")
            # skip if could not extract valid time
            if file_valid_time == '':
                continue

            file_valid_seconds = to_epoch_seconds(file_valid_time)

            # skip if outside time range
            if file_valid_seconds < lower_limit or \
                    file_valid_seconds > upper_limit:
                continue

            if not self.c_dict['ALLOW_MULTIPLE_FILES']:
                # if only 1 file is allowed, check if file is
                # closer to desired valid time than previous match
                diff = abs(valid_seconds - file_valid_seconds)
                if diff < closest_time:
                    closest_time = diff
                    del closest_files[:]
                    closest_files.append(fullpath)
            else:
                # if multiple files are allowed, get all files within range
                closest_files.append(fullpath)

    if not closest_files:
        return None

    # check if file(s) needs to be preprocessed before returning the path
    # return single file path if 1 file was found
    if len(closest_files) == 1:
        return util.preprocess_file(
            closest_files[0],
            self.c_dict[data_type + '_INPUT_DATATYPE'],
            self.config)

    # return list if multiple files are found
    out = []
    for close_file in closest_files:
        outfile = util.preprocess_file(
            close_file,
            self.c_dict[data_type + '_INPUT_DATATYPE'],
            self.config)
        out.append(outfile)
    return out
def get_one_field_info(self, v_level, v_thresh, v_name, v_extra, d_type):
    """! Format field information into format expected by MET config file
          Args:
            @param v_level level of data to extract
            @param v_thresh threshold value to use in comparison
            @param v_name name of field to process
            @param v_extra additional field information to add if available
            @param d_type type of data to find (FCST or OBS)
            @rtype string
            @return Returns formatted field information (one or more MET
              field dictionaries joined with commas)
    """
    # separate character from beginning of numeric level value if applicable
    level_type, level = util.split_level(v_level)

    # list to hold field information
    fields = []

    # get cat thresholds if available
    cat_thresh = ""
    threshs = []
    if len(v_thresh) != 0:
        threshs = v_thresh
        cat_thresh = "cat_thresh=[ " + ','.join(threshs) + " ];"

    # if either input is probabilistic, create separate item for each threshold
    if self.c_dict['FCST_IS_PROB'] or self.c_dict['OBS_IS_PROB']:

        # if input being processed if probabilistic, format accordingly
        if self.c_dict[d_type + '_IS_PROB']:
            for thresh in threshs:
                # build thresh_lo/thresh_hi text from the comparison
                # operator; "==" / "eq" sets both bounds
                thresh_str = ""
                comparison = util.get_comparison_from_threshold(thresh)
                number = util.get_number_from_threshold(thresh)
                if comparison in ["gt", "ge", ">", ">=", "==", "eq"]:
                    thresh_str += "thresh_lo=" + str(number) + "; "
                if comparison in ["lt", "le", "<", "<=", "==", "eq"]:
                    thresh_str += "thresh_hi=" + str(number) + "; "

                prob_cat_thresh = self.c_dict[d_type + '_PROB_THRESH']
                if self.c_dict[d_type + '_INPUT_DATATYPE'] == 'NETCDF':
                    # NOTE(review): this branch does not append v_extra,
                    # unlike the others — confirm that is intended
                    field = "{ name=\"" + v_name + "\"; level=\"" + \
                            level + "\"; prob=TRUE; cat_thresh=[" + prob_cat_thresh + "];}"
                else:
                    field = "{ name=\"PROB\"; level=\"" + level_type + \
                            level + "\"; prob={ name=\"" + \
                            v_name + \
                            "\"; " + thresh_str + "} cat_thresh=[" + prob_cat_thresh + "];"
                    field += v_extra + "}"
                fields.append(field)
        else:
            # if input being processed is not probabilistic but the other input is
            for thresh in threshs:
                # if pcp_combine was run, use name_level, (*,*) format
                # if not, use user defined name/level combination
                if self.config.getbool('config', d_type + '_PCP_COMBINE_RUN', False):
                    field = "{ name=\"" + v_name + "_" + level + \
                            "\"; level=\"(*,*)\"; cat_thresh=[ " + \
                            str(thresh) + " ]; }"
                else:
                    field = "{ name=\"" + v_name + \
                            "\"; level=\"" + v_level + "\"; cat_thresh=[ " + \
                            str(thresh) + " ]; }"
                fields.append(field)
    else:
        # if neither input is probabilistic, add all cat thresholds to same field info item
        # if pcp_combine was run, use name_level, (*,*) format
        # if not, use user defined name/level combination
        if self.config.getbool('config', d_type + '_PCP_COMBINE_RUN', False):
            field = "{ name=\"" + v_name + "_" + level + \
                    "\"; level=\"(*,*)\"; "
        else:
            field = "{ name=\"" + v_name + \
                    "\"; level=\"" + v_level + "\"; "
        field += cat_thresh + " " + v_extra + " }"
        fields.append(field)

    # combine all fields into a comma separated string and return
    field_list = ','.join(fields)
    return field_list
def run_at_time_once(self, time_info, var_info, dtype):
    """! Runs the MET application for a given time and forecast lead
         combination
          Args:
            @param time_info dictionary containing timing information
            @param var_info object containing variable information
            @param dtype type of data to process (FCST or OBS)
            @return True if output was created or skipped, False on error
    """
    self.clear()

    # pick the field name/level for the side being processed
    if dtype == "FCST":
        compare_var = var_info['fcst_name']
        level = var_info['fcst_level']
    else:
        compare_var = var_info['obs_name']
        level = var_info['obs_level']

    level = util.split_level(level)[1]

    # all four dir/template settings are required; report whichever is
    # missing (replaces four copy/pasted checks)
    for required in ('INPUT_DIR', 'INPUT_TEMPLATE',
                     'OUTPUT_DIR', 'OUTPUT_TEMPLATE'):
        if self.c_dict[dtype + '_' + required] == '':
            self.logger.error('Must set {}_REGRID_DATA_PLANE_{}'
                              ' in config file'.format(dtype, required))
            exit(1)

    input_dir = self.c_dict[dtype + '_INPUT_DIR']
    input_template = self.c_dict[dtype + '_INPUT_TEMPLATE']
    output_dir = self.c_dict[dtype + '_OUTPUT_DIR']
    output_template = self.c_dict[dtype + '_OUTPUT_TEMPLATE']

    # use 0 in filename templating if the level is not numeric
    if not level.isdigit():
        f_level = '0'
    else:
        f_level = level

    string_sub = sts.StringSub(self.logger,
                               input_template,
                               level=(int(f_level) * 3600),
                               **time_info)
    infile = os.path.join(input_dir, string_sub.do_string_sub())
    infile = util.preprocess_file(
        infile,
        self.config.getstr('config',
                           dtype + '_REGRID_DATA_PLANE_INPUT_DATATYPE', ''),
        self.config)
    if infile is None:
        self.logger.error(
            'Could not find input file in {} matching template {}'.format(
                input_dir, input_template))
        return False
    self.infiles.append(infile)

    verif_grid = self.c_dict['VERIFICATION_GRID']
    if verif_grid == '':
        self.logger.error('No verification grid specified! ' +
                          'Set REGRID_DATA_PLANE_VERIF_GRID')
        return False

    self.infiles.append(verif_grid)

    string_sub = sts.StringSub(self.logger,
                               output_template,
                               level=(int(f_level) * 3600),
                               **time_info)
    outfile = string_sub.do_string_sub()
    self.set_output_path(os.path.join(output_dir, outfile))

    outpath = self.get_output_path()
    if os.path.exists(outpath) and \
            self.c_dict['SKIP_IF_OUTPUT_EXISTS'] is True:
        # bug fix: message previously said to change the setting to True
        # to process, but True is what enables skipping - it must be False
        self.logger.debug(
            'Skip writing output file {} because it already '
            'exists. Remove file or change '
            'REGRID_DATA_PLANE_SKIP_IF_OUTPUT_EXISTS to False to process'.
            format(outpath))
        return True

    # NetCDF (or unset datatype) input comes from pcp_combine, which
    # writes fields named <name>_<level> at level (*,*)
    if self.config.getstr('config',
                          dtype + '_REGRID_DATA_PLANE_INPUT_DATATYPE',
                          '') in ['', 'NETCDF']:
        field_name = "{:s}_{:s}".format(compare_var, str(level).zfill(2))
        self.args.append(
            "-field 'name=\"{:s}\"; level=\"(*,*)\";'".format(field_name))
    else:
        field_name = "{:s}".format(compare_var)
        self.args.append("-field 'name=\"{:s}\"; level=\"{:s}\";'".format(
            field_name, level))

    if self.c_dict['METHOD'] != '':
        self.args.append("-method {}".format(self.c_dict['METHOD']))

    self.args.append("-width {}".format(self.c_dict['WIDTH']))
    self.args.append("-name " + field_name)

    cmd = self.get_command()
    if cmd is None:
        self.logger.error("Could not generate command")
        return False
    self.build()