def run_at_time(self, input_dict): """! Runs the MET application for a given run time. This function loops over the list of forecast leads and runs the application for each. Overrides run_at_time in compare_gridded_wrapper.py Args: @param input_dict dictionary containing timing information """ # max_lookback = self.c_dict['MAX_LOOKBACK'] # file_interval = self.c_dict['FILE_INTERVAL'] lead_seq = util.get_lead_sequence(self.config, input_dict) for var_info in self.c_dict['VAR_LIST']: if self.c_dict['SINGLE_RUN']: self.run_single_mode(input_dict, var_info) continue model_list = [] obs_list = [] # find files for each forecast lead time tasks = [] for lead in lead_seq: input_dict['lead_hours'] = lead time_info = time_util.ti_calculate(input_dict) tasks.append(time_info) for current_task in tasks: # call find_model/obs as needed model_file = self.find_model(current_task, var_info) obs_file = self.find_obs(current_task, var_info) if model_file is None and obs_file is None: self.logger.warning('Obs and fcst files were not found for init {} and lead {}'. format(current_task['init_fmt'], current_task['lead_hours'])) continue if model_file is None: self.logger.warning('Forecast file was not found for init {} and lead {}'. format(current_task['init_fmt'], current_task['lead_hours'])) continue if obs_file is None: self.logger.warning('Observation file was not found for init {} and lead {}'. format(current_task['init_fmt'], current_task['lead_hours'])) continue model_list.append(model_file) obs_list.append(obs_file) if len(model_list) == 0: return # write ascii file with list of files to process input_dict['lead_hours'] = 0 time_info = time_util.ti_calculate(input_dict) model_outfile = time_info['valid_fmt'] + '_mtd_fcst_' + var_info['fcst_name'] + '.txt' obs_outfile = time_info['valid_fmt'] + '_mtd_obs_' + var_info['obs_name'] + '.txt' model_list_path = self.write_list_file(model_outfile, model_list) obs_list_path = self.write_list_file(obs_outfile, obs_list) arg_dict = {'obs_path' : obs_list_path, 'model_path' : model_list_path } self.process_fields_one_thresh(current_task, var_info, **arg_dict)
def test_setup_add_method():
    rl = "OBS"
    pcw = pcp_combine_wrapper(rl)
    task_info = {}
    task_info['valid'] = datetime.datetime.strptime("2016090418", '%Y%m%d%H')
    time_info = time_util.ti_calculate(task_info)

    var_info = {}
    var_info['fcst_name'] = "APCP"
    var_info['obs_name'] = "ACPCP"
    var_info['fcst_extra'] = ""
    var_info['obs_extra'] = ""
    var_info['fcst_level'] = "A06"
    var_info['obs_level'] = "A06"

    input_dir = pcw.config.getdir('METPLUS_BASE') + "/internal_tests/data/accum"
    output_dir = pcw.config.getdir('METPLUS_BASE') + "/internal_tests/data/fakeout"
    pcw.setup_add_method(time_info, var_info, rl)
    in_files = pcw.infiles
    out_file = pcw.get_output_path()
    if len(in_files) == 6 and \
       input_dir + "/20160904/file.2016090418.01h" in in_files and \
       input_dir + "/20160904/file.2016090417.01h" in in_files and \
       input_dir + "/20160904/file.2016090416.01h" in in_files and \
       input_dir + "/20160904/file.2016090415.01h" in in_files and \
       input_dir + "/20160904/file.2016090414.01h" in in_files and \
       input_dir + "/20160904/file.2016090413.01h" in in_files and \
       out_file == output_dir + "/20160904/outfile.2016090418_A06h":
        assert True
    else:
        assert False
def run_at_time(self, input_dict): """! Runs the MET application for a given run time. This function loops over the list of forecast leads and runs the application for each. Args: @param input_dict dictionary containing time information """ # loop of forecast leads and process each lead_seq = util.get_lead_sequence(self.config, input_dict) for lead in lead_seq: input_dict['lead_hours'] = lead self.logger.info("Processing forecast lead {}".format(lead)) # set current lead time config and environment variables self.config.set('config', 'CURRENT_LEAD_TIME', lead) os.environ['METPLUS_CURRENT_LEAD_TIME'] = str(lead) time_info = time_util.ti_calculate(input_dict) if util.skip_time(time_info, self.config): self.logger.debug('Skipping run time') continue # Run for given init/valid time and forecast lead combination self.run_at_time_once(time_info)
def test_run_gen_vx_mask_twice():
    input_dict = {'valid': datetime.datetime.strptime("201802010000", '%Y%m%d%H%M'),
                  'lead': 0}
    time_info = time_util.ti_calculate(input_dict)

    wrap = gen_vx_mask_wrapper()
    wrap.c_dict['INPUT_TEMPLATE'] = '{valid?fmt=%Y%m%d%H}_ZENITH'
    wrap.c_dict['MASK_INPUT_TEMPLATES'] = ['LAT', 'LON']
    wrap.c_dict['OUTPUT_DIR'] = os.path.join(wrap.config.getdir('OUTPUT_BASE'),
                                             'GenVxMask_test')
    wrap.c_dict['OUTPUT_TEMPLATE'] = '{valid?fmt=%Y%m%d%H}_ZENITH_LAT_LON_MASK.nc'

    cmd_args = ["-type lat -thresh 'ge30&&le50'",
                "-type lon -thresh 'le-70&&ge-130' -intersection -name lat_lon_mask"]
    wrap.c_dict['COMMAND_OPTIONS'] = cmd_args

    wrap.run_at_time_all(time_info)

    expected_cmds = [f"{wrap.app_path} 2018020100_ZENITH LAT {wrap.config.getdir('OUTPUT_BASE')}/stage/gen_vx_mask/temp_0.nc {cmd_args[0]} -v 2",
                     f"{wrap.app_path} {wrap.config.getdir('OUTPUT_BASE')}/stage/gen_vx_mask/temp_0.nc LON {wrap.config.getdir('OUTPUT_BASE')}/GenVxMask_test/2018020100_ZENITH_LAT_LON_MASK.nc {cmd_args[1]} -v 2"]

    test_passed = True

    if len(wrap.all_commands) != len(expected_cmds):
        print("Number of commands run is not the same as expected")
        assert False

    for cmd, expected_cmd in zip(wrap.all_commands, expected_cmds):
        print(f"  ACTUAL:{cmd}")
        print(f"EXPECTED:{expected_cmd}")
        if cmd != expected_cmd:
            test_passed = False

    assert test_passed
def getLowestForecastFile(self, valid_time, dtype, template):
    """!Find the lowest forecast hour that corresponds to the valid time
        Args:
          @param valid_time valid time to search
          @param dtype data type (FCST or OBS) to get filename template
          @rtype string
          @return Path to file with the lowest forecast hour"""
    out_file = None

    # search for file with lowest forecast lead, then loop up until a valid one is found
    min_forecast = self.c_dict[dtype + '_MIN_FORECAST']
    max_forecast = self.c_dict[dtype + '_MAX_FORECAST']
    forecast_lead = min_forecast
    while forecast_lead <= max_forecast:
        input_dict = {}
        input_dict['valid'] = valid_time
        input_dict['lead_hours'] = forecast_lead
        time_info = time_util.ti_calculate(input_dict)
        fSts = sts.StringSub(self.logger,
                             template,
                             **time_info)
        search_file = os.path.join(self.input_dir,
                                   fSts.do_string_sub())
        search_file = util.preprocess_file(search_file,
                                           self.c_dict[dtype + '_INPUT_DATATYPE'],
                                           self.config)

        if search_file is not None:
            return search_file
        forecast_lead += 1

    return None
def test_find_obs_offset(offsets, expected_file, offset_seconds):
    config = metplus_config()
    pcw = CommandBuilder(config, config.logger)

    v = {}
    v['obs_level'] = "6"
    task_info = {}
    task_info['valid'] = datetime.datetime.strptime("2020020112", '%Y%m%d%H')
    task_info['lead'] = 0
    time_info = time_util.ti_calculate(task_info)

    pcw.c_dict['OFFSETS'] = offsets
    pcw.c_dict['OBS_INPUT_DIR'] = pcw.config.getdir('METPLUS_BASE') + "/internal_tests/data/obs"
    pcw.c_dict['OBS_INPUT_TEMPLATE'] = "{da_init?fmt=%2H}z.prepbufr.tm{offset?fmt=%2H}.{da_init?fmt=%Y%m%d}"

    obs_file, time_info = pcw.find_obs_offset(time_info, v)

    print(f"OBSFILE: {obs_file}")
    print(f"EXPECTED FILE: {expected_file}")

    if expected_file is None:
        assert not obs_file
    else:
        assert os.path.basename(obs_file) == expected_file and \
               time_info['offset'] == offset_seconds
def run_at_time(self, input_dict): """! Do some processing for the current run time (init or valid) Args: @param input_dict dictionary containing time information of current run generally contains 'now' (current) time and 'init' or 'valid' time """ # fill in time info dictionary time_info = time_util.ti_calculate(input_dict) # check if looping by valid or init and log time for run loop_by = time_info['loop_by'] self.logger.info('Running ExampleWrapper at {} time {}'.format(loop_by, time_info[loop_by+'_fmt'])) # read input directory and template from config dictionary input_dir = self.c_dict['INPUT_DIR'] input_template = self.c_dict['INPUT_TEMPLATE'] self.logger.info('Input directory is {}'.format(input_dir)) self.logger.info('Input template is {}'.format(input_template)) # get forecast leads to loop over lead_seq = util.get_lead_sequence(self.config, input_dict) for lead in lead_seq: # set forecast lead time in hours time_info['lead_hours'] = lead # recalculate time info items time_info = time_util.ti_calculate(time_info) # log init, valid, and forecast lead times for current loop iteration self.logger.info('Processing forecast lead {} initialized at {} and valid at {}' .format(lead, time_info['init'].strftime('%Y-%m-%d %HZ'), time_info['valid'].strftime('%Y-%m-%d %HZ'))) # perform string substitution to find filename based on template and current run time # pass in logger, then template, then any items to use to fill in template # pass time info with ** in front to expand each dictionary item to a variable # i.e. time_info['init'] becomes init=init_value filename = StringSub(self.logger, input_template, **time_info).do_string_sub() self.logger.info('Looking in input directory for file: {}'.format(filename)) return True
def run_single_mode(self, input_dict, var_info):
    single_list = []

    if self.c_dict['SINGLE_DATA_SRC'] == 'OBS':
        find_method = self.find_obs
        s_name = var_info['obs_name']
        s_level = var_info['obs_level']
    else:
        find_method = self.find_model
        s_name = var_info['fcst_name']
        s_level = var_info['fcst_level']

    lead_seq = util.get_lead_sequence(self.config, input_dict)
    for lead in lead_seq:
        input_dict['lead_hours'] = lead
        current_task = time_util.ti_calculate(input_dict)

        single_file = find_method(current_task, var_info)
        if single_file is None:
            self.logger.warning('Single file was not found for init {} and lead {}'
                                .format(current_task['init_fmt'], current_task['lead_hours']))
            continue
        single_list.append(single_file)

    if len(single_list) == 0:
        return

    # write ascii file with list of files to process
    input_dict['lead_hours'] = 0
    time_info = time_util.ti_calculate(input_dict)
    single_outfile = time_info['valid_fmt'] + '_mtd_single_' + s_name + '.txt'
    single_list_path = self.write_list_file(single_outfile, single_list)

    arg_dict = {}
    if self.c_dict['SINGLE_DATA_SRC'] == 'OBS':
        arg_dict['obs_path'] = single_list_path
        arg_dict['model_path'] = None
    else:
        arg_dict['model_path'] = single_list_path
        arg_dict['obs_path'] = None

    self.process_fields_one_thresh(current_task, var_info, **arg_dict)
def test_find_obs_dated():
    pcw = grid_stat_wrapper()

    v = {}
    v['obs_level'] = "6"
    task_info = {}
    task_info['valid'] = datetime.datetime.strptime("201802010000", '%Y%m%d%H%M')
    task_info['lead'] = 0
    time_info = time_util.ti_calculate(task_info)

    pcw.c_dict['OBS_FILE_WINDOW_BEGIN'] = -3600
    pcw.c_dict['OBS_FILE_WINDOW_END'] = 3600
    pcw.c_dict['OBS_INPUT_DIR'] = pcw.config.getdir('METPLUS_BASE') + "/internal_tests/data/obs"
    pcw.c_dict['OBS_INPUT_TEMPLATE'] = '{valid?fmt=%Y%m%d}/{valid?fmt=%Y%m%d}_{valid?fmt=%H%M}'

    obs_file = pcw.find_obs(time_info, v)
    assert obs_file == pcw.c_dict['OBS_INPUT_DIR'] + '/20180201/20180201_0013'
def run_at_time(self, input_dict): """! Runs the MET application for a given run time. Processing forecast or observation data is determined by conf variables. This function loops over the list of forecast leads and runs the application for each. Args: @param input_dict dictionary containing timing information """ lead_seq = util.get_lead_sequence(self.config, input_dict) for lead in lead_seq: self.clear() input_dict['lead_hours'] = lead self.config.set('config', 'CURRENT_LEAD_TIME', lead) os.environ['METPLUS_CURRENT_LEAD_TIME'] = str(lead) time_info = time_util.ti_calculate(input_dict) self.run_at_time_once(time_info)
def test_find_data_not_a_path(data_type):
    config = metplus_config()
    pcw = CommandBuilder(config, config.logger)

    task_info = {}
    task_info['valid'] = datetime.datetime.strptime("201802010000", '%Y%m%d%H%M')
    task_info['lead'] = 0
    time_info = time_util.ti_calculate(task_info)

    pcw.c_dict[f'{data_type}FILE_WINDOW_BEGIN'] = 0
    pcw.c_dict[f'{data_type}FILE_WINDOW_END'] = 0
    pcw.c_dict[f'{data_type}INPUT_DIR'] = ''
    pcw.c_dict[f'{data_type}INPUT_TEMPLATE'] = 'G003'

    obs_file = pcw.find_data(time_info, var_info=None, data_type=data_type)
    assert obs_file == 'G003'
def test_get_accumulation_6_to_6():
    data_src = "FCST"
    pcw = pcp_combine_wrapper(data_src)
    input_dir = pcw.config.getdir('METPLUS_BASE') + "/internal_tests/data/accum"
    task_info = {}
    task_info['valid'] = datetime.datetime.strptime("2016090418", '%Y%m%d%H')
    time_info = time_util.ti_calculate(task_info)
    accum = 6

    pcw.c_dict['FCST_INPUT_TEMPLATE'] = "{valid?fmt=%Y%m%d}/file.{valid?fmt=%Y%m%d%H}.{level?fmt=%HH}h"

    pcw.input_dir = input_dir
    pcw.get_accumulation(time_info, accum, data_src, False)
    in_files = pcw.infiles
    if len(in_files) == 1 and input_dir + "/20160904/file.2016090418.06h" in in_files:
        assert True
    else:
        assert False
def test_setup_subtract_method():
    rl = "FCST"
    pcw = pcp_combine_wrapper(rl)
    task_info = {}
    task_info['valid'] = datetime.datetime.strptime("201609050000", '%Y%m%d%H%M')
    task_info['lead_hours'] = 9
    time_info = time_util.ti_calculate(task_info)

    var_info = {}
    var_info['fcst_name'] = "APCP"
    var_info['obs_name'] = "ACPCP"
    var_info['fcst_extra'] = ""
    var_info['obs_extra'] = ""
    var_info['fcst_level'] = "A06"
    var_info['obs_level'] = "A06"

    pcw.setup_subtract_method(time_info, var_info, rl)
    in_files = pcw.infiles
    out_file = pcw.get_output_path()
    assert len(in_files) == 2
def test_find_obs_no_dated():
    config = metplus_config()
    pcw = CommandBuilder(config, config.logger)

    v = {}
    v['obs_level'] = "6"
    task_info = {}
    task_info['valid'] = datetime.datetime.strptime("201802010000", '%Y%m%d%H%M')
    task_info['lead'] = 0
    time_info = time_util.ti_calculate(task_info)

    pcw.c_dict['OBS_FILE_WINDOW_BEGIN'] = -3600
    pcw.c_dict['OBS_FILE_WINDOW_END'] = 3600
    pcw.c_dict['OBS_INPUT_DIR'] = pcw.config.getdir('METPLUS_BASE') + "/internal_tests/data/obs"
    pcw.c_dict['OBS_INPUT_TEMPLATE'] = "{valid?fmt=%Y%m%d}_{valid?fmt=%H%M}"

    obs_file = pcw.find_obs(time_info, v)
    assert obs_file == pcw.c_dict['OBS_INPUT_DIR'] + '/20180201_0045'
def test_setup_sum_method():
    rl = "OBS"
    pcw = pcp_combine_wrapper(rl)
    task_info = {}
    task_info['valid'] = datetime.datetime.strptime("2016090418", '%Y%m%d%H')
    task_info['lead'] = 0
    time_info = time_util.ti_calculate(task_info)

    var_info = {}
    var_info['fcst_name'] = "APCP"
    var_info['obs_name'] = "ACPCP"
    var_info['fcst_extra'] = ""
    var_info['obs_extra'] = ""
    var_info['fcst_level'] = "A06"
    var_info['obs_level'] = "A06"

    input_dir = pcw.config.getdir('METPLUS_BASE') + "/internal_tests/data/accum"
    output_dir = pcw.config.getdir('METPLUS_BASE') + "/internal_tests/data/fakeout"
    pcw.setup_sum_method(time_info, var_info, rl)
    in_files = pcw.infiles
    out_file = pcw.get_output_path()
    assert out_file == output_dir + "/20160904/outfile.2016090418_A06h"
def run_at_time(self, input_dict): """! Runs the MET application for a given run time. Processing forecast or observation data is determined by conf variables. This function loops over the list of forecast leads and runs the application for each. Args: @param init_time initialization time to run. -1 if not set @param valid_time valid time to run. -1 if not set """ app_name_caps = self.app_name.upper() class_name = self.__class__.__name__[0:-7] lead_seq = util.get_lead_sequence(self.config, input_dict) run_list = [] if self.config.getbool('config', 'FCST_' + app_name_caps + '_RUN', False): run_list.append("FCST") if self.config.getbool('config', 'OBS_' + app_name_caps + '_RUN', False): run_list.append("OBS") if len(run_list) == 0: self.logger.error(class_name+" specified in process_list, but "+\ "FCST_"+app_name_caps+"_RUN and OBS_"+app_name_caps+"_RUN "+\ " are both False. Set one or both to true or "+\ "remove "+class_name+" from the process_list") exit() for to_run in run_list: self.logger.info("Processing {} data".format(to_run)) for lead in lead_seq: input_dict['lead_hours'] = lead self.config.set('config', 'CURRENT_LEAD_TIME', lead) os.environ['METPLUS_CURRENT_LEAD_TIME'] = str(lead) self.logger.info("Processing forecast lead {}".format(lead)) time_info = time_util.ti_calculate(input_dict) for var_info in self.c_dict['VAR_LIST']: self.run_at_time_once(time_info, var_info, to_run)
def test_run_gen_vx_mask_once():
    input_dict = {'valid': datetime.datetime.strptime("201802010000", '%Y%m%d%H%M'),
                  'lead': 0}
    time_info = time_util.ti_calculate(input_dict)

    wrap = gen_vx_mask_wrapper()
    wrap.c_dict['INPUT_TEMPLATE'] = '{valid?fmt=%Y%m%d%H}_ZENITH'
    wrap.c_dict['MASK_INPUT_TEMPLATES'] = ['LAT']
    wrap.c_dict['OUTPUT_DIR'] = os.path.join(wrap.config.getdir('OUTPUT_BASE'),
                                             'GenVxMask_test')
    wrap.c_dict['OUTPUT_TEMPLATE'] = '{valid?fmt=%Y%m%d%H}_ZENITH_LAT_MASK.nc'
    wrap.c_dict['COMMAND_OPTIONS'] = ["-type lat -thresh 'ge30&&le50'"]
    # wrap.c_dict['MASK_INPUT_TEMPLATES'] = ['LAT', 'LON']
    # wrap.c_dict['COMMAND_OPTIONS'] = ["-type lat -thresh 'ge30&&le50'",
    #                                   "-type lon -thresh 'le-70&&ge-130' -intersection"]

    wrap.run_at_time_all(time_info)

    expected_cmd = f"{wrap.app_path} 2018020100_ZENITH LAT {wrap.config.getdir('OUTPUT_BASE')}/GenVxMask_test/2018020100_ZENITH_LAT_MASK.nc -type lat -thresh 'ge30&&le50' -v 2"

    for cmd in wrap.all_commands:
        print(f" COMMAND:{cmd}")
        print(f"EXPECTED:{expected_cmd}")
        assert cmd == expected_cmd
def test_get_accumulation_1_to_6():
    data_src = "OBS"
    pcw = pcp_combine_wrapper(data_src)
    input_dir = pcw.config.getdir('METPLUS_BASE') + "/internal_tests/data/accum"
    task_info = {}
    task_info['valid'] = datetime.datetime.strptime("2016090418", '%Y%m%d%H')
    time_info = time_util.ti_calculate(task_info)
    accum = 6

    file_template = "{valid?fmt=%Y%m%d}/file.{valid?fmt=%Y%m%d%H}.{level?fmt=%HH}h"

    pcw.input_dir = input_dir
    pcw.get_accumulation(time_info, accum, data_src, False)
    in_files = pcw.infiles
    if len(in_files) == 6 and \
       input_dir + "/20160904/file.2016090418.01h" in in_files and \
       input_dir + "/20160904/file.2016090417.01h" in in_files and \
       input_dir + "/20160904/file.2016090416.01h" in in_files and \
       input_dir + "/20160904/file.2016090415.01h" in in_files and \
       input_dir + "/20160904/file.2016090414.01h" in in_files and \
       input_dir + "/20160904/file.2016090413.01h" in in_files:
        assert True
    else:
        assert False
def run_at_time(self, input_dict): """!Get TC-paris data then regrid tiles centered on the storm. Get TC-pairs track data and GFS model data, do any necessary processing then regrid the forecast and analysis files to a 30 x 30 degree tile centered on the storm. Args: input_dict: Time dictionary Returns: None: invokes regrid_data_plane to create a netCDF file from two extratropical storm track files. """ time_info = time_util.ti_calculate(input_dict) init_time = time_info['init_fmt'] # get the process id to be used to identify the output # amongst different users and runs. cur_pid = str(os.getpid()) tmp_dir = os.path.join(self.config.getdir('TMP_DIR'), cur_pid) self.logger.info("Begin extract tiles") cur_init = init_time[0:8] + "_" + init_time[8:10] # Check that there are tc_pairs data which are used as input if util.is_dir_empty(self.tc_pairs_dir): self.logger.error("No tc pairs data found at {}"\ .format(self.tc_pairs_dir)) sys.exit(1) # Create the name of the filter file we need to find. If # the file doesn't exist, then run TC_STAT filter_filename = "filter_" + cur_init + ".tcst" filter_name = os.path.join(self.filtered_out_dir, cur_init, filter_filename) if util.file_exists(filter_name) and not self.overwrite_flag: self.logger.debug("Filter file exists, using Track data file: {}"\ .format(filter_name)) else: # Create the storm track by applying the # filter options defined in the config/param file. # Use TcStatWrapper to build up the tc_stat command and invoke # the MET tool tc_stat to perform the filtering. tiles_list = util.get_files(self.tc_pairs_dir, ".*tcst", self.logger) tiles_list_str = ' '.join(tiles_list) tcs = TcStatWrapper(self.config, self.logger) tcs.build_tc_stat(self.filtered_out_dir, cur_init, tiles_list_str, self.addl_filter_opts) # Remove any empty files and directories that can occur # from filtering. util.prune_empty(filter_name, self.logger) # Now get unique storm ids from the filter file, # filter_yyyymmdd_hh.tcst sorted_storm_ids = util.get_storm_ids(filter_name, self.logger) # Check for empty sorted_storm_ids, if empty, # continue to the next time. if not sorted_storm_ids: # No storms found for init time, cur_init msg = "No storms were found for {} ...continue to next in list"\ .format(cur_init) self.logger.debug(msg) return # Process each storm in the sorted_storm_ids list # Iterate over each filter file in the output directory and # search for the presence of the storm id. Store this # corresponding row of data into a temporary file in the # /tmp/<pid> directory. for cur_storm in sorted_storm_ids: storm_output_dir = os.path.join(self.filtered_out_dir, cur_init, cur_storm) header = open(filter_name, "r").readline() util.mkdir_p(storm_output_dir) util.mkdir_p(tmp_dir) tmp_filename = "filter_" + cur_init + "_" + cur_storm full_tmp_filename = os.path.join(tmp_dir, tmp_filename) storm_match_list = util.grep(cur_storm, filter_name) with open(full_tmp_filename, "a+") as tmp_file: # copy over header information tmp_file.write(header) for storm_match in storm_match_list: tmp_file.write(storm_match) # Perform regridding of the forecast and analysis files # to an n X n degree tile centered on the storm (dimensions # are indicated in the config/param file). 
feature_util.retrieve_and_regrid(full_tmp_filename, cur_init, cur_storm, self.filtered_out_dir, self.config) # end of for cur_storm # Remove any empty files and directories in the extract_tiles output # directory util.prune_empty(self.filtered_out_dir, self.logger) # Clean up the tmp directory if it exists if os.path.isdir(tmp_dir): util.rmtree(tmp_dir)
def run_at_time_once(self, input_dict):
    self.clear()
    if self.c_dict['OBS_INPUT_DIR'] == '':
        self.logger.error('Must set PB2NC_INPUT_DIR in config file')
        exit(1)
    if self.c_dict['OBS_INPUT_TEMPLATE'] == '':
        self.logger.error('Must set PB2NC_INPUT_TEMPLATE in config file')
        exit(1)
    if self.c_dict['OUTPUT_DIR'] == '':
        self.logger.error('Must set PB2NC_OUTPUT_DIR in config file')
        exit(1)
    if self.c_dict['OUTPUT_TEMPLATE'] == '':
        self.logger.error('Must set PB2NC_OUTPUT_TEMPLATE in config file')
        exit(1)

    input_dir = self.c_dict['OBS_INPUT_DIR']
    input_template = self.c_dict['OBS_INPUT_TEMPLATE']
    output_dir = self.c_dict['OUTPUT_DIR']
    output_template = self.c_dict['OUTPUT_TEMPLATE']

    infile = None
    # loop over offset list and find first file that matches
    for offset in self.c_dict['OFFSETS']:
        input_dict['offset'] = offset
        time_info = time_util.ti_calculate(input_dict)
        infile = self.find_obs(time_info, None)

        if infile is not None:
            if isinstance(infile, list):
                for f in infile:
                    self.infiles.append(f)
            else:
                self.infiles.append(infile)
            self.logger.debug('Adding input file {}'.format(infile))
            break

    if infile is None:
        self.logger.error('Could not find input file in {} matching template {}'
                          .format(input_dir, input_template))
        return False

    outSts = StringSub(self.logger,
                       output_template,
                       **time_info)
    outfile = outSts.do_string_sub()
    outfile = os.path.join(output_dir, outfile)
    self.set_output_path(outfile)

    # if we don't overwrite and the output file exists, warn and continue
    if os.path.exists(outfile) and \
       self.c_dict['SKIP_IF_OUTPUT_EXISTS'] is True:
        self.logger.debug('Skip writing output file {} because it already '
                          'exists. Remove file or change '
                          'PB2NC_SKIP_IF_OUTPUT_EXISTS to False to process'
                          .format(outfile))
        return True

    # set config file since command is reset after each run
    self.param = self.c_dict['CONFIG_FILE']

    # list of fields to print to log
    print_list = ["PB2NC_MESSAGE_TYPE", "PB2NC_STATION_ID",
                  "OBS_WINDOW_BEGIN", "OBS_WINDOW_END",
                  "PB2NC_GRID", "PB2NC_POLY", "OBS_BUFR_VAR_LIST",
                  "TIME_SUMMARY_FLAG", "TIME_SUMMARY_BEG",
                  "TIME_SUMMARY_END", "TIME_SUMMARY_VAR_NAMES",
                  "TIME_SUMMARY_TYPES"]

    # set environment variables needed for MET application
    self.add_env_var("PB2NC_MESSAGE_TYPE", self.c_dict['MESSAGE_TYPE'])
    self.add_env_var("PB2NC_STATION_ID", self.c_dict['STATION_ID'])
    self.add_env_var("OBS_WINDOW_BEGIN", str(self.c_dict['OBS_WINDOW_BEGIN']))
    self.add_env_var("OBS_WINDOW_END", str(self.c_dict['OBS_WINDOW_END']))
    self.add_env_var("PB2NC_GRID", self.c_dict['GRID'])
    self.add_env_var("PB2NC_POLY", self.c_dict['POLY'])

    tmp_message_type = str(self.c_dict['BUFR_VAR_LIST']).replace("\'", "\"")
    bufr_var_list = ''.join(tmp_message_type.split())
    self.add_env_var("OBS_BUFR_VAR_LIST", bufr_var_list)

    self.add_env_var('TIME_SUMMARY_FLAG', str(self.c_dict['TIME_SUMMARY_FLAG']))
    self.add_env_var('TIME_SUMMARY_BEG', self.c_dict['TIME_SUMMARY_BEG'])
    self.add_env_var('TIME_SUMMARY_END', self.c_dict['TIME_SUMMARY_END'])
    self.add_env_var('TIME_SUMMARY_VAR_NAMES', str(self.c_dict['TIME_SUMMARY_VAR_NAMES']))
    self.add_env_var('TIME_SUMMARY_TYPES', str(self.c_dict['TIME_SUMMARY_TYPES']))

    # send environment variables to logger
    self.logger.debug("ENVIRONMENT FOR NEXT COMMAND: ")
    self.print_user_env_items()
    for item in print_list:
        self.print_env_item(item)
    self.logger.debug("COPYABLE ENVIRONMENT FOR NEXT COMMAND: ")
    self.print_env_copy(print_list)

    cmd = self.get_command()
    if cmd is None:
        self.logger.error("Could not generate command")
        return
    self.build()
def run_at_time(self, input_dict): """! Create the arguments to run MET tc_pairs Args: input_dict dictionary containing init or valid time Returns: """ # fill in time info dictionary time_info = time_util.ti_calculate(input_dict) # Set up the environment variable to be used in the TCPairs Config # file (TC_PAIRS_CONFIG_FILE) self.set_env_vars(time_info) # set output dir self.outdir = self.c_dict['OUTPUT_DIR'] # get items to filter bdeck files # set each to default wildcard character unless specified in conf basin_list = ['??'] cyclone_list = ['*'] model_list = ['*'] storm_id_list = ['*'] use_storm_id = False if self.c_dict['STORM_ID']: storm_id_list = self.c_dict['STORM_ID'] use_storm_id = True # if storm id and any other filter is set, error and exit if self.c_dict['BASIN']: if use_storm_id: self.logger.error('Cannot filter by both BASIN and STORM_ID') exit(1) basin_list = self.c_dict['BASIN'] if self.c_dict['CYCLONE']: if use_storm_id: self.logger.error('Cannot filter by both CYCLONE and STORM_ID') exit(1) cyclone_list = self.c_dict['CYCLONE'] if self.c_dict['MODEL']: model_list = self.c_dict['MODEL'] if use_storm_id: for storm_id in storm_id_list: # pull out info from storm_id and process match = re.match(r'(\w{2})(\d{2})(\d{4})', storm_id) if not match: self.logger.error( 'Incorrect STORM_ID format: {}'.format(storm_id)) exit(1) basin = match.group(1).lower() cyclone = match.group(2) year = match.group(3) init_year = time_info['init'].strftime('%Y') if year != init_year: msg = 'Year specified in STORM_ID {}'.format(storm_id) +\ ' ({})'.format(year) +\ ' does not match init time year {}'.format(init_year) msg += '. Skipping...' self.logger.warning(msg) continue self.process_data(basin, cyclone, model_list, time_info) else: for basin in [basin.lower() for basin in basin_list]: for cyclone in cyclone_list: self.process_data(basin, cyclone, model_list, time_info) return True
def run_at_time_once(self, input_dict):
    if self.c_dict['FCST_INPUT_DIR'] == '':
        self.logger.error('Must set FCST_POINT_STAT_INPUT_DIR in config file')
        exit(1)
    if self.c_dict['FCST_INPUT_TEMPLATE'] == '':
        self.logger.error('Must set FCST_POINT_STAT_INPUT_TEMPLATE in config file')
        exit(1)
    if self.c_dict['OBS_INPUT_DIR'] == '':
        self.logger.error('Must set OBS_POINT_STAT_INPUT_DIR in config file')
        exit(1)
    if self.c_dict['OBS_INPUT_TEMPLATE'] == '':
        self.logger.error('Must set OBS_POINT_STAT_INPUT_TEMPLATE in config file')
        exit(1)
    if self.c_dict['OUTPUT_DIR'] == '':
        self.logger.error('Must set POINT_STAT_OUTPUT_DIR in config file')
        exit(1)

    # clear any settings leftover from previous run
    self.clear()

    time_info = time_util.ti_calculate(input_dict)
    var_list = self.c_dict['VAR_LIST']

    # get verification mask if available
    self.get_verification_mask(time_info)

    # get model to compare
    model_path = self.find_model(time_info, var_list[0])
    if model_path is None:
        self.logger.error('Could not find file in {} matching template {}'
                          .format(self.c_dict['FCST_INPUT_DIR'],
                                  self.c_dict['FCST_INPUT_TEMPLATE']))
        self.logger.error("Could not find file in " + self.c_dict['FCST_INPUT_DIR'] +
                          " for init time " + time_info['init_fmt'] +
                          " f" + str(time_info['lead_hours']))
        return False

    # get observation to compare
    obs_path = None
    # loop over offset list and find first file that matches
    for offset in self.c_dict['OFFSETS']:
        input_dict['offset'] = offset
        time_info = time_util.ti_calculate(input_dict)
        obs_path = self.find_obs(time_info, var_list[0])

        if obs_path is not None:
            break

    if obs_path is None:
        self.logger.error('Could not find observation file in {} '
                          'matching template {}'
                          .format(self.c_dict['OBS_INPUT_DIR'],
                                  self.c_dict['OBS_INPUT_TEMPLATE']))
        return False

    # found both fcst and obs
    self.infiles.append(model_path)
    if type(obs_path) is list:
        for obs in obs_path:
            self.infiles.append(obs)
    else:
        self.infiles.append(obs_path)

    # get field information
    fcst_field_list = []
    obs_field_list = []
    for var_info in var_list:
        next_fcst = self.get_one_field_info(var_info['fcst_level'],
                                            var_info['fcst_thresh'],
                                            var_info['fcst_name'],
                                            var_info['fcst_extra'],
                                            'FCST')
        next_obs = self.get_one_field_info(var_info['obs_level'],
                                           var_info['obs_thresh'],
                                           var_info['obs_name'],
                                           var_info['obs_extra'],
                                           'OBS')
        fcst_field_list.append(next_fcst)
        obs_field_list.append(next_obs)

    fcst_field = ','.join(fcst_field_list)
    obs_field = ','.join(obs_field_list)

    self.process_fields(time_info, fcst_field, obs_field)
def setup_subtract_method(self, time_info, var_info, rl):
    """!Setup pcp_combine to subtract two files to build desired accumulation
        Args:
          @param time_info object containing timing information
          @param var_info object containing variable information
          @param rl data type (FCST or OBS)
          @rtype string
          @return path to output file"""
    self.clear()
    in_dir, in_template = self.get_dir_and_template(rl, 'INPUT')
    out_dir, out_template = self.get_dir_and_template(rl, 'OUTPUT')

    if rl == 'FCST':
        accum = var_info['fcst_level']
    else:
        accum = var_info['obs_level']

    if accum[0].isalpha():
        accum = accum[1:]

    lead = time_info['lead_hours']
    lead2 = lead - int(accum)

    self.set_method("SUBTRACT")
    pcpSts1 = sts.StringSub(self.logger,
                            in_template,
                            level=(int(accum) * 3600),
                            **time_info)
    file1 = os.path.join(in_dir, pcpSts1.do_string_sub())
    file1 = util.preprocess_file(file1, self.c_dict[rl + '_INPUT_DATATYPE'],
                                 self.config)

    if file1 is None:
        self.logger.error("Could not find file in {} for init time {} and lead {}"
                          .format(in_dir, time_info['init_fmt'], lead))
        return None

    # set time info for second lead
    input_dict2 = {'init': time_info['init'], 'lead_hours': lead2}
    time_info2 = time_util.ti_calculate(input_dict2)
    pcpSts2 = sts.StringSub(self.logger,
                            in_template,
                            level=(int(accum) * 3600),
                            **time_info2)
    file2 = os.path.join(in_dir, pcpSts2.do_string_sub())
    file2 = util.preprocess_file(file2, self.c_dict[rl + '_INPUT_DATATYPE'],
                                 self.config)

    if file2 is None:
        self.logger.error("Could not find file in {} for init time {} and lead {}"
                          .format(in_dir, time_info2['init_fmt'], lead2))
        return None

    self.add_input_file(file1, lead)
    self.add_input_file(file2, lead2)

    outSts = sts.StringSub(self.logger,
                           out_template,
                           level=(int(accum) * 3600),
                           **time_info)
    out_file = outSts.do_string_sub()
    self.outfile = out_file
    self.outdir = out_dir

    return self.get_command()
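# --------------------------------------------------------------------------
# Hedged worked example of the SUBTRACT arithmetic above, using the values
# from test_setup_subtract_method in this section (A06 accumulation requested
# at forecast lead 9): lead2 = lead - int(accum) = 9 - 6 = 3, so the command
# is built from the file valid at lead 9 minus the file valid at lead 3,
# leaving the accumulation over forecast hours 3 through 9 (6 hours).
# --------------------------------------------------------------------------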
def parse_template(self):
    """!Use template and filename to pull out information"""
    template_len = len(self.template)
    i = 0
    str_i = 0
    match_dict = {}
    valid_shift = 0
    fmt_len = 0
    between_template = ''
    between_filename = ''

    while i < template_len:
        # if a tag is found, split contents and extract time
        if self.template[i] == TEMPLATE_IDENTIFIER_BEGIN:
            # check that text between tags for template and filename
            # are the same, return None if they differ
            # reset both variables if they are the same
            if between_template != between_filename:
                return None
            else:
                between_template = ''
                between_filename = ''

            end_i = self.template.find(TEMPLATE_IDENTIFIER_END, i)
            tag = self.template[i + 1:end_i]
            sections = tag.split('?')
            identifier = sections[0]
            for section in sections[1:]:
                items = section.split('=')
                if items[0] == 'fmt':
                    fmt = items[1]
                    # print("Format for {} is {}".format(identifier, fmt))
                    fmt_len = self.get_fmt_info(fmt, self.full_str[str_i:],
                                                match_dict, identifier)
                    if fmt_len == -1:
                        return None
                    # extract string that corresponds to format

                if items[0] == SHIFT_STRING:
                    shift = int(items[1])
                    # don't allow shift on any identifier except valid
                    if identifier != VALID_STRING:
                        msg = 'Cannot apply a shift to template ' +\
                              'item {} when processing inexact '.format(identifier) +\
                              'times. Only {} is accepted'.format(VALID_STRING)
                        self.logger.error(msg)
                        exit(1)

                    # if shift has been set before (other than 0) and
                    # this shift differs, report error and exit
                    if valid_shift != 0 and shift != valid_shift:
                        self.logger.error('Found multiple shifts for valid time: ' +
                                          '{} differs from {}'.format(shift, valid_shift))
                        exit(1)

                    # save valid shift to apply to valid time later
                    valid_shift = shift

                # check if duplicate formatters are found

            i = end_i + 1
            str_i += fmt_len
        else:
            # keep track of text in between tags to ensure that it matches
            # the template, do not return a time if it does not match
            between_template += self.template[i]
            between_filename += self.full_str[str_i]

            # increment indices for template and full_str
            i += 1
            str_i += 1

    # check again if between text matches at the end of the loop to
    # ensure that no text after the last template differs
    if between_template != between_filename:
        return None

    # combine common items and get datetime
    output_dict = {}

    valid = {}
    init = {}
    da_init = {}
    lead = {}
    offset = 0

    valid['Y'] = -1
    valid['m'] = -1
    valid['d'] = -1
    valid['j'] = -1
    valid['H'] = 0
    valid['M'] = 0

    init['Y'] = -1
    init['m'] = -1
    init['d'] = -1
    init['j'] = -1
    init['H'] = 0
    init['M'] = 0

    da_init['Y'] = -1
    da_init['m'] = -1
    da_init['d'] = -1
    da_init['j'] = -1
    da_init['H'] = 0
    da_init['M'] = 0

    lead['H'] = 0
    lead['M'] = 0
    lead['S'] = 0

    for key, value in match_dict.items():
        if key.startswith(VALID_STRING):
            valid[key.split('+')[1]] = int(value)

    set_output_dict_from_time_info(valid, output_dict, 'valid')

    # shift valid time if applicable
    if valid_shift != 0:
        output_dict['valid'] -= datetime.timedelta(seconds=valid_shift)

    for key, value in match_dict.items():
        if key.startswith(INIT_STRING):
            init[key.split('+')[1]] = int(value)

    set_output_dict_from_time_info(init, output_dict, 'init')

    for key, value in match_dict.items():
        if key.startswith(DA_INIT_STRING):
            da_init[key.split('+')[1]] = int(value)

    set_output_dict_from_time_info(da_init, output_dict, 'da_init')

    for key, value in match_dict.items():
        if key.startswith(LEAD_STRING):
            lead[key.split('+')[1]] = int(value)

    lead_seconds = lead['H'] * 3600 + lead['M'] * 60 + lead['S']
    output_dict['lead'] = lead_seconds

    for key, value in match_dict.items():
        if key.startswith(OFFSET_STRING):
            offset = int(value)

    output_dict['offset'] = offset

    time_info = time_util.ti_calculate(output_dict)
    return time_info
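# --------------------------------------------------------------------------
# Hedged illustration of parse_template (assuming self.template and
# self.full_str were set by the enclosing class; the template and filename
# below are taken from test_find_obs_no_dated in this section):
#
#     self.template = '{valid?fmt=%Y%m%d}_{valid?fmt=%H%M}'
#     self.full_str = '20180201_0045'
#
# Walking the two strings in parallel plausibly fills match_dict with entries
# keyed like 'valid+Y', 'valid+m', 'valid+d', 'valid+H', 'valid+M' (based on
# the key.split('+') handling above), so the returned time_info would have
# valid = 2018-02-01 00:45 and lead = 0.
# --------------------------------------------------------------------------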