import datetime
import logging

import pytest

# StringSub is provided by METplus's string_template_substitution module;
# this import path is an assumption -- adjust it to match your checkout.
from string_template_substitution import StringSub


def test_crow_variable_hour():
    # Test that StringSub's doStringSub() correctly creates the valid hour
    # without any zero-padding when given the following as input:
    # pgbf{lead?fmt=%H}.gfs.{valid?fmt=%Y%m%d%H}
    # which should produce names matching: pgbf([0-9]{1,3}).gfs.(2[0-9]{9})
    logger = logging.getLogger("crow_data")

    # crow input files with 3, 2, and 1-digit lead times:
    crow_input_file_3 = 'pgbf219.gfs.2017060418'
    crow_input_file_2 = 'pgbf18.gfs.2017062000'
    crow_input_file_1 = 'pgbf3.gfs.2017060418'
    lead_1 = int('3') * 3600
    lead_2 = int('18') * 3600
    lead_3 = int('219') * 3600
    valid_2 = datetime.datetime.strptime('2017062000', '%Y%m%d%H')
    valid_1 = valid_3 = datetime.datetime.strptime('2017060418', '%Y%m%d%H')
    templ = 'pgbf{lead?fmt=%H}.gfs.{valid?fmt=%Y%m%d%H}'
    ss_1 = StringSub(logger, templ, valid=valid_1, lead=lead_1)
    ss_2 = StringSub(logger, templ, valid=valid_2, lead=lead_2)
    ss_3 = StringSub(logger, templ, valid=valid_3, lead=lead_3)
    crow_1_output = ss_1.doStringSub()
    crow_2_output = ss_2.doStringSub()
    crow_3_output = ss_3.doStringSub()
    # print("crow_1 output: ", crow_1_output)
    # print("crow_2 output: ", crow_2_output)
    # print("crow_3 output: ", crow_3_output)
    assert (crow_1_output == crow_input_file_1
            and crow_2_output == crow_input_file_2
            and crow_3_output == crow_input_file_3)
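The regex from the header comment can be checked directly against the three substituted names; a minimal sketch using only the re module (the pattern is copied from the comment above, with the literal dots escaped):

import re

crow_pattern = re.compile(r'pgbf([0-9]{1,3})\.gfs\.(2[0-9]{9})')
for name in ('pgbf3.gfs.2017060418', 'pgbf18.gfs.2017062000',
             'pgbf219.gfs.2017060418'):
    match = crow_pattern.fullmatch(name)
    assert match is not None
    # group(1): unpadded lead hour, group(2): valid time as YYYYMMDDHH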
def test_shift_time_negative():
    init_string = datetime.datetime.strptime("2017060400", '%Y%m%d%H')
    logger = logging.getLogger("testing")
    templ = "{init?fmt=%Y%m%d%H?shift=-86400}"
    expected_filename = "2017060300"
    ss = StringSub(logger, templ, init=init_string)
    filename = ss.doStringSub()
    assert (filename == expected_filename)
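The ?shift tag takes seconds; the expected name can be reproduced with plain datetime arithmetic, independent of StringSub:

import datetime

# -86400 seconds is one day back from the init time.
init = datetime.datetime.strptime("2017060400", '%Y%m%d%H')
assert (init + datetime.timedelta(seconds=-86400)).strftime('%Y%m%d%H') \
    == "2017060300"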
def test_h_lead_pad_2_digit_sub():
    logger = logging.getLogger("test")
    file_template = "{init?fmt=%Y%m%d%H}_A{lead?fmt=%.3H}h"
    init_time = datetime.datetime.strptime("1987020103", '%Y%m%d%H')
    lead_time = int("3") * 3600
    fSts = StringSub(logger, file_template, init=init_time, lead=lead_time)
    out_string = fSts.doStringSub()
    assert (out_string == "1987020103_A003h")
def test_offset_hour():
    logger = logging.getLogger("dummy")
    expected_hour = "03"
    offset = 10800
    templ = "{offset?fmt=%2H}"
    ss = StringSub(logger, templ, offset=offset)
    offset_hour = ss.doStringSub()
    assert (offset_hour == expected_hour)
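The offset is also given in seconds; %2H reduces to zero-padded integer-hour formatting, as this sketch of the same arithmetic shows:

# 10800 seconds // 3600 = 3 hours, zero-padded to two digits by %2H.
assert "{:02d}".format(10800 // 3600) == "03"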
def test_multiple_valid_substitution_init():
    init_string = datetime.datetime.strptime("2017060400", '%Y%m%d%H')
    lead_string = 0
    logger = logging.getLogger("testing")
    templ = "{init?fmt=%Y%m%d%H}/gfs.t{init?fmt=%H}z.pgrb2.0p25.f{lead?fmt=%.2H}"
    expected_filename = "2017060400/gfs.t00z.pgrb2.0p25.f00"
    ss = StringSub(logger, templ, init=init_string, lead=lead_string)
    filename = ss.doStringSub()
    assert (filename == expected_filename)
def test_shift_time_lead_negative():
    init_string = datetime.datetime.strptime("2019020700", '%Y%m%d%H')
    lead_string = int("60") * 3600
    logger = logging.getLogger("testing")
    templ = "dwd_{init?fmt=%Y%m%d%H}_{lead?fmt=%.3H?shift=-86400}_{lead?fmt=%.3H}"
    expected_filename = "dwd_2019020700_036_060"
    ss = StringSub(logger, templ, init=init_string, lead=lead_string)
    filename = ss.doStringSub()
    assert (filename == expected_filename)
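Here the shift is applied to the lead time: 60 h minus 86400 s (24 h) leaves 36 h, while the unshifted tag still renders 60 h. The same arithmetic as a sketch:

lead_seconds = 60 * 3600
# ?shift=-86400 subtracts 24 hours before the %.3H formatting is applied.
assert "{:03d}".format((lead_seconds - 86400) // 3600) == "036"
assert "{:03d}".format(lead_seconds // 3600) == "060"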
def test_cycle_hour():
    cycle_string = 0
    valid_string = datetime.datetime.strptime("20180103", '%Y%m%d')
    logger = logging.getLogger("dummy")
    templ = "prefix.{valid?fmt=%Y%m%d}.tm{cycle?fmt=%2H}"
    expected_filename = "prefix.20180103.tm00"
    ss = StringSub(logger, templ, valid=valid_string, cycle=cycle_string)
    filename = ss.doStringSub()
    assert (filename == expected_filename)
def test_multiple_valid_substitution_init_complex():
    init_string = datetime.datetime.strptime("2016061018", '%Y%m%d%H')
    lead_string = int("6") * 3600
    logger = logging.getLogger("testing")
    templ = "ncar.ral.CoSPA.HRRR.{init?fmt=%Y-%m-%dT%H:%M:%S}.PT{lead?fmt=%.2H}:00.nc"
    expected_filename = "ncar.ral.CoSPA.HRRR.2016-06-10T18:00:00.PT06:00.nc"
    ss = StringSub(logger, templ, init=init_string, lead=lead_string)
    filename = ss.doStringSub()
    assert (filename == expected_filename)
def test_multiple_valid_substitution_valid():
    valid_string = datetime.datetime.strptime("2018020112", '%Y%m%d%H')
    lead_string = int("123") * 3600
    logger = logging.getLogger("testing")
    templ = "{valid?fmt=%Y%m%d%H}/gfs.t{valid?fmt=%H}.pgrb2.0p25.{lead?fmt=%HHH}"
    expected_filename = "2018020112/gfs.t12.pgrb2.0p25.123"
    ss = StringSub(logger, templ, valid=valid_string, lead=lead_string)
    filename = ss.doStringSub()
    assert (filename == expected_filename)
def test_gdas_substitution():
    # Test that the string template substitution works correctly for GDAS
    # prepbufr files, which do not make use of the cycle hour or the offset
    # to generate the valid time.
    valid_string = "2018010411"
    valid_obj = datetime.datetime.strptime(valid_string, '%Y%m%d%H')
    logger = logging.getLogger("testing")
    templ = "prepbufr.gdas.{valid?fmt=%Y%m%d%H}.nc"
    expected_filename = 'prepbufr.gdas.' + valid_string + '.nc'
    ss = StringSub(logger, templ, valid=valid_obj)
    filename = ss.doStringSub()
    assert (filename == expected_filename)
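With only a valid time in the template, the substitution reduces to strftime on that one tag; a sketch of the equivalence:

import datetime

# {valid?fmt=%Y%m%d%H} is strftime formatting of the valid datetime.
valid_obj = datetime.datetime.strptime("2018010411", '%Y%m%d%H')
assert 'prepbufr.gdas.' + valid_obj.strftime('%Y%m%d%H') + '.nc' \
    == 'prepbufr.gdas.2018010411.nc'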
def test_ccpa_template():
    passed = True
    valid_string = datetime.datetime.strptime("2019022403", '%Y%m%d%H')
    lead_string = 10800
    logger = logging.getLogger("testing")
    templ = "ccpa.{valid?fmt=%Y%m%d}/06/ccpa.t{valid?fmt=%H}z.{lead?fmt=%.2H}h.hrap.conus.gb2"
    expected_filename = "ccpa.20190224/06/ccpa.t03z.03h.hrap.conus.gb2"
    ss = StringSub(logger, templ, valid=valid_string, lead=lead_string)
    filename = ss.doStringSub()
    if filename != expected_filename:
        passed = False

    valid_string = datetime.datetime.strptime("2019022406", '%Y%m%d%H')
    lead_string = int("6") * 3600
    expected_filename = "ccpa.20190224/06/ccpa.t06z.06h.hrap.conus.gb2"
    ss = StringSub(logger, templ, valid=valid_string, lead=lead_string)
    filename = ss.doStringSub()
    if filename != expected_filename:
        passed = False

    assert passed
def test_ym_date_dir():
    # Test that the ym directory can be read in and does substitution correctly
    logger = logging.getLogger("test")
    # e.g. /d1/METplus_TC/adeck_orig/201708/atcfunix.gfs.2017080100
    date_str = '201708'
    templ = '/d1/METplus_TC/adeck_orig/{date?fmt=%s}/' \
            'atcfunix.gfs.2017080100.dat'
    ss = StringSub(logger, templ, date=date_str)
    filename = ss.doStringSub()
    expected_filename = '/d1/METplus_TC/adeck_orig/201708/' \
                        'atcfunix.gfs.2017080100.dat'
    assert filename == expected_filename
Example 13
    def generate_output_nc_filename(self, prepbufr_file_info):
        """! Create the output netCDF filename as specified in the use
        case/custom configuration file.
             Args:
                 @param prepbufr_file_info - a list of the full filepaths of
                                             prepbufr data of interest.
             Returns:
                 nc_output_filepath - the full filepath of the output netCDF
                                      file; its filename follows the format
                                      specified in the configuration file
        """
        # pylint:disable=protected-access
        # Need to call sys._getframe() to get the filename and method/func
        # for logging information.
        cur_filename = sys._getframe().f_code.co_filename
        cur_function = sys._getframe().f_code.co_name

        self.logger.debug('DEBUG:|' + cur_function + '|' + cur_filename +
                          ' Generating output NetCDF file name...')

        # Get the output directory
        pb2nc_output_dir = self.pb_dict['PB2NC_OUTPUT_DIR']

        # Get the cycle hour and offset hour from the prepbufr file info named
        # tuple
        if prepbufr_file_info.cycle:
            # Get the cycle hour, offset hour and add the appropriate
            # prefix, validation ymd and .nc extension
            cycle = prepbufr_file_info.cycle
            offset = prepbufr_file_info.offset
            date = prepbufr_file_info.date

            string_sub = StringSub(self.logger,
                                   self.pb_dict['NC_FILE_TMPL'],
                                   init=str(date),
                                   cycle=cycle,
                                   offset=offset)
            nc_output_filename = string_sub.doStringSub()
            nc_output_filepath = os.path.join(pb2nc_output_dir,
                                              nc_output_filename)

        else:
            # Typically for files that aren't separated into dated
            # subdirectories, the date is incorporated in the filename.
            # Append the input file name with .nc extension
            # extract the filename portion of the full_filepath
            filename = os.path.basename(prepbufr_file_info.full_filepath)
            nc_output_filename = filename + ".nc"
            nc_output_filepath = os.path.join(pb2nc_output_dir,
                                              nc_output_filename)
        return nc_output_filepath
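A hedged usage sketch of the cycle-hour branch above: NC_FILE_TMPL is site-configured, so the template below is an assumption modeled on the nam tests later on this page, and it reuses the imports from the top of the page.

# Hypothetical NC_FILE_TMPL; the real value comes from the use-case config.
templ = 'prepbufr.nam.{init?fmt=%Y%m%d}.t{cycle?fmt=%HH}z.tm{offset?fmt=%HH}.nc'
ss = StringSub(logging.getLogger("sketch"), templ,
               init=datetime.datetime.strptime('20180102', '%Y%m%d'),
               cycle=6 * 3600, offset=3 * 3600)
# Under these assumptions this should yield:
# prepbufr.nam.20180102.t06z.tm03.nc
print(ss.doStringSub())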
def test_ymd_date_dir():
    # Test that the ymd directory can be read in and does substitution correctly
    logger = logging.getLogger("test")
    # e.g. /d1/METplus_TC/adeck_orig/20170811/atcfunix.gfs.2017080100
    init_str = datetime.datetime.strptime('2017081118', '%Y%m%d%H')
    date_str = '20170811'
    templ = '/d1/METplus_TC/adeck_orig/{date?fmt=%s}/atcfunix.gfs.' \
            '{init?fmt=%Y%m%d%H}.dat'
    ss = StringSub(logger, templ, date=date_str, init=init_str)
    filename = ss.doStringSub()
    expected_filename = '/d1/METplus_TC/adeck_orig/20170811/' \
                        'atcfunix.gfs.2017081118.dat'
    assert filename == expected_filename
Example 15
def test_nam_substitution_HHH(key, value):
    # Test that the substitution works correctly when given an init time,
    # cycle hour, and negative offset hour.
    init_string = "20180102"
    cycle_string = key
    offset_string = '03'
    expected_filename = value
    logger = logging.getLogger("test")
    templ = \
        'prepbufr.nam.{valid?fmt=%Y%m%d%H}.t{cycle?fmt=%HHH}z.tm{' \
        'offset?fmt=%HH}.nc'
    ss = StringSub(logger,
                   templ,
                   init=init_string,
                   cycle=cycle_string,
                   offset=offset_string)
    filename = ss.doStringSub()
    print('nam filename: ', filename)
    assert (filename == expected_filename)
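As written, test_nam_substitution_HHH expects its key/value pairs from outside; in pytest that is normally supplied by a parametrize decorator, which this excerpt does not show. A sketch of the shape, kept as comments because the real cycle-hour/filename pairs are not available here:

# Sketch only -- placeholder pairs, not values from the original suite:
# @pytest.mark.parametrize('key, value', [
#     ('<cycle hour>', '<expected prepbufr.nam filename>'),
# ])
# def test_nam_substitution_HHH(key, value):
#     ...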
def test_nam_substitution_HH(key, value):
    pytest.skip('time offsets no longer computed in StringSub')
    # Test that the substitution works correctly when given an init time,
    # cycle hour, and negative offset hour.
    init_string = datetime.datetime.strptime("20180102", '%Y%m%d')
    cycle_string = key
    offset_string = 10800  # 3 hours, in seconds
    expected_filename = value
    logger = logging.getLogger("test")
    templ = \
        'prepbufr.nam.{valid?fmt=%Y%m%d%H}.t{cycle?fmt=%HH}z.tm{' \
        'offset?fmt=%HH}.nc'
    ss = StringSub(logger,
                   templ,
                   init=init_string,
                   cycle=cycle_string,
                   offset=offset_string)
    filename = ss.doStringSub()
    assert (filename == expected_filename)
def test_ymd_region_cyclone():
    # Test that we can recreate the full file path with a date,
    # region, and cyclone
    logger = logging.getLogger("test")
    # e.g. /d1/METplus_TC/bdeck/201708/bal052017.dat
    date_str = '201708'
    region_str = 'al'
    cyclone_str = '05'
    year_str = '2017'
    templ = '/d1/METplus_TC/bdeck/{date?fmt=%s}/b{region?fmt=%s}' \
            '{cyclone?fmt=%s}{misc?fmt=%s}.dat'
    ss = StringSub(logger,
                   templ,
                   date=date_str,
                   region=region_str,
                   cyclone=cyclone_str,
                   misc=year_str)
    full_file = ss.doStringSub()
    expected_full_file = '/d1/METplus_TC/bdeck/201708/bal052017.dat'
    assert full_file == expected_full_file
Example 18
def retrieve_and_regrid(tmp_filename, cur_init, cur_storm, out_dir, logger,
                        config):
    """! Retrieves the data from the MODEL_DATA_DIR (defined in metplus.conf)
         that corresponds to the storms defined in the tmp_filename:
        1) create the analysis tile and forecast file names from the
           tmp_filename file.
        2) regrid the forecast and analysis files, either with the MET
           tool regrid_data_plane (storing netCDF results in the out_dir)
           or with wgrib2, using a latlon grid specification string with
           the following format:
                latlon Nx Ny lat_ll lon_ll delta_lat delta_lon
                NOTE:  these values are defined in the extract_tiles_parm
                parameter/config file as NLAT, NLON.
        Args:
        @param tmp_filename:   Filename of the temporary filter file in
                               the /tmp directory. Contains rows
                               of data corresponding to a storm id of varying
                               times.
        @param cur_init:       The current init time
        @param cur_storm:      The current storm
        @param out_dir:  The directory where regridded netCDF or grib2 output
                         is saved depending on which regridding methodology is
                         requested.  If the MET tool regrid_data_plane is
                         requested, then netCDF data is produced.  If wgrib2
                         is requested, then grib2 data is produced.
        @param logger:  The logger instance used for logging messages.
        @param config:  config instance
        Returns:
           None
    """

    # pylint: disable=protected-access
    # Need to call sys._getframe() to get current function and file for
    # logging information.
    # pylint: disable=too-many-arguments
    # all input is needed to perform task

    # For logging
    cur_filename = sys._getframe().f_code.co_filename
    cur_function = sys._getframe().f_code.co_name

    # Get variables, etc. from param/config file.
    model_data_dir = config.getdir('MODEL_DATA_DIR')
    metplus_base = config.getdir('MET_BUILD_BASE')
    regrid_data_plane_exe = os.path.join(metplus_base, 'bin/regrid_data_plane')
    # regrid_data_plane_exe = config.getexe('REGRID_DATA_PLANE_EXE')
    wgrib2_exe = config.getexe('WGRIB2')
    egrep_exe = config.getexe('EGREP_EXE')
    regrid_with_met_tool = config.getbool('config', 'REGRID_USING_MET_TOOL')
    overwrite_flag = config.getbool('config', 'OVERWRITE_TRACK')

    # Extract the columns of interest: init time, lead time,
    # valid time lat and lon of both tropical cyclone tracks, etc.
    # Then calculate the forecast hour and other things.
    with open(tmp_filename, "r") as tf:
        # read header
        header = tf.readline().split()
        # get column numbers for the columns of interest
        header_colnum_init, header_colnum_lead, header_colnum_valid = \
            header.index('INIT'), header.index('LEAD'), header.index('VALID')
        header_colnum_alat, header_colnum_alon =\
            header.index('ALAT'), header.index('ALON')
        header_colnum_blat, header_colnum_blon = \
            header.index('BLAT'), header.index('BLON')
        for line in tf:
            col = line.split()
            init, lead, valid, alat, alon, blat, blon = \
                col[header_colnum_init], col[header_colnum_lead], \
                col[header_colnum_valid], col[header_colnum_alat], \
                col[header_colnum_alon], col[header_colnum_blat], \
                col[header_colnum_blon]

            # LEAD column is in HHMMSS format; integer division by 10000
            # (valid in both Python 2 and 3) extracts the forecast hour
            lead_time = int(lead)
            fcst_hr = lead_time // 10000

            init_ymd_match = re.match(r'[0-9]{8}', init)
            if init_ymd_match:
                init_ymd = init_ymd_match.group(0)
            else:
                logger.warning("RuntimeError raised")
                raise RuntimeError('init time has unexpected format for YMD')

            init_ymdh_match = re.match(r'[0-9_]{11}', init)
            if init_ymdh_match:
                init_ymdh = init_ymdh_match.group(0)
            else:
                logger.warning("RuntimeError raised")
                raise RuntimeError('init time has unexpected format for YMDH')

            valid_ymd_match = re.match(r'[0-9]{8}', valid)
            if valid_ymd_match:
                valid_ymd = valid_ymd_match.group(0)
            else:
                logger.warning("RuntimeError raised")
                raise RuntimeError('valid time has unexpected format for YMD')

            valid_ymdh_match = re.match(r'[0-9_]{11}', valid)
            if valid_ymdh_match:
                valid_ymdh = valid_ymdh_match.group(0)
            else:
                logger.warning("RuntimeError raised")
                raise RuntimeError('valid time has unexpected format for YMDH')

            lead_str = str(fcst_hr).zfill(3)
            fcst_dir = os.path.join(model_data_dir, init_ymd)
            init_ymdh_split = init_ymdh.split("_")
            init_yyyymmddhh = "".join(init_ymdh_split)
            anly_dir = os.path.join(model_data_dir, valid_ymd)
            valid_ymdh_split = valid_ymdh.split("_")
            valid_yyyymmddhh = "".join(valid_ymdh_split)

            # Create output filenames for regridding
            # wgrib2 used to regrid.
            # Create the filename for the regridded file, which is a
            # grib2 file.
            fcst_sts = \
                StringSub(logger, config.getraw('filename_templates',
                                                'GFS_FCST_FILE_TMPL'),
                          init=init_yyyymmddhh, lead=lead_str)

            anly_sts = \
                StringSub(logger, config.getraw('filename_templates',
                                                'GFS_ANLY_FILE_TMPL'),
                          valid=valid_yyyymmddhh, lead=lead_str)

            fcst_file = fcst_sts.doStringSub()
            fcst_filename = os.path.join(fcst_dir, fcst_file)
            anly_file = anly_sts.doStringSub()
            anly_filename = os.path.join(anly_dir, anly_file)

            # Check if the forecast input file exists. If it doesn't,
            # log a warning and skip this track entry.
            if file_exists(fcst_filename):
                msg = ("INFO| [" + cur_filename + ":" + cur_function +
                       " ] | Forecast file: " + fcst_filename)
                logger.debug(msg)
            else:
                msg = ("WARNING| [" + cur_filename + ":" + cur_function +
                       " ] | " +
                       "Can't find forecast file, continuing anyway: " +
                       fcst_filename)
                logger.debug(msg)
                continue

            # Check if the analysis input file exists. If it doesn't,
            # log a warning and skip this track entry.
            if file_exists(anly_filename):
                msg = ("INFO| [" + cur_filename + ":" + cur_function +
                       " ] | Analysis file: " + anly_filename)
                logger.debug(msg)

            else:
                msg = ("WARNING| [" + cur_filename + ":" + cur_function +
                       " ] | " +
                       "Can't find analysis file, continuing anyway: " +
                       anly_filename)
                logger.debug(msg)
                continue

            # Create the arguments used to perform regridding.
            # NOTE: the base name is the same for both the fcst and anly
            # filenames, so either one can be used to derive the base name
            # used for fcst_regridded_filename and anly_regridded_filename.
            fcst_anly_base = os.path.basename(fcst_filename)

            fcst_grid_spec = create_grid_specification_string(
                alat, alon, logger, config)
            anly_grid_spec = create_grid_specification_string(
                blat, blon, logger, config)
            if regrid_with_met_tool:
                nc_fcst_anly_base = re.sub("grb2", "nc", fcst_anly_base)
                fcst_anly_base = nc_fcst_anly_base

            tile_dir = os.path.join(out_dir, cur_init, cur_storm)
            fcst_hr_str = str(fcst_hr).zfill(3)

            fcst_regridded_filename = \
                config.getstr('regex_pattern', 'FCST_TILE_PREFIX') + \
                fcst_hr_str + "_" + fcst_anly_base
            fcst_regridded_file = os.path.join(tile_dir,
                                               fcst_regridded_filename)
            anly_regridded_filename = \
                config.getstr('regex_pattern', 'ANLY_TILE_PREFIX') +\
                fcst_hr_str + "_" + fcst_anly_base
            anly_regridded_file = os.path.join(tile_dir,
                                               anly_regridded_filename)

            # Regrid the fcst file only if a fcst tile
            # file does NOT already exist or if the overwrite flag is True.
            # Create new gridded file for fcst tile
            if file_exists(fcst_regridded_file) and not overwrite_flag:
                msg = ("INFO| [" + cur_filename + ":" + cur_function +
                       " ] | Forecast tile file " + fcst_regridded_file +
                       " exists, skip regridding")
                logger.debug(msg)
            else:
                # Perform fcst regridding on the records of interest
                var_level_string = retrieve_var_info(config, logger)
                if regrid_with_met_tool:
                    # Perform regridding using MET Tool regrid_data_plane
                    fcst_cmd_list = [
                        regrid_data_plane_exe, ' ', fcst_filename, ' ',
                        fcst_grid_spec, ' ', fcst_regridded_file, ' ',
                        var_level_string, ' -method NEAREST '
                    ]
                    regrid_cmd_fcst = ''.join(fcst_cmd_list)
                    regrid_cmd_fcst = \
                        batchexe('sh')['-c', regrid_cmd_fcst].err2out()
                    msg = ("INFO|[regrid]| regrid_data_plane regrid command:" +
                           regrid_cmd_fcst.to_shell())
                    logger.debug(msg)
                    run(regrid_cmd_fcst)

                else:
                    # Perform regridding via wgrib2
                    requested_records = retrieve_var_info(config, logger)
                    fcst_cmd_list = [
                        wgrib2_exe, ' ', fcst_filename, ' | ', egrep_exe, ' ',
                        requested_records, '|', wgrib2_exe, ' -i ',
                        fcst_filename, ' -new_grid ', fcst_grid_spec, ' ',
                        fcst_regridded_file
                    ]
                    wgrb_cmd_fcst = ''.join(fcst_cmd_list)
                    wgrb_cmd_fcst = \
                        batchexe('sh')['-c', wgrb_cmd_fcst].err2out()
                    msg = ("INFO|[wgrib2]| wgrib2 regrid command:" +
                           wgrb_cmd_fcst.to_shell())
                    logger.debug(msg)
                    run(wgrb_cmd_fcst)

            # Create new gridded file for anly tile
            if file_exists(anly_regridded_file) and not overwrite_flag:
                logger.debug("INFO| [" + cur_filename + ":" + cur_function +
                             " ] |" + " Analysis tile file: " +
                             anly_regridded_file + " exists, skip regridding")
            else:
                # Perform anly regridding on the records of interest
                var_level_string = retrieve_var_info(config, logger)
                if regrid_with_met_tool:
                    anly_cmd_list = [
                        regrid_data_plane_exe, ' ', anly_filename, ' ',
                        anly_grid_spec, ' ', anly_regridded_file, ' ',
                        var_level_string, ' ', ' -method NEAREST '
                    ]
                    regrid_cmd_anly = ''.join(anly_cmd_list)
                    regrid_cmd_anly = \
                        batchexe('sh')['-c', regrid_cmd_anly].err2out()
                    msg = ("INFO|[regrid]| regrid_data_plane regrid command:" +
                           regrid_cmd_anly.to_shell())
                    logger.debug(msg)
                    run(regrid_cmd_anly)
                else:
                    # Regridding via wgrib2.
                    requested_records = retrieve_var_info(config, logger)
                    anly_cmd_list = [
                        wgrib2_exe, ' ', anly_filename, ' | ', egrep_exe, ' ',
                        requested_records, '|', wgrib2_exe, ' -i ',
                        anly_filename, ' -new_grid ', anly_grid_spec, ' ',
                        anly_regridded_file
                    ]
                    wgrb_cmd_anly = ''.join(anly_cmd_list)
                    wgrb_cmd_anly = \
                        batchexe('sh')['-c', wgrb_cmd_anly].err2out()
                    msg = ("INFO|[wgrib2]| Regridding via wgrib2:" +
                           wgrb_cmd_anly.to_shell())
                    run(wgrb_cmd_anly)
                    logger.debug(msg)
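create_grid_specification_string is referenced above but not shown in this excerpt. Below is a minimal sketch of a function emitting the documented 'latlon Nx Ny lat_ll lon_ll delta_lat delta_lon' format; NLAT and NLON come from the docstring, while the delta parameters and all defaults here are assumptions for illustration.

def make_latlon_spec(lat_ll, lon_ll, nlat=60, nlon=60, dlat=0.5, dlon=0.5):
    """Sketch only: build 'latlon Nx Ny lat_ll lon_ll delta_lat delta_lon'.

    nlat/nlon stand in for the NLAT/NLON config settings mentioned in the
    docstring above; dlat/dlon and the defaults are illustrative guesses.
    """
    return 'latlon {} {} {} {} {} {}'.format(nlon, nlat, lat_ll, lon_ll,
                                             dlat, dlon)

# e.g. make_latlon_spec(25.0, -80.5) -> 'latlon 60 60 25.0 -80.5 0.5 0.5'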