예제 #1
0
def main(grb_ws,
         ancillary_ws,
         output_ws,
         etr_flag=False,
         eto_flag=False,
         scene_list_path=None,
         start_dt=None,
         end_dt=None,
         times_str='',
         extent_path=None,
         output_extent=None,
         daily_flag=True,
         stats_flag=True,
         overwrite_flag=False):
    """Compute hourly ETr/ETo from NLDAS data

    Walks the year/day-of-year sub folders of grb_ws, computes reference ET
    for every matching hourly GRB file, writes each hour to its own band of a
    24 band raster per day and (optionally) writes the UTC daily sum.

    Parameters
    ----------
    grb_ws : str
        Folder of NLDAS GRB files.
    ancillary_ws : str
        Folder of ancillary rasters.
    output_ws : str
        Folder of output rasters.
    etr_flag : bool, optional
        If True, compute alfalfa reference ET (ETr).
        ETr is computed if neither etr_flag nor eto_flag is set.
    eto_flag : bool, optional
        If True, compute grass reference ET (ETo).
    scene_list_path : str, optional
        Landsat scene keep list file path.
    start_dt : datetime, optional
        Start date.  If start_dt and end_dt are not both set, they are taken
        from the first and last date in the scene keep list.
    end_dt : datetime, optional
        End date.
    times_str : str, optional
        Comma separated values and/or ranges of UTC hours (i.e. "1, 2, 5-8").
        Parsed with _utils.parse_int_set().  All 24 hours are used if empty.
        When daily_flag is True, hours outside of this list are still
        computed for the daily sum but are not written to the hourly raster.
    extent_path : str, optional
        File path defining the output extent.
    output_extent : list, optional
        Decimal degrees values defining output extent.
        Takes precedence over extent_path.
    daily_flag : bool, optional
        If True, save daily ETr/ETo sum raster (the default is True).
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    bool or None
        False if the inputs are invalid (no usable dates or a missing extent
        file), otherwise None.

    """
    logging.info('\nComputing NLDAS hourly ETr/ETo')
    np.seterr(invalid='ignore')

    # Compute ETr and/or ETo
    if not etr_flag and not eto_flag:
        logging.info('  ETo/ETr flag(s) not set, defaulting to ETr')
        etr_flag = True

    # Only process specific hours
    if not times_str:
        time_list = range(0, 24, 1)
    else:
        time_list = list(_utils.parse_int_set(times_str))
    # Times are compared against the 4 digit TIME field of the file name
    time_list = ['{:02d}00'.format(t) for t in time_list]

    etr_folder = 'etr'
    eto_folder = 'eto'
    hour_fmt = '{}_{:04d}{:02d}{:02d}_hourly_nldas.img'
    day_fmt = '{}_{:04d}{:02d}{:02d}_nldas.img'
    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    # Raw strings are used so that "\d" is not an invalid string escape
    input_re = re.compile(
        r'NLDAS_FORA0125_H.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})'
        r'(?P<DAY>\d{2}).(?P<TIME>\d{4}).002.grb$')

    # Landsat Custom Scene ID
    landsat_re = re.compile(
        r'^(?:LT04|LT05|LE07|LC08)_\d{6}_(?P<DATE>\d{8})')

    # Folder names must be exactly YYYY (year) or DDD (day of year)
    # Anchoring the patterns keeps names like "2015_old" from being parsed
    year_re = re.compile(r'\d{4}$')
    doy_re = re.compile(r'\d{3}$')

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')
    elev_path = os.path.join(ancillary_ws, 'nldas_elev.img')
    lat_path = os.path.join(ancillary_ws, 'nldas_lat.img')
    lon_path = os.path.join(ancillary_ws, 'nldas_lon.img')

    # Process Landsat scene list and start/end input parameters
    if not scene_list_path and (not start_dt or not end_dt):
        logging.error(
            '\nERROR: A Landsat scene list or start/end dates must be set, '
            'exiting\n')
        return False
    if scene_list_path is not None and os.path.isfile(scene_list_path):
        # Build a date list from the Landsat scene keep list file
        logging.info('\nReading dates from scene keep list file')
        logging.info('  {}'.format(scene_list_path))
        with open(scene_list_path) as input_f:
            keep_list = input_f.readlines()
        # Lines that don't match the scene ID pattern are ignored
        date_list = sorted([
            dt.datetime.strptime(m.group('DATE'),
                                 '%Y%m%d').strftime('%Y-%m-%d')
            for image_id in keep_list for m in [landsat_re.match(image_id)]
            if m
        ])
        logging.debug('  {}'.format(', '.join(date_list)))
    else:
        date_list = []
    if start_dt and end_dt:
        logging.debug('  Start date: {}'.format(start_dt))
        logging.debug('  End date:   {}'.format(end_dt))
    elif date_list:
        start_dt = dt.datetime.strptime(date_list[0], '%Y-%m-%d')
        end_dt = dt.datetime.strptime(date_list[-1], '%Y-%m-%d')
    else:
        # The scene list was missing or had no matching scene IDs and the
        #   start/end dates can't be used as a fallback
        logging.error(
            '\nERROR: No dates could be read from the Landsat scene list '
            'and start/end dates were not both set, exiting\n')
        return False

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    nldas_osr = drigo.raster_ds_osr(nldas_ds)
    nldas_proj = drigo.osr_proj(nldas_osr)
    nldas_cs = drigo.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = drigo.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug('  Projection: {}'.format(nldas_proj))
    logging.debug('  Cellsize: {}'.format(nldas_cs))
    logging.debug('  Geo: {}'.format(nldas_geo))
    logging.debug('  Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        nldas_extent = drigo.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        # Log the snapped extent (the requested extent was logged above)
        logging.debug('  Extent: {}'.format(nldas_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if not os.path.isfile(extent_path):
            logging.error('\nThe extent object does not exist, exiting\n'
                          '  {}'.format(extent_path))
            return False
        elif extent_path.lower().endswith('.shp'):
            nldas_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = drigo.project_extent(nldas_extent, extent_osr,
                                            nldas_osr, extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Read the NLDAS mask array if present
    # NOTE: mask_array is not referenced again in this function
    if mask_path and os.path.isfile(mask_path):
        mask_array, mask_nodata = drigo.raster_to_array(
            mask_path,
            mask_extent=nldas_extent,
            fill_value=0,
            return_nodata=True)
        mask_array = mask_array != mask_nodata
    else:
        mask_array = None

    # Read ancillary arrays for the (subset) extent
    elev_array = drigo.raster_to_array(elev_path,
                                       mask_extent=nldas_extent,
                                       return_nodata=False)
    lat_array = drigo.raster_to_array(lat_path,
                                      mask_extent=nldas_extent,
                                      return_nodata=False)
    lon_array = drigo.raster_to_array(lon_path,
                                      mask_extent=nldas_extent,
                                      return_nodata=False)

    # Hourly RefET functions expects lat/lon in radians
    lat_array *= (math.pi / 180)
    lon_array *= (math.pi / 180)

    # Build output folder
    etr_ws = os.path.join(output_ws, etr_folder)
    eto_ws = os.path.join(output_ws, eto_folder)
    if etr_flag and not os.path.isdir(etr_ws):
        os.makedirs(etr_ws)
    if eto_flag and not os.path.isdir(eto_ws):
        os.makedirs(eto_ws)

    # Iterate all available files and check dates if necessary
    # Each sub folder in the main folder has all imagery for 1 day
    #   (in UTC time)
    # The path for each subfolder is the /YYYY/DOY
    errors = defaultdict(list)
    for root, folders, files in os.walk(grb_ws):
        root_split = os.path.normpath(root).split(os.sep)
        folder_name = root_split[-1]
        # A single component path (i.e. ".") has no parent folder name
        parent_name = root_split[-2] if len(root_split) > 1 else ''

        # If the year/doy is outside the range, skip
        if year_re.match(parent_name) and doy_re.match(folder_name):
            root_dt = dt.datetime.strptime(
                '{}_{}'.format(parent_name, folder_name), '%Y_%j')
            logging.info('{}'.format(root_dt.date()))
            if ((start_dt is not None and root_dt < start_dt)
                    or (end_dt is not None and root_dt > end_dt)):
                continue
            elif date_list and root_dt.date().isoformat() not in date_list:
                continue
        # If the year is outside the range, don't search subfolders
        elif year_re.match(folder_name):
            root_year = int(folder_name)
            logging.info('Year: {}'.format(root_year))
            # Modifying folders in place controls which sub folders
            #   os.walk() visits and in what order
            if ((start_dt is not None and root_year < start_dt.year)
                    or (end_dt is not None and root_year > end_dt.year)):
                folders[:] = []
            else:
                folders[:] = sorted(folders)
            continue
        else:
            continue
        logging.debug('  {}'.format(root))

        # Start off assuming every file needs to be processed
        day_skip_flag = False

        # Build output folders if necessary
        etr_year_ws = os.path.join(etr_ws, str(root_dt.year))
        eto_year_ws = os.path.join(eto_ws, str(root_dt.year))
        if etr_flag and not os.path.isdir(etr_year_ws):
            os.makedirs(etr_year_ws)
        if eto_flag and not os.path.isdir(eto_year_ws):
            os.makedirs(eto_year_ws)

        # Build daily total and hourly paths
        date_args = (root_dt.year, root_dt.month, root_dt.day)
        etr_day_path = os.path.join(
            etr_year_ws, day_fmt.format('etr', *date_args))
        eto_day_path = os.path.join(
            eto_year_ws, day_fmt.format('eto', *date_args))
        etr_hour_path = os.path.join(
            etr_year_ws, hour_fmt.format('etr', *date_args))
        eto_hour_path = os.path.join(
            eto_year_ws, hour_fmt.format('eto', *date_args))

        # If daily ETr/ETo files are present, day can be skipped
        # NOTE(review): when daily_flag is False, day_skip_flag stays False
        #   so existing hourly rasters are rebuilt even if overwrite_flag
        #   is False - confirm this is intended
        if not overwrite_flag and daily_flag:
            if etr_flag and not os.path.isfile(etr_day_path):
                pass
            elif eto_flag and not os.path.isfile(eto_day_path):
                pass
            else:
                day_skip_flag = True

        # If the hour and daily files don't need to be made, skip the day
        if not overwrite_flag:
            if etr_flag and not os.path.isfile(etr_hour_path):
                pass
            elif eto_flag and not os.path.isfile(eto_hour_path):
                pass
            elif day_skip_flag:
                logging.debug('  File(s) already exist, skipping')
                continue

        # Create a single raster for each day with 24 bands
        # Each time step will be stored in a separate band
        if etr_flag:
            logging.debug('  {}'.format(etr_hour_path))
            drigo.build_empty_raster(etr_hour_path,
                                     band_cnt=24,
                                     output_dtype=np.float32,
                                     output_proj=nldas_proj,
                                     output_cs=nldas_cs,
                                     output_extent=nldas_extent,
                                     output_fill_flag=True)
        if eto_flag:
            logging.debug('  {}'.format(eto_hour_path))
            drigo.build_empty_raster(eto_hour_path,
                                     band_cnt=24,
                                     output_dtype=np.float32,
                                     output_proj=nldas_proj,
                                     output_cs=nldas_cs,
                                     output_extent=nldas_extent,
                                     output_fill_flag=True)

        # Sum all ETr/ETo images in each folder to generate a UTC day total
        # The sums stay as the integer 0 until the first hourly array is
        #   added, which is how "no data for this day" is detected below
        etr_day_array = 0
        eto_day_array = 0

        # Process each hour file
        for input_name in sorted(files):
            logging.info('  {}'.format(input_name))
            input_match = input_re.match(input_name)
            if input_match is None:
                logging.debug('    Regular expression didn\'t match, skipping')
                continue
            input_dt = dt.datetime(int(input_match.group('YEAR')),
                                   int(input_match.group('MONTH')),
                                   int(input_match.group('DAY')))
            input_doy = int(input_dt.strftime('%j'))
            time_str = input_match.group('TIME')
            # Bands are 1 based, so hour HH is written to band HH + 1
            band_num = int(time_str[:2]) + 1
            # Every hour is needed for the daily sum, so hours can only be
            #   skipped when the daily raster isn't being built
            if not daily_flag and time_str not in time_list:
                logging.debug('    Time not in list and not daily, skipping')
                continue

            input_path = os.path.join(root, input_name)
            logging.debug('    Time: {} {}'.format(input_dt.date(), time_str))
            logging.debug('    Band: {}'.format(band_num))

            # Determine band numbering/naming
            input_band_dict = grib_band_names(input_path)

            # Read input bands
            input_ds = gdal.Open(input_path)

            # Temperature should be in C for the hourly RefET calculation
            if 'Temperature [K]' in input_band_dict:
                temp_band_units = 'K'
                temp_array = drigo.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['Temperature [K]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
            elif 'Temperature [C]' in input_band_dict:
                temp_band_units = 'C'
                temp_array = drigo.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['Temperature [C]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
            else:
                logging.error('Unknown Temperature units, skipping')
                logging.error('  {}'.format(input_band_dict.keys()))
                # Release the dataset before moving on to the next file
                input_ds = None
                continue

            # DEADBEEF - Having issue with T appearing to be C but labeled as K
            # Try to determine temperature units from values by checking
            #   whether the mean is closer to 20 (C) or 293 (K)
            temp_mean = float(np.nanmean(temp_array))
            temp_units_dict = {20: 'C', 293: 'K'}
            temp_array_units = temp_units_dict[min(
                temp_units_dict, key=lambda x: abs(x - temp_mean))]
            if temp_array_units == 'K' and temp_band_units == 'K':
                logging.debug('  Converting temperature from K to C')
                temp_array -= 273.15
            elif temp_array_units == 'C' and temp_band_units == 'C':
                pass
            elif temp_array_units == 'C' and temp_band_units == 'K':
                logging.debug((
                    '  Temperature units are K in the GRB band name, ' +
                    'but values appear to be C\n    Mean temperature: {:.2f}\n'
                    + '  Values will NOT be adjusted').format(temp_mean))
            elif temp_array_units == 'K' and temp_band_units == 'C':
                logging.debug((
                    '  Temperature units are C in the GRB band name, ' +
                    'but values appear to be K\n    Mean temperature: {:.2f}\n'
                    +
                    '  Values will be adjusted from K to C').format(temp_mean))
                temp_array -= 273.15

            # A missing band name is recorded and the file is skipped
            try:
                sph_array = drigo.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['Specific humidity [kg/kg]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
                rs_array = drigo.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict[
                        'Downward shortwave radiation flux [W/m^2]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
                wind_u_array = drigo.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['u-component of wind [m/s]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
                wind_v_array = drigo.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['v-component of wind [m/s]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
                input_ds = None
            except KeyError as e:
                errors[input_path].append(e)
                logging.error(' KeyError: {} Skipping: {}'.format(
                    e, input_ds.GetDescription()))
                input_ds = None
                continue

            rs_array *= 0.0036  # W m-2 to MJ m-2 hr-1
            # Wind speed is the magnitude of the u/v components
            wind_array = np.sqrt(wind_u_array**2 + wind_v_array**2)
            del wind_u_array, wind_v_array

            # Compute vapor pressure from specific humidity
            pair_array = refet.calcs._air_pressure(elev=elev_array)
            ea_array = refet.calcs._actual_vapor_pressure(q=sph_array,
                                                          pair=pair_array)

            refet_obj = refet.Hourly(tmean=temp_array,
                                     ea=ea_array,
                                     rs=rs_array,
                                     uz=wind_array,
                                     zw=10,
                                     elev=elev_array,
                                     lat=lat_array,
                                     lon=lon_array,
                                     doy=input_doy,
                                     time=int(time_str) / 100,
                                     method='asce')

            # ETr
            if etr_flag:
                etr_array = refet_obj.etr()
                if daily_flag:
                    etr_day_array += etr_array
                if time_str in time_list:
                    drigo.array_to_comp_raster(etr_array.astype(np.float32),
                                               etr_hour_path,
                                               band=band_num,
                                               stats_flag=False)
                del etr_array

            # ETo
            if eto_flag:
                eto_array = refet_obj.eto()
                if daily_flag:
                    eto_day_array += eto_array
                if time_str in time_list:
                    drigo.array_to_comp_raster(eto_array.astype(np.float32),
                                               eto_hour_path,
                                               band=band_num,
                                               stats_flag=False)
                del eto_array

            del temp_array, sph_array, rs_array, wind_array
            del pair_array, ea_array

        # Statistics are computed once per day after all bands are written
        if stats_flag and etr_flag:
            drigo.raster_statistics(etr_hour_path)
        if stats_flag and eto_flag:
            drigo.raster_statistics(eto_hour_path)

        # Save the projected ETr/ETo as 32-bit floats
        if not day_skip_flag and daily_flag:
            if etr_flag:
                # The sum is still the integer 0 if no hours were processed
                if hasattr(etr_day_array, 'astype'):
                    drigo.array_to_raster(etr_day_array.astype(np.float32),
                                          etr_day_path,
                                          output_geo=nldas_geo,
                                          output_proj=nldas_proj,
                                          stats_flag=stats_flag)
                else:
                    logging.debug(
                        '  No hourly ETr arrays, daily raster not written')
            if eto_flag:
                if hasattr(eto_day_array, 'astype'):
                    drigo.array_to_raster(eto_day_array.astype(np.float32),
                                          eto_day_path,
                                          output_geo=nldas_geo,
                                          output_proj=nldas_proj,
                                          stats_flag=stats_flag)
                else:
                    logging.debug(
                        '  No hourly ETo arrays, daily raster not written')

        del etr_day_array, eto_day_array

    if len(errors) > 0:
        logging.info('\nThe following errors were encountered:')
        for key, value in errors.items():
            logging.error(' Filepath: {}, error: {}'.format(key, value))

    logging.debug('\nScript Complete')
예제 #2
0
def main(grb_ws,
         ancillary_ws,
         output_ws,
         scene_list_path=None,
         start_dt=None,
         end_dt=None,
         times_str='',
         extent_path=None,
         output_extent=None,
         stats_flag=True,
         overwrite_flag=False):
    """Extract hourly NLDAS vapour pressure rasters

    Walks the year/day-of-year sub folders of grb_ws, computes actual vapour
    pressure from the specific humidity band of every matching hourly GRB
    file and writes each hour to its own band of a 24 band raster per day.

    Parameters
    ----------
    grb_ws : str
        Folder of NLDAS GRB files.
    ancillary_ws : str
        Folder of ancillary rasters.
    output_ws : str
        Folder of output rasters.
    scene_list_path : str, optional
        Landsat scene keep list file path.
    start_dt : datetime, optional
        Start date.  If start_dt and end_dt are not both set, they are taken
        from the first and last date in the scene keep list.
    end_dt : datetime, optional
        End date.
    times_str : str, optional
        Comma separated values and/or ranges of UTC hours (i.e. "1, 2, 5-8").
        Parsed with _utils.parse_int_set().  All 24 hours are used if empty.
    extent_path : str, optional
        File path defining the output extent.
    output_extent : list, optional
        Decimal degrees values defining output extent.
        Takes precedence over extent_path.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    bool or None
        False if the inputs are invalid (no usable dates or a missing extent
        file), otherwise None.

    """
    logging.info('\nExtracting NLDAS vapour pressure rasters')

    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    # Raw strings are used so that "\d" is not an invalid string escape
    input_re = re.compile(
        r'NLDAS_FORA0125_H.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})'
        r'(?P<DAY>\d{2}).(?P<TIME>\d{4}).002.grb$')

    # Landsat Custom Scene ID
    landsat_re = re.compile(
        r'^(?:LT04|LT05|LE07|LC08)_\d{6}_(?P<DATE>\d{8})')

    # Folder names must be exactly YYYY (year) or DDD (day of year)
    # Anchoring the patterns keeps names like "2015_old" from being parsed
    year_re = re.compile(r'\d{4}$')
    doy_re = re.compile(r'\d{3}$')

    output_folder = 'ea'
    output_fmt = 'ea_{:04d}{:02d}{:02d}_hourly_nldas.img'
    sph_band_name = 'Specific humidity [kg/kg]'

    # Only process specific hours
    if not times_str:
        time_list = range(0, 24, 1)
    else:
        time_list = list(_utils.parse_int_set(times_str))
    # Times are compared against the 4 digit TIME field of the file name
    time_list = ['{:02d}00'.format(t) for t in time_list]

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')
    elev_path = os.path.join(ancillary_ws, 'nldas_elev.img')

    # Process Landsat scene list and start/end input parameters
    if not scene_list_path and (not start_dt or not end_dt):
        logging.error(
            '\nERROR: A Landsat scene list or start/end dates must be set, '
            'exiting\n')
        return False
    if scene_list_path is not None and os.path.isfile(scene_list_path):
        # Build a date list from the Landsat scene keep list file
        logging.info('\nReading dates from scene keep list file')
        logging.info('  {}'.format(scene_list_path))
        with open(scene_list_path) as input_f:
            keep_list = input_f.readlines()
        # Lines that don't match the scene ID pattern are ignored
        date_list = sorted([
            dt.datetime.strptime(m.group('DATE'),
                                 '%Y%m%d').strftime('%Y-%m-%d')
            for image_id in keep_list for m in [landsat_re.match(image_id)]
            if m
        ])
        logging.debug('  {}'.format(', '.join(date_list)))
    else:
        date_list = []
    if start_dt and end_dt:
        logging.debug('  Start date: {}'.format(start_dt))
        logging.debug('  End date:   {}'.format(end_dt))
    elif date_list:
        start_dt = dt.datetime.strptime(date_list[0], '%Y-%m-%d')
        end_dt = dt.datetime.strptime(date_list[-1], '%Y-%m-%d')
    else:
        # The scene list was missing or had no matching scene IDs and the
        #   start/end dates can't be used as a fallback
        logging.error(
            '\nERROR: No dates could be read from the Landsat scene list '
            'and start/end dates were not both set, exiting\n')
        return False

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    nldas_osr = drigo.raster_ds_osr(nldas_ds)
    nldas_proj = drigo.osr_proj(nldas_osr)
    nldas_cs = drigo.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = drigo.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug('  Projection: {}'.format(nldas_proj))
    logging.debug('  Cellsize: {}'.format(nldas_cs))
    logging.debug('  Geo: {}'.format(nldas_geo))
    logging.debug('  Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        nldas_extent = drigo.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        # Log the snapped extent (the requested extent was logged above)
        logging.debug('  Extent: {}'.format(nldas_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if not os.path.isfile(extent_path):
            logging.error('\nThe extent object does not exist, exiting\n'
                          '  {}'.format(extent_path))
            return False
        elif extent_path.lower().endswith('.shp'):
            nldas_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = drigo.project_extent(nldas_extent, extent_osr,
                                            nldas_osr, extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Read the NLDAS mask array if present
    # NOTE: mask_array is not referenced again in this function
    if mask_path and os.path.isfile(mask_path):
        mask_array, mask_nodata = drigo.raster_to_array(
            mask_path,
            mask_extent=nldas_extent,
            fill_value=0,
            return_nodata=True)
        mask_array = mask_array != mask_nodata
    else:
        mask_array = None

    # Read elevation array for the (subset) extent
    # Air pressure only depends on elevation, so it is computed once
    elev_array = drigo.raster_to_array(elev_path,
                                       mask_extent=nldas_extent,
                                       return_nodata=False)
    pair_array = refet.calcs._air_pressure(elev_array)

    # Build output folder
    var_ws = os.path.join(output_ws, output_folder)
    if not os.path.isdir(var_ws):
        os.makedirs(var_ws)

    # Each sub folder in the main folder has all imagery for 1 day
    # The path for each subfolder is the /YYYY/DOY

    # Iterate all available files and check dates if necessary
    for root, folders, files in os.walk(grb_ws):
        root_split = os.path.normpath(root).split(os.sep)
        folder_name = root_split[-1]
        # A single component path (i.e. ".") has no parent folder name
        parent_name = root_split[-2] if len(root_split) > 1 else ''

        # If the year/doy is outside the range, skip
        if year_re.match(parent_name) and doy_re.match(folder_name):
            root_dt = dt.datetime.strptime(
                '{}_{}'.format(parent_name, folder_name), '%Y_%j')
            logging.info('{}'.format(root_dt.date()))
            if ((start_dt is not None and root_dt < start_dt)
                    or (end_dt is not None and root_dt > end_dt)):
                continue
            elif date_list and root_dt.date().isoformat() not in date_list:
                continue
        # If the year is outside the range, don't search subfolders
        elif year_re.match(folder_name):
            root_year = int(folder_name)
            logging.info('Year: {}'.format(root_year))
            # Modifying folders in place controls which sub folders
            #   os.walk() visits and in what order
            if ((start_dt is not None and root_year < start_dt.year)
                    or (end_dt is not None and root_year > end_dt.year)):
                folders[:] = []
            else:
                folders[:] = sorted(folders)
            continue
        else:
            continue

        # Create a single raster for each day with 24 bands
        # Each time step will be stored in a separate band
        output_name = output_fmt.format(root_dt.year, root_dt.month,
                                        root_dt.day)
        output_path = os.path.join(var_ws, str(root_dt.year), output_name)
        logging.debug('  {}'.format(output_path))
        if os.path.isfile(output_path):
            if not overwrite_flag:
                logging.debug('    File already exists, skipping')
                continue
            else:
                logging.debug('    File already exists, removing existing')
                os.remove(output_path)
        logging.debug('  {}'.format(root))
        if not os.path.isdir(os.path.dirname(output_path)):
            os.makedirs(os.path.dirname(output_path))
        drigo.build_empty_raster(output_path,
                                 band_cnt=24,
                                 output_dtype=np.float32,
                                 output_proj=nldas_proj,
                                 output_cs=nldas_cs,
                                 output_extent=nldas_extent,
                                 output_fill_flag=True)

        # Iterate through hourly files
        for input_name in sorted(files):
            logging.info('  {}'.format(input_name))
            input_match = input_re.match(input_name)
            if input_match is None:
                logging.debug('  Regular expression didn\'t match, skipping')
                continue
            input_path = os.path.join(root, input_name)
            input_dt = dt.datetime(int(input_match.group('YEAR')),
                                   int(input_match.group('MONTH')),
                                   int(input_match.group('DAY')))
            time_str = input_match.group('TIME')
            # Bands are 1 based, so hour HH is written to band HH + 1
            band_num = int(time_str[:2]) + 1
            if time_str not in time_list:
                logging.debug('    Time not in list, skipping')
                continue
            logging.debug('    Time: {} {}'.format(input_dt.date(), time_str))
            logging.debug('    Band: {}'.format(band_num))

            # Determine band numbering/naming
            input_band_dict = grib_band_names(input_path)

            # Skip the file (instead of aborting the whole run) if the
            #   specific humidity band is not present
            try:
                sph_band = input_band_dict[sph_band_name]
            except KeyError as e:
                logging.error(' KeyError: {} Skipping: {}'.format(
                    e, input_path))
                continue

            # Compute vapour pressure from specific humidity
            input_ds = gdal.Open(input_path)
            sph_array = drigo.raster_ds_to_array(
                input_ds,
                band=sph_band,
                mask_extent=nldas_extent,
                return_nodata=False)
            # Release the dataset as soon as the band has been read
            input_ds = None
            ea_array = refet.calcs._actual_vapor_pressure(q=sph_array,
                                                          pair=pair_array)

            # Save the projected array as 32-bit floats
            drigo.array_to_comp_raster(ea_array.astype(np.float32),
                                       output_path,
                                       band=band_num)

            del sph_array, ea_array

        # Statistics are computed once per day after all bands are written
        if stats_flag:
            drigo.raster_statistics(output_path)

    logging.debug('\nScript Complete')
예제 #3
0
def main(grb_ws, ancillary_ws, output_ws, variables=['pr'],
         scene_list_path=None, start_dt=None, end_dt=None, times_str='',
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract NLDAS target variable(s)

    For every requested variable, walk the GRB folder tree (/YYYY/DOY),
    and write one 24 band raster per day where band N holds UTC hour N-1.

    Parameters
    ----------
    grb_ws : str
        Folder of NLDAS GRB files.
    ancillary_ws : str
        Folder of ancillary rasters (must contain 'nldas_mask.img').
    output_ws : str
        Folder of output rasters.
    variables : list
        NLDAS variables to extract (the default is ['pr']).
        Choices: 'pr', 'srad', 'sph', 'tair', 'tmmn', 'tmmx', 'vs'.
        The list is only read, never modified.
    scene_list_path : str, optional
        Landsat scene keep list file path.
    start_dt : datetime, optional
        Start date.
    end_dt : datetime, optional
        End date.
    times_str : str, optional
        Comma separated values and/or ranges of UTC hours (i.e. "1, 2, 5-8").
        Parsed with _utils.parse_int_set().
        All 24 hours are processed if the string is empty.
    extent_path : str, optional
        File path defining the output extent.
    output_extent : list, optional
        Decimal degrees values defining output extent.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    bool or None
        False if the date range or extent inputs could not be resolved,
        otherwise None.

    Notes
    -----
    The interpreter is terminated with sys.exit() if `variables` is empty,
    is not a list, or contains an unsupported name.

    """
    logging.info('\nExtract NLDAS target variable(s)')

    # Raw strings keep the regex escapes (\d) from being treated as
    #   (invalid) Python string escapes
    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    input_re = re.compile(
        r'NLDAS_FORA0125_H.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})'
        r'(?P<DAY>\d{2}).(?P<TIME>\d{4}).002.grb$')

    output_fmt = '{}_{:04d}{:02d}{:02d}_hourly_nldas.img'

    # Landsat Custom Scene ID
    landsat_re = re.compile(
        r'^(?:LT04|LT05|LE07|LC08)_\d{6}_(?P<DATE>\d{8})')

    # Folder name patterns of the /YYYY/DOY GRB tree (compiled once)
    year_re = re.compile(r'\d{4}')
    doy_re = re.compile(r'\d{3}')

    # Only process a specific hours
    if not times_str:
        time_list = range(0, 24, 1)
    else:
        time_list = list(_utils.parse_int_set(times_str))
    time_list = ['{:02d}00'.format(t) for t in time_list]

    # Assume NLDAS is NAD83
    # input_epsg = 'EPSG:4269'

    # NLDAS rasters to extract
    data_full_list = ['pr', 'srad', 'sph', 'tair', 'tmmn', 'tmmx', 'vs']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif not isinstance(variables, list):
        # DEADBEEF - I could try converting comma separated strings to lists?
        logging.error('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif not set(variables).issubset(set(data_full_list)):
        logging.error('\nERROR: variables parameter is invalid\n  {}'.format(
            variables))
        sys.exit()

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')

    # Process Landsat scene list and start/end input parameters
    if not scene_list_path and (not start_dt or not end_dt):
        logging.error(
            '\nERROR: A Landsat scene list or start/end dates must be set, '
            'exiting\n')
        return False
    if scene_list_path is not None and os.path.isfile(scene_list_path):
        # Build a date list from the Landsat scene keep list file
        logging.info('\nReading dates from scene keep list file')
        logging.info('  {}'.format(scene_list_path))
        with open(scene_list_path) as input_f:
            scene_matches = [landsat_re.match(image_id) for image_id in input_f]
        date_list = sorted(
            dt.datetime.strptime(m.group('DATE'), '%Y%m%d').strftime('%Y-%m-%d')
            for m in scene_matches if m)
        logging.debug('  {}'.format(', '.join(date_list)))
    else:
        date_list = []
    if start_dt and end_dt:
        logging.debug('  Start date: {}'.format(start_dt))
        logging.debug('  End date:   {}'.format(end_dt))
    elif date_list:
        # Fall back to the first/last scene dates
        start_dt = dt.datetime.strptime(date_list[0], '%Y-%m-%d')
        end_dt = dt.datetime.strptime(date_list[-1], '%Y-%m-%d')
    else:
        # The scene list was missing or had no recognizable scene IDs,
        #   so there is nothing to derive the date range from
        logging.error(
            '\nERROR: No dates could be read from the scene list and '
            'start/end dates are not set, exiting\n')
        return False
    # Set for constant time date lookups while walking the GRB folders
    date_set = set(date_list)

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    nldas_osr = drigo.raster_ds_osr(nldas_ds)
    nldas_proj = drigo.osr_proj(nldas_osr)
    nldas_cs = drigo.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = drigo.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug('  Projection: {}'.format(nldas_proj))
    logging.debug('  Cellsize: {}'.format(nldas_cs))
    logging.debug('  Geo: {}'.format(nldas_geo))
    logging.debug('  Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        nldas_extent = drigo.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        # Report the snapped extent (the requested one was logged above)
        logging.debug('  Extent: {}'.format(nldas_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if not os.path.isfile(extent_path):
            logging.error(
                '\nThe extent object does not exist, exiting\n'
                '  {}'.format(extent_path))
            return False
        elif extent_path.lower().endswith('.shp'):
            nldas_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = drigo.project_extent(
            nldas_extent, extent_osr, nldas_osr, extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Read the NLDAS mask array if present
    # NOTE: mask_array is not referenced again in this function
    if mask_path and os.path.isfile(mask_path):
        mask_array, mask_nodata = drigo.raster_to_array(
            mask_path, mask_extent=nldas_extent, fill_value=0,
            return_nodata=True)
        mask_array = mask_array != mask_nodata
    else:
        mask_array = None

    # NLDAS band name dictionary
    # Temperature and wind are handled separately below, so only the
    #   remaining variables are looked up through this dictionary
    nldas_band_dict = dict()
    nldas_band_dict['pr'] = 'Total precipitation [kg/m^2]'
    nldas_band_dict['srad'] = 'Downward shortwave radiation flux [W/m^2]'
    nldas_band_dict['sph'] = 'Specific humidity [kg/kg]'
    nldas_band_dict['tair'] = 'Temperature [C]'
    nldas_band_dict['tmmn'] = 'Temperature [C]'
    nldas_band_dict['tmmx'] = 'Temperature [C]'
    nldas_band_dict['vs'] = [
        'u-component of wind [m/s]', 'v-component of wind [m/s]']

    # Process each variable
    logging.info('\nReading NLDAS GRIBs')
    for input_var in variables:
        logging.info("Variable: {}".format(input_var))

        # Build output folder
        var_ws = os.path.join(output_ws, input_var)
        if not os.path.isdir(var_ws):
            os.makedirs(var_ws)

        # Each sub folder in the main folder has all imagery for 1 day
        # The path for each subfolder is the /YYYY/DOY

        # Iterate all available files and check dates if necessary
        for root, folders, files in os.walk(grb_ws):
            root_split = os.path.normpath(root).split(os.sep)

            # If the year/doy is outside the range, skip
            # The length check avoids an IndexError when the root has
            #   a single path component (i.e. a relative grb_ws)
            if (len(root_split) >= 2 and
                    year_re.match(root_split[-2]) and
                    doy_re.match(root_split[-1])):
                root_dt = dt.datetime.strptime('{}_{}'.format(
                    root_split[-2], root_split[-1]), '%Y_%j')
                logging.info('{}-{:02d}-{:02d}'.format(
                    root_dt.year, root_dt.month, root_dt.day))
                if ((start_dt is not None and root_dt < start_dt) or
                        (end_dt is not None and root_dt > end_dt)):
                    continue
                elif date_set and root_dt.date().isoformat() not in date_set:
                    continue
            # If the year is outside the range, don't search subfolders
            elif year_re.match(root_split[-1]):
                root_year = int(root_split[-1])
                logging.info('Year: {}'.format(root_year))
                if ((start_dt is not None and root_year < start_dt.year) or
                        (end_dt is not None and root_year > end_dt.year)):
                    # Editing the list in place prunes the walk
                    folders[:] = []
                else:
                    folders[:] = sorted(folders)
                continue
            else:
                continue

            # Create a single raster for each day with 24 bands
            # Each time step will be stored in a separate band
            output_name = output_fmt.format(
                input_var, root_dt.year, root_dt.month, root_dt.day)
            output_path = os.path.join(
                var_ws, str(root_dt.year), output_name)
            logging.debug('  {}'.format(output_path))
            if os.path.isfile(output_path):
                if not overwrite_flag:
                    logging.debug('    File already exists, skipping')
                    continue
                else:
                    logging.debug('    File already exists, removing existing')
                    os.remove(output_path)
            logging.debug('  {}'.format(root))
            if not os.path.isdir(os.path.dirname(output_path)):
                os.makedirs(os.path.dirname(output_path))
            drigo.build_empty_raster(
                output_path, band_cnt=24, output_dtype=np.float32,
                output_proj=nldas_proj, output_cs=nldas_cs,
                output_extent=nldas_extent, output_fill_flag=True)

            # Iterate through hourly files
            for input_name in sorted(files):
                logging.info('  {}'.format(input_name))
                input_path = os.path.join(root, input_name)
                input_match = input_re.match(input_name)
                if input_match is None:
                    logging.debug(
                        '  Regular expression didn\'t match, skipping')
                    continue
                input_dt = dt.datetime(
                    int(input_match.group('YEAR')),
                    int(input_match.group('MONTH')),
                    int(input_match.group('DAY')))
                time_str = input_match.group('TIME')
                # Bands are 1 based, so hour 00 is written to band 1
                band_num = int(time_str[:2]) + 1
                if time_str not in time_list:
                    logging.debug('    Time not in list, skipping')
                    continue
                logging.debug('    Time: {} {}'.format(
                    input_dt.date(), time_str))
                logging.debug('    Band: {}'.format(band_num))

                # Determine band numbering/naming
                input_band_dict = grib_band_names(input_path)

                # Extract array and save
                input_ds = gdal.Open(input_path)

                # Convert Kelvin to Celsius (old NLDAS files were in K i think)
                if input_var in ['tair', 'tmmx', 'tmmn']:
                    # Temperature should be in C for et_common.refet_hourly_func()
                    if 'Temperature [K]' in input_band_dict:
                        temp_band_units = 'K'
                    elif 'Temperature [C]' in input_band_dict:
                        temp_band_units = 'C'
                    else:
                        logging.error('Unknown Temperature units, skipping')
                        logging.error('  {}'.format(input_band_dict.keys()))
                        # Release the dataset before moving to the next file
                        input_ds = None
                        continue
                    output_array = drigo.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict[
                            'Temperature [{}]'.format(temp_band_units)],
                        mask_extent=nldas_extent, return_nodata=False)

                    # DEADBEEF - Having issue with T appearing to be C but labeled as K
                    # Try to determine temperature units from values
                    # The units are whichever reference mean (20 C or 293 K)
                    #   is closest to the array mean
                    temp_mean = float(np.nanmean(output_array))
                    temp_units_dict = {20: 'C', 293: 'K'}
                    temp_array_units = temp_units_dict[
                        min(temp_units_dict, key=lambda x: abs(x - temp_mean))]
                    if temp_array_units == 'K' and temp_band_units == 'K':
                        logging.debug('  Converting temperature from K to C')
                        output_array -= 273.15
                    elif temp_array_units == 'C' and temp_band_units == 'C':
                        pass
                    elif temp_array_units == 'C' and temp_band_units == 'K':
                        logging.debug(
                            ('  Temperature units are K in the GRB band name, '
                             'but values appear to be C\n'
                             '    Mean temperature: {:.2f}\n'
                             '  Values will NOT be adjusted').format(temp_mean))
                    elif temp_array_units == 'K' and temp_band_units == 'C':
                        logging.debug(
                            ('  Temperature units are C in the GRB band name, '
                             'but values appear to be K\n'
                             '    Mean temperature: {:.2f}\n'
                             '  Values will be adjusted from K to C').format(
                                temp_mean))
                        output_array -= 273.15

                # Compute wind speed from vectors
                elif input_var == 'vs':
                    wind_u_array = drigo.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict['u-component of wind [m/s]'],
                        mask_extent=nldas_extent, return_nodata=False)
                    wind_v_array = drigo.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict['v-component of wind [m/s]'],
                        mask_extent=nldas_extent, return_nodata=False)
                    output_array = np.sqrt(
                        wind_u_array ** 2 + wind_v_array ** 2)
                # Read all other variables directly
                else:
                    output_array = drigo.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict[nldas_band_dict[input_var]],
                        mask_extent=nldas_extent, return_nodata=False)

                # Save the projected array as 32-bit floats
                drigo.array_to_comp_raster(
                    output_array.astype(np.float32), output_path,
                    band=band_num)

                del output_array
                input_ds = None

            if stats_flag:
                drigo.raster_statistics(output_path)

    logging.debug('\nScript Complete')
예제 #4
0
def main(grb_ws=os.getcwd(),
         ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(),
         keep_list_path=None,
         start_date=None,
         end_date=None,
         times_str='',
         extent_path=None,
         output_extent=None,
         stats_flag=True,
         overwrite_flag=False):
    """Extract hourly NLDAS wind rasters

    Walk the GRB folder tree (/YYYY/DOY) and write one 24 band wind speed
    raster per day where band N holds UTC hour N-1.  Wind speed is the
    magnitude of the u and v wind components.

    Parameters
    ----------
    grb_ws : str
        Folder of NLDAS GRB files.
        The default is the working directory at the time this function
        was defined (os.getcwd() is evaluated once, not per call).
    ancillary_ws : str
        Folder of ancillary rasters (must contain 'nldas_mask.img').
    output_ws : str
        Folder of output rasters.
    keep_list_path : str, optional
        Landsat scene keep list file path.
    start_date : str, optional
        ISO format date (YYYY-MM-DD).
        Defaults to 2017-01-01 if not set or not parseable.
    end_date : str, optional
        ISO format date (YYYY-MM-DD).
        Defaults to 2017-12-31 if not set or not parseable.
    times_str : str, optional
        Comma separated values and/or ranges of UTC hours (i.e. "1, 2, 5-8").
        Parsed with _utils.parse_int_set().
        All 24 hours are processed if the string is empty.
    extent_path : str, optional
        File path defining the output extent.
    output_extent : list, optional
        List decimal degrees values defining output extent.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    None

    """
    logging.info('\nExtracting NLDAS wind rasters')

    # Raw strings keep the regex escapes (\d) from being treated as
    #   (invalid) Python string escapes
    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    input_re = re.compile(
        r'NLDAS_FORA0125_H.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})'
        r'(?P<DAY>\d{2}).(?P<TIME>\d{4}).002.grb$')

    output_folder = 'wind'
    output_fmt = 'wind_{:04d}{:02d}{:02d}_hourly_nldas.img'

    # Folder name patterns of the /YYYY/DOY GRB tree (compiled once)
    year_re = re.compile(r'\d{4}')
    doy_re = re.compile(r'\d{3}')

    # If a date is not set, process 2017
    # strptime raises TypeError for None and ValueError for a bad string;
    #   anything else (i.e. KeyboardInterrupt) is allowed to propagate
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
    except (TypeError, ValueError):
        start_dt = dt.datetime(2017, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    else:
        logging.debug('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
    except (TypeError, ValueError):
        end_dt = dt.datetime(2017, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))
    else:
        logging.debug('  End date:   {}'.format(end_dt))

    # Only process a specific hours
    if not times_str:
        time_list = range(0, 24, 1)
    else:
        time_list = list(_utils.parse_int_set(times_str))
    time_list = ['{:02d}00'.format(t) for t in time_list]

    # Assume NLDAS is NAD83
    # input_epsg = 'EPSG:4269'

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')

    # Build a date list from the Landsat scene keep list file
    date_list = []
    if keep_list_path is not None and os.path.isfile(keep_list_path):
        logging.info('\nReading dates from scene keep list file')
        logging.info('  {}'.format(keep_list_path))
        landsat_re = re.compile(
            r'^(?:LT04|LT05|LE07|LC08)_(?:\d{3})(?:\d{3})_'
            r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})')
        with open(keep_list_path) as input_f:
            scene_matches = [
                landsat_re.match(image_id.strip()) for image_id in input_f]
        # Take the date from the named groups instead of a fixed slice
        date_list = [
            dt.datetime.strptime(
                ''.join(m.group('year', 'month', 'day')),
                '%Y%m%d').strftime('%Y-%m-%d')
            for m in scene_matches if m
        ]
        logging.debug('  {}'.format(', '.join(date_list)))
    # Set for constant time date lookups while walking the GRB folders
    date_set = set(date_list)

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    nldas_osr = drigo.raster_ds_osr(nldas_ds)
    nldas_proj = drigo.osr_proj(nldas_osr)
    nldas_cs = drigo.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = drigo.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug('  Projection: {}'.format(nldas_proj))
    logging.debug('  Cellsize: {}'.format(nldas_cs))
    logging.debug('  Geo: {}'.format(nldas_geo))
    logging.debug('  Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        nldas_extent = drigo.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        # Report the snapped extent (the requested one was logged above)
        logging.debug('  Extent: {}'.format(nldas_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            nldas_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = drigo.project_extent(nldas_extent, extent_osr,
                                            nldas_osr, extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Read the NLDAS mask array if present
    # NOTE: mask_array is not referenced again in this function
    if mask_path and os.path.isfile(mask_path):
        mask_array, mask_nodata = drigo.raster_to_array(
            mask_path,
            mask_extent=nldas_extent,
            fill_value=0,
            return_nodata=True)
        mask_array = mask_array != mask_nodata
    else:
        mask_array = None

    # Build output folder
    var_ws = os.path.join(output_ws, output_folder)
    if not os.path.isdir(var_ws):
        os.makedirs(var_ws)

    # Each sub folder in the main folder has all imagery for 1 day
    # The path for each subfolder is the /YYYY/DOY

    # Iterate all available files and check dates if necessary
    logging.info('\nReading NLDAS GRIBs')
    for root, folders, files in os.walk(grb_ws, topdown=True):
        root_split = os.path.normpath(root).split(os.sep)

        # If the year/doy is outside the range, skip
        # The length check avoids an IndexError when the root has
        #   a single path component (i.e. a relative grb_ws)
        if (len(root_split) >= 2
                and year_re.match(root_split[-2])
                and doy_re.match(root_split[-1])):
            root_dt = dt.datetime.strptime(
                '{}_{}'.format(root_split[-2], root_split[-1]), '%Y_%j')
            logging.info('{}-{:02d}-{:02d}'.format(root_dt.year, root_dt.month,
                                                   root_dt.day))
            if root_dt < start_dt or root_dt > end_dt:
                continue
            elif date_set and root_dt.date().isoformat() not in date_set:
                continue
        # If the year is outside the range, don't search subfolders
        elif year_re.match(root_split[-1]):
            root_year = int(root_split[-1])
            logging.info('Year: {}'.format(root_year))
            if root_year < start_dt.year or root_year > end_dt.year:
                # Editing the list in place prunes the top-down walk
                folders[:] = []
            else:
                folders[:] = sorted(folders)
            continue
        else:
            continue

        # Create a single raster for each day with 24 bands
        # Each time step will be stored in a separate band
        output_name = output_fmt.format(root_dt.year, root_dt.month,
                                        root_dt.day)
        output_path = os.path.join(var_ws, str(root_dt.year), output_name)
        logging.debug('  {}'.format(output_path))
        if os.path.isfile(output_path):
            if not overwrite_flag:
                logging.debug('    File already exists, skipping')
                continue
            else:
                logging.debug('    File already exists, removing existing')
                os.remove(output_path)
        logging.debug('  {}'.format(root))
        if not os.path.isdir(os.path.dirname(output_path)):
            os.makedirs(os.path.dirname(output_path))
        drigo.build_empty_raster(output_path,
                                 band_cnt=24,
                                 output_dtype=np.float32,
                                 output_proj=nldas_proj,
                                 output_cs=nldas_cs,
                                 output_extent=nldas_extent,
                                 output_fill_flag=True)

        # Iterate through hourly files
        for input_name in sorted(files):
            logging.info('  {}'.format(input_name))
            input_path = os.path.join(root, input_name)
            input_match = input_re.match(input_name)
            if input_match is None:
                logging.debug('  Regular expression didn\'t match, skipping')
                continue
            input_dt = dt.datetime(int(input_match.group('YEAR')),
                                   int(input_match.group('MONTH')),
                                   int(input_match.group('DAY')))
            time_str = input_match.group('TIME')
            # Bands are 1 based, so hour 00 is written to band 1
            band_num = int(time_str[:2]) + 1
            if time_str not in time_list:
                logging.debug('    Time not in list, skipping')
                continue
            logging.debug('    Time: {} {}'.format(input_dt.date(), time_str))
            logging.debug('    Band: {}'.format(band_num))

            # Determine band numbering/naming
            input_band_dict = grib_band_names(input_path)

            # Compute magnitude of wind from components
            input_ds = gdal.Open(input_path)
            wind_u_array = drigo.raster_ds_to_array(
                input_ds,
                band=input_band_dict['u-component of wind [m/s]'],
                mask_extent=nldas_extent,
                return_nodata=False)
            wind_v_array = drigo.raster_ds_to_array(
                input_ds,
                band=input_band_dict['v-component of wind [m/s]'],
                mask_extent=nldas_extent,
                return_nodata=False)
            wind_array = np.sqrt(wind_u_array**2 + wind_v_array**2)

            # Save the projected array as 32-bit floats
            drigo.array_to_comp_raster(wind_array.astype(np.float32),
                                       output_path,
                                       band=band_num)

            del wind_array, wind_u_array, wind_v_array
            input_ds = None

        if stats_flag:
            drigo.raster_statistics(output_path)

    logging.debug('\nScript Complete')