Example #1
0
def _parse_iso_date(date_str, default_dt, label):
    """Parse an ISO date string, falling back to a default datetime.

    Args:
        date_str (str): ISO format date (YYYY-MM-DD), or None
        default_dt (datetime): value used when date_str can not be parsed
        label (str): text placed in front of the date in the log message

    Returns:
        datetime: the parsed date, or default_dt
    """
    try:
        parsed_dt = dt.datetime.strptime(date_str, '%Y-%m-%d')
    except (TypeError, ValueError):
        # TypeError: date_str is None (not set)
        # ValueError: date_str is not in YYYY-MM-DD format
        logging.info('  {}{}'.format(label, default_dt))
        return default_dt
    logging.debug('  {}{}'.format(label, parsed_dt))
    return parsed_dt


def _landsat_id_date(name, landsat_re):
    """Return the ISO date embedded in a Landsat ID, or None.

    Args:
        name (str): folder or file name to test
        landsat_re: compiled pattern with year, month and day groups

    Returns:
        str: ISO format date (YYYY-MM-DD), or None if the name does not
            match or the matched digits are not a valid calendar date
    """
    id_match = landsat_re.match(name)
    if id_match is None:
        return None
    try:
        return dt.datetime.strptime(
            '_'.join(id_match.groups()), '%Y_%m_%d').date().isoformat()
    except ValueError:
        return None


def _landsat_date_list(landsat_ws):
    """Build a sorted list of unique ISO dates from Landsat scene IDs.

    Both folder names and file names below landsat_ws are checked.

    Args:
        landsat_ws (str): folder of Landsat scenes or tar.gz files

    Returns:
        list: sorted, unique ISO format date strings
    """
    landsat_re = re.compile(
        r'^(?:LT04|LT05|LE07|LC08)_(?:\d{3})(?:\d{3})_'
        r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})')
    date_set = set()
    for root, dirs, files in os.walk(landsat_ws, topdown=True):
        root_date = _landsat_id_date(os.path.basename(root), landsat_re)
        if root_date is not None:
            date_set.add(root_date)
            # If root matches, don't explore subfolders
            # (files directly inside root are still checked below)
            dirs[:] = []

        for file_name in files:
            file_date = _landsat_id_date(file_name, landsat_re)
            if file_date is not None:
                date_set.add(file_date)
    return sorted(date_set)


def _temperature_to_celsius(temp_array, band_units):
    """Convert a temperature array to Celsius based on its values.

    The GRB band label is not trusted on its own: the array mean is compared
    against 20 and 293 to guess whether the values are C or K, and the
    offset is only applied when the values look like Kelvin.

    Args:
        temp_array (array): temperature values (modified in place)
        band_units (str): units from the GRB band name ('K' or 'C')

    Returns:
        array: temp_array, adjusted to Celsius when needed
    """
    # DEADBEEF - Having issue with T appearing to be C but labeled as K
    # Try to determine temperature units from values
    temp_mean = float(np.nanmean(temp_array))
    temp_units_dict = {20: 'C', 293: 'K'}
    array_units = temp_units_dict[
        min(temp_units_dict, key=lambda x: abs(x - temp_mean))]

    if array_units == 'K':
        if band_units == 'K':
            logging.debug('  Converting temperature from K to C')
        else:
            logging.debug(
                ('  Temperature units are C in the GRB band name, ' +
                 'but values appear to be K\n    Mean temperature: {:.2f}\n' +
                 '  Values will be adjusted from K to C').format(temp_mean))
        temp_array -= 273.15
    elif band_units == 'K':
        logging.debug(
            ('  Temperature units are K in the GRB band name, ' +
             'but values appear to be C\n    Mean temperature: {:.2f}\n' +
             '  Values will NOT be adjusted').format(temp_mean))
    return temp_array


def main(grb_ws=os.getcwd(), ancillary_ws=os.getcwd(), output_ws=os.getcwd(),
         variables=['pr'], landsat_ws=None,
         start_date=None, end_date=None, times_str='',
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract NLDAS target variable(s)

    Walks grb_ws looking for YYYY/DOY folders of hourly NLDAS GRB files and
    writes one 24 band raster per day and variable
    (output_ws/<variable>/<year>/<variable>_<YYYYMMDD>_hourly_nldas.img).
    Band N holds UTC hour N - 1.

    Args:
        grb_ws (str): folder of NLDAS GRB files
        ancillary_ws (str): folder of ancillary rasters
            (must contain nldas_mask.img)
        output_ws (str): folder of output rasters
        variables (list): NLDAS variables to extract
            ('pr', 'srad', 'sph', 'tair', 'tmmn', 'tmmx', 'vs')
            The default list is only read, never modified.
        landsat_ws (str): folder of Landsat scenes or tar.gz files
            If set, only dates found in the Landsat IDs are processed.
        start_date (str): ISO format date (YYYY-MM-DD)
            If not set (or not parsable), 2017-01-01 is used.
        end_date (str): ISO format date (YYYY-MM-DD)
            If not set (or not parsable), 2017-12-31 is used.
        times_str (str): comma separated values and/or ranges of UTC hours
            (i.e. "1, 2, 5-8")
            Parsed with python_common.parse_int_set()
            If empty, all 24 hours are processed.
        extent_path (str): file path defining the output extent
        output_extent (list): decimal degrees values defining output extent
            Takes precedence over extent_path.
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None

    Raises:
        SystemExit: if variables is empty, not a list, or contains an
            unsupported name, or if the NLDAS mask raster can not be opened
    """
    logging.info('\nExtract NLDAS target variable(s)')

    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    # The dots are escaped so they only match the literal separators
    input_re = re.compile(
        r'NLDAS_FORA0125_H\.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})'
        r'(?P<DAY>\d{2})\.(?P<TIME>\d{4})\.002\.grb$')

    output_fmt = '{}_{:04d}{:02d}{:02d}_hourly_nldas.img'

    # Folder names must be exactly YYYY and DOY (anchored so that names with
    #   extra characters are skipped instead of failing in strptime()/int())
    year_re = re.compile(r'\d{4}$')
    doy_re = re.compile(r'\d{3}$')

    # If a date is not set, process 2017
    start_dt = _parse_iso_date(
        start_date, dt.datetime(2017, 1, 1), 'Start date: ')
    end_dt = _parse_iso_date(
        end_date, dt.datetime(2017, 12, 31), 'End date:   ')

    # Only process specific hours
    if not times_str:
        time_list = range(0, 24, 1)
    else:
        time_list = list(parse_int_set(times_str))
    time_list = ['{:02d}00'.format(t) for t in time_list]

    # Assume NLDAS is NAD83
    # input_epsg = 'EPSG:4269'

    # NLDAS rasters to extract
    data_full_list = ['pr', 'srad', 'sph', 'tair', 'tmmn', 'tmmx', 'vs']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif not isinstance(variables, list):
        # DEADBEEF - I could try converting comma separated strings to lists?
        logging.error('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif not set(variables).issubset(set(data_full_list)):
        logging.error('\nERROR: variables parameter is invalid\n  {}'.format(
            variables))
        sys.exit()

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')

    # Build a date list from landsat_ws scene folders or tar.gz files
    date_list = []
    if landsat_ws is not None and os.path.isdir(landsat_ws):
        logging.info('\nReading dates from Landsat IDs')
        logging.info('  {}'.format(landsat_ws))
        date_list = _landsat_date_list(landsat_ws)
    # Set copy for fast membership tests inside the folder walk
    date_set = set(date_list)

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    if nldas_ds is None:
        # Without gdal.UseExceptions(), gdal.Open() returns None on failure
        logging.error(
            '\nERROR: The NLDAS mask raster could not be opened\n  {}'.format(
                mask_path))
        sys.exit()
    nldas_osr = gdc.raster_ds_osr(nldas_ds)
    nldas_proj = gdc.osr_proj(nldas_osr)
    nldas_cs = gdc.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = gdc.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug('  Projection: {}'.format(nldas_proj))
    logging.debug('  Cellsize: {}'.format(nldas_cs))
    logging.debug('  Geo: {}'.format(nldas_geo))
    logging.debug('  Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        nldas_extent = gdc.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        # Log the snapped extent (the requested extent was logged above)
        logging.debug('  Extent: {}'.format(nldas_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            nldas_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = gdc.project_extent(
            nldas_extent, extent_osr, nldas_osr, extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Read the NLDAS mask array if present
    # NOTE(review): mask_array is not used anywhere else in this function;
    #   the read is kept as-is - confirm whether it can be removed
    if mask_path and os.path.isfile(mask_path):
        mask_array, mask_nodata = gdc.raster_to_array(
            mask_path, mask_extent=nldas_extent, fill_value=0,
            return_nodata=True)
        mask_array = mask_array != mask_nodata
    else:
        mask_array = None

    # NLDAS band name dictionary (GRB band names keyed by variable)
    # 'vs' and the temperature variables are handled explicitly below,
    #   only the remaining variables are looked up through this dictionary
    nldas_band_dict = dict()
    nldas_band_dict['pr'] = 'Total precipitation [kg/m^2]'
    nldas_band_dict['srad'] = 'Downward shortwave radiation flux [W/m^2]'
    nldas_band_dict['sph'] = 'Specific humidity [kg/kg]'
    nldas_band_dict['tair'] = 'Temperature [C]'
    nldas_band_dict['tmmn'] = 'Temperature [C]'
    nldas_band_dict['tmmx'] = 'Temperature [C]'
    nldas_band_dict['vs'] = [
        'u-component of wind [m/s]', 'v-component of wind [m/s]']

    # Process each variable
    logging.info('\nReading NLDAS GRIBs')
    for input_var in variables:
        logging.info("Variable: {}".format(input_var))

        # Build output folder
        var_ws = os.path.join(output_ws, input_var)
        if not os.path.isdir(var_ws):
            os.makedirs(var_ws)

        # Each sub folder in the main folder has all imagery for 1 day
        # The path for each subfolder is the /YYYY/DOY

        # Iterate all available files and check dates if necessary
        for root, folders, files in os.walk(grb_ws):
            root_split = os.path.normpath(root).split(os.sep)

            # If the year/doy is outside the range, skip
            # The length check protects root_split[-2] for single component
            #   roots (i.e. a relative grb_ws like '.')
            if (len(root_split) >= 2 and
                    year_re.match(root_split[-2]) and
                    doy_re.match(root_split[-1])):
                root_dt = dt.datetime.strptime('{}_{}'.format(
                    root_split[-2], root_split[-1]), '%Y_%j')
                logging.info('{}-{:02d}-{:02d}'.format(
                    root_dt.year, root_dt.month, root_dt.day))
                # start_dt and end_dt are always set at this point
                if root_dt < start_dt or root_dt > end_dt:
                    continue
                elif date_set and root_dt.date().isoformat() not in date_set:
                    continue
            # If the year is outside the range, don't search subfolders
            elif year_re.match(root_split[-1]):
                root_year = int(root_split[-1])
                logging.info('Year: {}'.format(root_year))
                if root_year < start_dt.year or root_year > end_dt.year:
                    folders[:] = []
                else:
                    # Visit the DOY folders in order
                    folders[:] = sorted(folders)
                continue
            else:
                continue

            # Create a single raster for each day with 24 bands
            # Each time step will be stored in a separate band
            output_name = output_fmt.format(
                input_var, root_dt.year, root_dt.month, root_dt.day)
            output_path = os.path.join(
                var_ws, str(root_dt.year), output_name)
            logging.debug('  {}'.format(output_path))
            if os.path.isfile(output_path):
                if not overwrite_flag:
                    logging.debug('    File already exists, skipping')
                    continue
                else:
                    logging.debug('    File already exists, removing existing')
                    os.remove(output_path)
            logging.debug('  {}'.format(root))
            if not os.path.isdir(os.path.dirname(output_path)):
                os.makedirs(os.path.dirname(output_path))
            gdc.build_empty_raster(
                output_path, band_cnt=24, output_dtype=np.float32,
                output_proj=nldas_proj, output_cs=nldas_cs,
                output_extent=nldas_extent, output_fill_flag=True)

            # Iterate through hourly files
            for input_name in sorted(files):
                logging.info('  {}'.format(input_name))
                input_path = os.path.join(root, input_name)
                input_match = input_re.match(input_name)
                if input_match is None:
                    logging.debug(
                        '  Regular expression didn\'t match, skipping')
                    continue
                input_dt = dt.datetime(
                    int(input_match.group('YEAR')),
                    int(input_match.group('MONTH')),
                    int(input_match.group('DAY')))
                time_str = input_match.group('TIME')
                # Bands are 1 based, hour 0000 is stored in band 1
                band_num = int(time_str[:2]) + 1
                if time_str not in time_list:
                    logging.debug('    Time not in list, skipping')
                    continue
                logging.debug('    Time: {} {}'.format(
                    input_dt.date(), time_str))
                logging.debug('    Band: {}'.format(band_num))

                # Determine band numbering/naming
                input_band_dict = grib_band_names(input_path)

                # Extract array and save
                input_ds = gdal.Open(input_path)

                # Convert Kelvin to Celsius (old NLDAS files were in K i think)
                if input_var in ['tair', 'tmmx', 'tmmn']:
                    # Temperature should be in C for et_common.refet_hourly_func()
                    if 'Temperature [K]' in input_band_dict:
                        temp_band_units = 'K'
                    elif 'Temperature [C]' in input_band_dict:
                        temp_band_units = 'C'
                    else:
                        logging.error('Unknown Temperature units, skipping')
                        logging.error('  {}'.format(input_band_dict.keys()))
                        # Release the dataset before moving to the next file
                        input_ds = None
                        continue
                    output_array = gdc.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict[
                            'Temperature [{}]'.format(temp_band_units)],
                        mask_extent=nldas_extent, return_nodata=False)
                    output_array = _temperature_to_celsius(
                        output_array, temp_band_units)

                # Compute wind speed from vectors
                elif input_var == 'vs':
                    wind_u_array = gdc.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict['u-component of wind [m/s]'],
                        mask_extent=nldas_extent, return_nodata=False)
                    wind_v_array = gdc.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict['v-component of wind [m/s]'],
                        mask_extent=nldas_extent, return_nodata=False)
                    output_array = np.sqrt(
                        wind_u_array ** 2 + wind_v_array ** 2)
                # Read all other variables directly
                else:
                    output_array = gdc.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict[nldas_band_dict[input_var]],
                        mask_extent=nldas_extent, return_nodata=False)

                # Save the projected array as 32-bit floats
                gdc.array_to_comp_raster(
                    output_array.astype(np.float32), output_path,
                    band=band_num)

                del output_array
                input_ds = None

            if stats_flag:
                gdc.raster_statistics(output_path)

    logging.debug('\nScript Complete')
Example #2
0
def main(ini_path, tile_list=None, overwrite_flag=False, mp_procs=1):
    """Prep Landsat path/row specific data

    Parameters
    ----------
    ini_path : str
        File path of the input parameters file.
    tile_list : list, optional
        Landsat path/rows to process (i.e. [p045r043, p045r033]).
        This will override the tile list in the INI file.
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).
    mp_procs : int, optional
        Number of cores to use (the default is 1).

    Returns
    -------
    None

    """
    logging.info('\nPrepare path/row data')

    # Open config file
    config = python_common.open_ini(ini_path)

    # Get input parameters
    logging.debug('  Reading Input File')
    year = config.getint('INPUTS', 'year')
    if tile_list is None:
        tile_list = python_common.read_param('tile_list', [], config, 'INPUTS')
    project_ws = config.get('INPUTS', 'project_folder')
    logging.debug('  Year: {}'.format(year))
    logging.debug('  Path/rows: {}'.format(', '.join(tile_list)))
    logging.debug('  Project: {}'.format(project_ws))

    # study_area_path = config.get('INPUTS', 'study_area_path')
    footprint_path = config.get('INPUTS', 'footprint_path')
    # For now, assume the UTM zone file is colocated with the footprints shapefile
    utm_path = python_common.read_param(
        'utm_path',
        os.path.join(os.path.dirname(footprint_path),
                     'wrs2_tile_utm_zones.json'), config, 'INPUTS')
    skip_list_path = python_common.read_param('skip_list_path', '', config,
                                              'INPUTS')

    landsat_flag = python_common.read_param('landsat_flag', True, config,
                                            'INPUTS')
    ledaps_flag = False
    dem_flag = python_common.read_param('dem_flag', True, config, 'INPUTS')
    nlcd_flag = python_common.read_param('nlcd_flag', True, config, 'INPUTS')
    cdl_flag = python_common.read_param('cdl_flag', False, config, 'INPUTS')
    landfire_flag = python_common.read_param('landfire_flag', False, config,
                                             'INPUTS')
    field_flag = python_common.read_param('field_flag', False, config,
                                          'INPUTS')

    tile_gcs_buffer = python_common.read_param('tile_buffer', 0.25, config)

    # Input/output folder and file paths
    if landsat_flag:
        landsat_input_ws = config.get('INPUTS', 'landsat_input_folder')
    else:
        landsat_input_ws = None
    # if ledaps_flag:
    #     ledaps_input_ws = config.get('INPUTS', 'ledaps_input_folder')
    # else:
    #     ledaps_input_ws = None

    if dem_flag:
        dem_input_ws = config.get('INPUTS', 'dem_input_folder')
        dem_tile_fmt = config.get('INPUTS', 'dem_tile_fmt')
        dem_output_ws = config.get('INPUTS', 'dem_output_folder')
        dem_output_name = python_common.read_param('dem_output_name',
                                                   'dem.img', config)
        # dem_output_name = config.get('INPUTS', 'dem_output_name')
    else:
        dem_input_ws, dem_tile_fmt = None, None
        dem_output_ws, dem_output_name = None, None

    if nlcd_flag:
        nlcd_input_path = config.get('INPUTS', 'nlcd_input_path')
        nlcd_output_ws = config.get('INPUTS', 'nlcd_output_folder')
        nlcd_output_fmt = python_common.read_param('nlcd_output_fmt',
                                                   'nlcd_{:04d}.img', config)
    else:
        nlcd_input_path, nlcd_output_ws, nlcd_output_fmt = None, None, None

    if cdl_flag:
        cdl_input_path = config.get('INPUTS', 'cdl_input_path')
        cdl_ag_list = config.get('INPUTS', 'cdl_ag_list')
        cdl_ag_list = list(python_common.parse_int_set(cdl_ag_list))
        # default_cdl_ag_list = range(1,62) + range(66,78) + range(204,255)
        # cdl_ag_list = python_common.read_param(
        #    'cdl_ag_list', default_cdl_ag_list, config)
        # cdl_ag_list = list(map(int, cdl_ag_list))
        # cdl_non_ag_list = python_common.read_param(
        #    'cdl_non_ag_list', [], config)
        cdl_output_ws = config.get('INPUTS', 'cdl_output_folder')
        cdl_output_fmt = python_common.read_param('cdl_output_fmt',
                                                  'cdl_{:04d}.img', config)
        cdl_ag_output_fmt = python_common.read_param('cdl_ag_output_fmt',
                                                     'cdl_ag_{:04d}.img',
                                                     config)
    else:
        cdl_input_path, cdl_ag_list = None, None
        cdl_output_ws, cdl_output_fmt, cdl_ag_output_fmt = None, None, None

    if landfire_flag:
        landfire_input_path = config.get('INPUTS', 'landfire_input_path')
        landfire_ag_list = config.get('INPUTS', 'landfire_ag_list')
        landfire_ag_list = list(python_common.parse_int_set(landfire_ag_list))
        # default_landfire_ag_list = range(3960,4000)
        # landfire_ag_list = python_common.read_param(
        #    'landfire_ag_list', default_landfire_ag_list, config)
        # landfire_ag_list = list(map(int, landfire_ag_list))
        landfire_output_ws = config.get('INPUTS', 'landfire_output_folder')
        landfire_output_fmt = python_common.read_param('landfire_output_fmt',
                                                       'landfire_{:04d}.img',
                                                       config)
        landfire_ag_output_fmt = python_common.read_param(
            'landfire_ag_output_fmt', 'landfire_ag_{:04d}.img', config)
    else:
        landfire_input_path, landfire_ag_list = None, None
        landfire_output_ws = None
        landfire_output_fmt, landfire_ag_output_fmt = None, None

    if field_flag:
        field_input_path = config.get('INPUTS', 'field_input_path')
        field_output_ws = config.get('INPUTS', 'field_output_folder')
        field_output_fmt = python_common.read_param('field_output_fmt',
                                                    'fields_{:04d}.img',
                                                    config)
    else:
        field_input_path = None
        field_output_ws, field_output_fmt = None, None

    # File/folder names
    orig_data_folder_name = 'ORIGINAL_DATA'

    # Check inputs folders/paths
    logging.info('\nChecking input folders/files')
    file_check(footprint_path)
    file_check(utm_path)
    if landsat_flag:
        folder_check(landsat_input_ws)
    # if ledaps_flag:
    #     folder_check(ledaps_input_ws)
    if dem_flag:
        folder_check(dem_input_ws)
    if nlcd_flag:
        file_check(nlcd_input_path)
    if cdl_flag:
        file_check(cdl_input_path)
    if landfire_flag:
        # Landfire will likely be an ESRI grid (set as a folder)
        if not (os.path.isdir(landfire_input_path)
                or os.path.isfile(landfire_input_path)):
            logging.error('\n  {} does not exist'.format(landfire_input_path))
    if field_flag:
        file_check(field_input_path)
    if skip_list_path:
        file_check(skip_list_path)

    # Build output folders
    if not os.path.isdir(project_ws):
        os.makedirs(project_ws)
    if dem_flag and not os.path.isdir(dem_output_ws):
        os.makedirs(dem_output_ws)
    if nlcd_flag and not os.path.isdir(nlcd_output_ws):
        os.makedirs(nlcd_output_ws)
    if cdl_flag and not os.path.isdir(cdl_output_ws):
        os.makedirs(cdl_output_ws)
    if landfire_flag and not os.path.isdir(landfire_output_ws):
        os.makedirs(landfire_output_ws)
    if field_flag and not os.path.isdir(field_output_ws):
        os.makedirs(field_output_ws)

    # For now assume path/row are two digit numbers
    tile_fmt = 'p{:03d}r{:03d}'
    tile_re = re.compile('p(\d{3})r(\d{3})')
    image_re = re.compile(
        '^(LT04|LT05|LE07|LC08)_(\d{3})(\d{3})_(\d{4})(\d{2})(\d{2})')
    snap_cs = 30
    snap_xmin, snap_ymin = (15, 15)

    # Set snap environment parameters
    env = drigo.env
    env.cellsize = snap_cs
    env.snap_xmin, env.snap_ymin = snap_xmin, snap_ymin

    # Use WGSS84 (EPSG 4326) for GCS spatial reference
    # Could also use NAD83 (EPSG 4269)
    # gcs_epsg = 4326
    # gcs_osr = epsg_osr(4326)
    # gcs_proj = osr_proj(gcs_osr)

    # Landsat Footprints (WRS2 Descending Polygons)
    logging.debug('\nFootprint (WRS2 descending should be GCS84):')
    tile_gcs_osr = drigo.feature_path_osr(footprint_path)
    logging.debug('  OSR: {}'.format(tile_gcs_osr))

    # Doublecheck that WRS2 descending shapefile is GCS84
    # if tile_gcs_osr != epsg_osr(4326):
    #     logging.error('  WRS2 is not GCS84')
    #     sys.exit()

    # Get geometry for each path/row
    tile_gcs_wkt_dict = path_row_wkt_func(footprint_path,
                                          path_field='PATH',
                                          row_field='ROW')

    # Get UTM zone for each path/row
    # DEADBEEF - Using "eval" is considered unsafe and should be changed
    tile_utm_zone_dict = eval(open(utm_path, 'r').read())

    # Project study area geometry to GCS coordinates
    # logging.debug('\nStudy area')
    # study_area_geom = feature_path_geom_union(study_area_path)
    # study_area_gcs_geom = study_area_geom.Clone()
    # study_area_gcs_geom.TransformTo(tile_gcs_osr)

    # Get list of all intersecting Landsat path/rows
    # logging.info('\nLandsat path/rows')
    # tile_list = []
    # for tile_name, tile_gcs_wkt in tile_gcs_wkt_dict.items():
    #     tile_gcs_geom = ogr.CreateGeometryFromWkt(tile_gcs_wkt)
    #     if tile_gcs_geom.Intersects(study_area_gcs_geom):
    #         tile_list.append(tile_name)
    # for tile_name in sorted(tile_list):
    #     logging.debug('  {}'.format(tile_name))

    # Check that each path/row extent and UTM zone exist
    logging.info('\nChecking path/row list against footprint shapefile')
    for tile_name in sorted(tile_list):
        if tile_name not in tile_gcs_wkt_dict.keys():
            logging.error(
                '  {} feature not in footprint shapefile'.format(tile_name))
            continue
        elif tile_name not in tile_utm_zone_dict.keys():
            logging.error(
                '  {} UTM zone not in footprint shapefile'.format(tile_name))
            continue
        elif tile_utm_zone_dict[tile_name] == 0:
            logging.error(('  UTM zone is not set for {} in ' +
                           'footprint shapefile').format(tile_name))
            continue

    # Build output folders for each path/row
    logging.info('\nBuilding path/row folders')
    for tile_name in tile_list:
        logging.debug('  {} {}'.format(year, tile_name))
        tile_output_ws = os.path.join(project_ws, str(year), tile_name)
        if ((landsat_flag or ledaps_flag)
                and not os.path.isdir(tile_output_ws)):
            os.makedirs(tile_output_ws)
        if (dem_flag
                and not os.path.isdir(os.path.join(dem_output_ws, tile_name))):
            os.makedirs(os.path.join(dem_output_ws, tile_name))
        if (nlcd_flag and
                not os.path.isdir(os.path.join(nlcd_output_ws, tile_name))):
            os.makedirs(os.path.join(nlcd_output_ws, tile_name))
        if (cdl_flag
                and not os.path.isdir(os.path.join(cdl_output_ws, tile_name))):
            os.makedirs(os.path.join(cdl_output_ws, tile_name))
        if (landfire_flag and not os.path.isdir(
                os.path.join(landfire_output_ws, tile_name))):
            os.makedirs(os.path.join(landfire_output_ws, tile_name))
        if (field_flag and
                not os.path.isdir(os.path.join(field_output_ws, tile_name))):
            os.makedirs(os.path.join(field_output_ws, tile_name))

    # Read skip list
    if (landsat_flag or ledaps_flag) and skip_list_path:
        logging.debug('\nReading scene skiplist')
        with open(skip_list_path) as skip_list_f:
            skip_list = skip_list_f.readlines()
            skip_list = [
                scene.strip() for scene in skip_list
                if image_re.match(scene.strip())
            ]
    else:
        logging.debug('\nSkip list not set in INI')
        skip_list = []

    # Copy and unzip raw Landsat scenes
    # Use these for thermal band, MTL file (scene time), and to run FMask
    if landsat_flag:
        logging.info('\nExtract raw Landsat scenes')
        # Process each path/row
        extract_targz_list = []
        for tile_name in tile_list:
            tile_output_ws = os.path.join(project_ws, str(year), tile_name)

            # path/row as strings with leading zeros
            path, row = map(str, tile_re.match(tile_name).groups())
            tile_input_ws = os.path.join(landsat_input_ws, path, row,
                                         str(year))
            if not os.path.isdir(tile_input_ws):
                continue
            logging.info('  {} {}'.format(year, tile_name))

            # Process each tar.gz file
            for input_name in sorted(os.listdir(tile_input_ws)):
                if (not image_re.match(input_name)
                        and not input_name.endswith('.tar.gz')):
                    continue

                # Get Landsat scene ID from tar.gz file name
                # DEADBEEF - For now this is the EE scene ID, but it could be
                #   changed to the full collection 1 ID
                scene_id = input_name.split('.')[0]

                # Output workspace
                image_output_ws = os.path.join(tile_output_ws, scene_id)
                orig_data_ws = os.path.join(image_output_ws,
                                            orig_data_folder_name)

                if skip_list and scene_id in skip_list:
                    logging.debug('    {} - Skipping scene'.format(scene_id))
                    # DEADBEEF - Should the script always remove the scene
                    #   if it is in the skip list?
                    # Maybe only if overwrite is set?
                    if os.path.isdir(image_output_ws):
                        # input('Press ENTER to delete {}'.format(scene_id))
                        shutil.rmtree(image_output_ws)
                    continue

                # If orig_data_ws doesn't exist, don't check images
                if not os.path.isdir(orig_data_ws):
                    os.makedirs(orig_data_ws)
                elif (not overwrite_flag
                      and landsat_files_check(image_output_ws)):
                    continue

                # Extract Landsat tar.gz file
                input_path = os.path.join(tile_input_ws, input_name)
                print(orig_data_ws)
                # sys.exit()
                if mp_procs > 1:
                    extract_targz_list.append([input_path, orig_data_ws])
                else:
                    python_common.extract_targz_func(input_path, orig_data_ws)

                # # Use a command line call
                # input_path = os.path.join(tile_input_ws, input_name)
                # if job_i % pbs_jobs != 0:
                #     job_list.append('tar -zxvf {} -C {} &\n'.format(
                #         input_path, orig_data_ws))
                # else:
                #     job_list.append('tar -zxvf {} -C {}\n'.format(
                #         input_path, orig_data_ws))
                #     # job_list.append('tar -zxvf {} -C {} &\n'.format(
                #     #     input_path, orig_data_ws))
                #     # job_list.append('wait\n')
                # job_i += 1

        # Extract Landsat tar.gz files using multiprocessing
        if extract_targz_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(python_common.extract_targz_mp,
                               extract_targz_list,
                               chunksize=1)
            pool.close()
            pool.join()
            del results, pool

    # Get projected extent for each path/row
    # This should probably be in a function
    if (dem_flag or nlcd_flag or cdl_flag or landfire_flag or field_flag):
        tile_utm_extent_dict = gcs_to_utm_dict(tile_list, tile_utm_zone_dict,
                                               tile_gcs_osr, tile_gcs_wkt_dict,
                                               tile_gcs_buffer, snap_xmin,
                                               snap_ymin, snap_cs)

    # Mosaic DEM tiles for each path/row
    if dem_flag:
        logging.info('\nBuild DEM for each path/row')
        mosaic_mp_list = []
        for tile_name in tile_list:
            # Output folder and path
            tile_output_path = os.path.join(dem_output_ws, tile_name,
                                            dem_output_name)
            if not overwrite_flag and os.path.isfile(tile_output_path):
                logging.debug('    {} already exists, skipping'.format(
                    os.path.basename(tile_output_path)))
                continue
            logging.info('  {}'.format(tile_name))

            # Get the path/row geometry in GCS for selecting intersecting tiles
            tile_gcs_geom = ogr.CreateGeometryFromWkt(
                tile_gcs_wkt_dict[tile_name])
            # Apply a small buffer (in degrees) to the extent
            # DEADBEEF - Buffer fails if GDAL is not built with GEOS support
            # tile_gcs_geom = tile_gcs_geom.Buffer(tile_gcs_buffer)
            tile_gcs_extent = drigo.Extent(tile_gcs_geom.GetEnvelope())
            tile_gcs_extent = tile_gcs_extent.ogrenv_swap()
            tile_gcs_extent.buffer_extent(tile_gcs_buffer)
            # tile_gcs_extent.ymin, tile_gcs_extent.xmax = tile_gcs_extent.xmax, tile_gcs_extent.ymin

            # Offsets are needed since tile name is upper left corner of tile
            # Tile n36w120 spans -120 <-> -119 and 35 <-> 36
            lon_list = range(
                int(tile_gcs_extent.xmin) - 1, int(tile_gcs_extent.xmax))
            lat_list = range(
                int(tile_gcs_extent.ymin) + 1,
                int(tile_gcs_extent.ymax) + 2)

            # Get list of DEM tile rasters
            dem_tile_list = []
            for lat, lon in itertools.product(lat_list, lon_list):
                # Convert sign of lat/lon to letter
                lat = ('n' + '{:02d}'.format(abs(lat)) if lat >= 0 else 's' +
                       '{:02d}'.format(abs(lat)))
                lon = ('w' + '{:03d}'.format(abs(lon)) if lon < 0 else 'e' +
                       '{:03d}'.format(abs(lon)))
                dem_tile_path = os.path.join(dem_input_ws,
                                             dem_tile_fmt.format(lat, lon))
                if os.path.isfile(dem_tile_path):
                    dem_tile_list.append(dem_tile_path)
            if not dem_tile_list:
                logging.warning('    WARNING: No DEM tiles were selected')
                continue

            # Mosaic tiles using mosaic function
            tile_utm_osr = drigo.epsg_osr(32600 +
                                          int(tile_utm_zone_dict[tile_name]))
            tile_utm_proj = drigo.epsg_proj(32600 +
                                            int(tile_utm_zone_dict[tile_name]))
            tile_utm_extent = tile_utm_extent_dict[tile_name]
            tile_utm_ullr = tile_utm_extent.ul_lr_swap()

            # Mosaic, clip, project using custom function
            if mp_procs > 1:
                mosaic_mp_list.append([
                    dem_tile_list, tile_output_path, tile_utm_proj, snap_cs,
                    tile_utm_extent
                ])
            else:
                drigo.mosaic_tiles(dem_tile_list, tile_output_path,
                                   tile_utm_osr, snap_cs, tile_utm_extent)

            # Cleanup
            del tile_output_path
            del tile_gcs_geom, tile_gcs_extent, tile_utm_extent
            del tile_utm_osr, tile_utm_proj
            del lon_list, lat_list, dem_tile_list
        # Mosaic DEM rasters using multiprocessing
        if mosaic_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(mosaic_tiles_mp, mosaic_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool

    # Project/clip NLCD for each path/row
    if nlcd_flag:
        logging.info('\nBuild NLCD for each path/row')
        project_mp_list = []
        for tile_name in tile_list:
            nlcd_output_path = os.path.join(nlcd_output_ws, tile_name,
                                            nlcd_output_fmt.format(year))
            if not overwrite_flag and os.path.isfile(nlcd_output_path):
                logging.debug('    {} already exists, skipping'.format(
                    os.path.basename(nlcd_output_path)))
                continue
            logging.info('  {}'.format(tile_name))

            # Set the nodata value on the NLCD raster if it is not set
            nlcd_ds = gdal.Open(nlcd_input_path, 0)
            nlcd_band = nlcd_ds.GetRasterBand(1)
            nlcd_nodata = nlcd_band.GetNoDataValue()
            nlcd_ds = None
            if nlcd_nodata is None:
                nlcd_nodata = 255

            # Clip and project
            tile_utm_osr = drigo.epsg_osr(32600 +
                                          int(tile_utm_zone_dict[tile_name]))
            tile_utm_proj = drigo.epsg_proj(32600 +
                                            int(tile_utm_zone_dict[tile_name]))
            tile_utm_extent = tile_utm_extent_dict[tile_name]
            tile_utm_ullr = tile_utm_extent.ul_lr_swap()

            if mp_procs > 1:
                project_mp_list.append([
                    nlcd_input_path, nlcd_output_path,
                    gdal.GRA_NearestNeighbour, tile_utm_proj, snap_cs,
                    tile_utm_extent, nlcd_nodata
                ])
            else:
                drigo.project_raster(nlcd_input_path, nlcd_output_path,
                                     gdal.GRA_NearestNeighbour, tile_utm_osr,
                                     snap_cs, tile_utm_extent, nlcd_nodata)

            # Cleanup
            del nlcd_output_path
            del nlcd_ds, nlcd_band, nlcd_nodata
            del tile_utm_osr, tile_utm_proj, tile_utm_extent
        # Project NLCD rasters using multiprocessing
        if project_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(drigo.project_raster_mp,
                               project_mp_list,
                               chunksize=1)
            pool.close()
            pool.join()
            del results, pool

    # Project/clip CDL for each path/row
    if cdl_flag:
        logging.info('\nBuild CDL for each path/row')
        project_mp_list, remap_mp_list = [], []
        for tile_name in tile_list:
            cdl_output_path = os.path.join(cdl_output_ws, tile_name,
                                           cdl_output_fmt.format(year))
            cdl_ag_output_path = os.path.join(cdl_output_ws, tile_name,
                                              cdl_ag_output_fmt.format(year))
            if not os.path.isfile(cdl_input_path):
                logging.error('\n\n  {} does not exist'.format(cdl_input_path))
                sys.exit()
            if not overwrite_flag and os.path.isfile(cdl_output_path):
                logging.debug('    {} already exists, skipping'.format(
                    os.path.basename(cdl_output_path)))
                continue
            logging.info('  {}'.format(tile_name))

            # Set the nodata value on the CDL raster if it is not set
            cdl_ds = gdal.Open(cdl_input_path, 0)
            cdl_band = cdl_ds.GetRasterBand(1)
            cdl_nodata = cdl_band.GetNoDataValue()
            cdl_ds = None
            if cdl_nodata is None:
                cdl_nodata = 255

            # Clip and project
            tile_utm_osr = drigo.epsg_osr(32600 +
                                          int(tile_utm_zone_dict[tile_name]))
            tile_utm_proj = drigo.epsg_proj(32600 +
                                            int(tile_utm_zone_dict[tile_name]))
            tile_utm_extent = tile_utm_extent_dict[tile_name]
            if mp_procs > 1:
                project_mp_list.append([
                    cdl_input_path, cdl_output_path, gdal.GRA_NearestNeighbour,
                    tile_utm_proj, snap_cs, tile_utm_extent, cdl_nodata
                ])
                remap_mp_list.append(
                    [cdl_output_path, cdl_ag_output_path, cdl_ag_list])
            else:
                drigo.project_raster(cdl_input_path, cdl_output_path,
                                     gdal.GRA_NearestNeighbour, tile_utm_osr,
                                     snap_cs, tile_utm_extent, cdl_nodata)
                # Build a mask of CDL ag lands
                remap_mask_func(cdl_output_path, cdl_ag_output_path,
                                cdl_ag_list)

            # Cleanup
            del cdl_output_path
            del cdl_ds, cdl_band, cdl_nodata
            del tile_utm_osr, tile_utm_proj, tile_utm_extent
        # Project CDL rasters using multiprocessing
        if project_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(drigo.project_raster_mp,
                               project_mp_list,
                               chunksize=1)
            pool.close()
            pool.join()
            del results, pool
        if remap_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(remap_mask_mp, remap_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool

    # Project/clip LANDFIRE for each path/row
    if landfire_flag:
        logging.info('\nBuild LANDFIRE for each path/row')
        project_mp_list, remap_mp_list = [], []
        for tile_name in tile_list:
            landfire_output_path = os.path.join(
                landfire_output_ws, tile_name,
                landfire_output_fmt.format(year))
            landfire_ag_output_path = os.path.join(
                landfire_output_ws, tile_name,
                landfire_ag_output_fmt.format(year))
            if not overwrite_flag and os.path.isfile(landfire_output_path):
                logging.debug('    {} already exists, skipping'.format(
                    os.path.basename(landfire_output_path)))
                continue
            logging.info('  {}'.format(tile_name))

            # Set the nodata value on the LANDFIRE raster if it is not set
            # landfire_ds = gdal.Open(landfire_input_path, 0)
            # landfire_band = landfire_ds.GetRasterBand(1)
            # landfire_nodata = landfire_band.GetNoDataValue()
            # landfire_ds = None
            # if landfire_nodata is None:
            #     landfire_nodata = 32767
            # del landfire_ds, landfire_band
            landfire_nodata = 32767

            # Clip and project
            tile_utm_osr = drigo.epsg_osr(32600 +
                                          int(tile_utm_zone_dict[tile_name]))
            tile_utm_proj = drigo.epsg_proj(32600 +
                                            int(tile_utm_zone_dict[tile_name]))
            tile_utm_extent = tile_utm_extent_dict[tile_name]
            if mp_procs > 1:
                project_mp_list.append([
                    landfire_input_path, landfire_output_path,
                    gdal.GRA_NearestNeighbour, tile_utm_proj, snap_cs,
                    tile_utm_extent, landfire_nodata
                ])
                remap_mp_list.append([
                    landfire_output_path, landfire_ag_output_path,
                    landfire_ag_list
                ])
            else:
                drigo.project_raster(landfire_input_path, landfire_output_path,
                                     gdal.GRA_NearestNeighbour, tile_utm_osr,
                                     snap_cs, tile_utm_extent, landfire_nodata)
                # Build a mask of LANDFIRE ag lands
                remap_mask_func(landfire_output_path, landfire_ag_output_path,
                                landfire_ag_list)

            # Cleanup
            del landfire_output_path
            del tile_utm_osr, tile_utm_proj, tile_utm_extent
        # Project LANDFIRE rasters using multiprocessing
        if project_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(drigo.project_raster_mp,
                               project_mp_list,
                               chunksize=1)
            pool.close()
            pool.join()
            del results, pool
        if remap_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(remap_mask_mp, remap_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool

    # Convert field shapefiles to raster
    if field_flag:
        logging.info('\nBuild field rasters for each path/row')
        for tile_name in tile_list:
            logging.info('  {}'.format(tile_name))
            tile_output_ws = os.path.join(field_output_ws, tile_name)

            # Shapefile paths
            field_proj_name = (
                os.path.splitext(field_output_fmt.format(year))[0] +
                "_wgs84z{}.shp".format(tile_utm_zone_dict[tile_name]))
            field_proj_path = os.path.join(tile_output_ws, field_proj_name)
            field_output_path = os.path.join(tile_output_ws,
                                             field_output_fmt.format(year))
            if not overwrite_flag and os.path.isfile(field_output_path):
                logging.debug('    {} already exists, skipping'.format(
                    os.path.basename(field_output_path)))
                continue

            # The ogr2ogr spatial query is in the input spatial reference
            # Project the path/row extent to the field osr/proj
            field_input_osr = drigo.feature_path_osr(field_input_path)
            tile_utm_osr = drigo.epsg_osr(32600 +
                                          int(tile_utm_zone_dict[tile_name]))
            # field_input_proj = drigo.osr_proj(field_input_osr)
            # tile_utm_proj = drigo.osr_proj(tile_utm_osr)
            field_tile_extent = drigo.project_extent(
                tile_utm_extent_dict[tile_name], tile_utm_osr, field_input_osr,
                30)

            # Project shapefile to the path/row zone
            # Clipping requires GDAL to be built with GEOS support
            subprocess.call(
                [
                    'ogr2ogr', '-t_srs', 'EPSG:326{}'.format(
                        tile_utm_zone_dict[tile_name]), '-f', 'ESRI Shapefile',
                    '-overwrite'
                ] + ['-spat'] + list(map(str, field_tile_extent)) +
                ['-clipdst'] +
                list(map(str, tile_utm_extent_dict[tile_name])) +
                # ['-clipdst'] + list(map(str, tile_utm_extent_dict[tile_name])) +
                # ['-clipsrc'] + list(map(str, field_tile_extent)) +
                # ['-clipsrc'] + list(map(str, field_tile_extent)) +
                [field_proj_path, field_input_path])

            # Convert shapefile to raster
            field_mem_ds = drigo.polygon_to_raster_ds(
                field_proj_path,
                nodata_value=0,
                burn_value=1,
                output_osr=tile_utm_osr,
                output_extent=tile_utm_extent_dict[tile_name])
            field_output_driver = drigo.raster_driver(field_output_path)
            if field_output_path.lower().endswith('.img'):
                field_output_ds = field_output_driver.CreateCopy(
                    field_output_path, field_mem_ds, 0, ['COMPRESS=YES'])
            else:
                field_output_ds = field_output_driver.CreateCopy(
                    field_output_path, field_mem_ds, 0)
            field_output_ds, field_mem_ds = None, None

            # Remove field shapefile
            # try:
            #     remove_file(field_proj_path)
            # except:
            #     pass

            # Cleanup
            del tile_utm_osr, field_tile_extent, field_input_osr
            # del tile_utm_proj, field_input_proj
            del field_proj_name, field_proj_path, field_output_path

    logging.debug('\nScript complete')
Example #3
0
def main(ini_path, mc_iter_str='', tile_list=None,
         cold_tgt_pct=None, hot_tgt_pct=None, groupsize=64, blocksize=2048,
         multipoint_flag=True, shapefile_flag=True, stats_flag=True,
         overwrite_flag=False, mp_procs=1, delay=0, debug_flag=False,
         new_window_flag=False, no_file_logging=False,
         no_final_plots=None, no_temp_plots=None):
    """Run METRIC Monte Carlo for all Landsat scenes

    For every tile (path/row) folder under ``<project_folder>/<year>/`` the
    Monte Carlo script named by the ``monte_carlo_func`` INI parameter is
    called once per scene folder and per Monte Carlo iteration, either
    directly with ``subprocess.call`` or through a multiprocessing pool.

    Parameters
    ----------
    ini_path : str
        File path of the input parameters file.
    mc_iter_str : str
        MonteCarlo iteration list and/or range
        (parsed with dripy.parse_int_set()).
        If no iterations are parsed, each scene is run once without the
        '-mc' argument.
    tile_list : list, optional
        Landsat path/rows to process (i.e. [p045r043, p045r033]).
        This will override the tile list in the INI file.
    cold_tgt_pct : float, optional
        Target percentage of pixels with ETrF greater than cold Kc.
    hot_tgt_pct : float, optional
        Target percentage of pixels with ETrF less than hot Kc.
    groupsize : int, optional
        Script will try to place calibration point randomly into a labeled
        group of clustered values with at least n pixels (the default is 64).
        -1 = In the largest group
         0 = Anywhere in the image (not currently implemented)
         1 >= In any group with a pixel count greater or equal to n
    blocksize : int, optional
        Processing block size (the default is 2048).
        Only forwarded when both target percentages are set.
    multipoint_flag : bool, optional
        If True, save cal. points to multipoint shapefile (the default is True).
        Existing shapefiles in each scene PIXELS folder are removed first.
    shapefile_flag : bool, optional
        If True, save calibration points to shapefile (the default is True).
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).
    mp_procs : int, optional
        Number of cores to use (the default is 1).
    delay : float, optional
        Max random delay starting function in seconds (the default is 0).
        Only used when mp_procs > 1.
    debug_flag : bool, optional
        If True, enable debug level logging (the default is False).
    new_window_flag : bool, optional
        If True, open each process in new terminal window (the default is False).
        Microsoft Windows only.
    no_file_logging : bool
        If True, don't write logging to file (the default is False).
    no_final_plots : bool
        If True, don't save final ETrF histograms (the default is None).
        This will override the flag in the INI file
    no_temp_plots : bool
        If True, don't save temp ETrF histograms (the default is None).
        This will override the flag in the INI file

    Returns
    -------
    None

    Notes
    -----
    Tiles are skipped (with a log message) when the tile folder is missing,
    the tile name is not a path/row name, no scene folders are selected, or
    either of the tile specific INI files does not exist.
    """
    logging.info('\nRunning METRIC Monte Carlo')

    # Open config file
    config = dripy.open_ini(ini_path)

    # Get input parameters
    logging.debug('  Reading Input File')
    year = config.getint('INPUTS', 'year')
    if tile_list is None:
        tile_list = dripy.read_param('tile_list', [], config, 'INPUTS')
    project_ws = config.get('INPUTS', 'project_folder')
    logging.debug('  Year: {}'.format(year))
    logging.debug('  Path/rows: {}'.format(', '.join(tile_list)))
    logging.debug('  Project: {}'.format(project_ws))

    func_path = config.get('INPUTS', 'monte_carlo_func')
    keep_list_path = dripy.read_param('keep_list_path', '', config, 'INPUTS')

    # For now, get mc_iter list from command line, not from project file
    mc_iter_list = list(dripy.parse_int_set(mc_iter_str))

    # Need something in mc_iter_list to iterate over
    if not mc_iter_list:
        mc_iter_list = [None]

    # For now build INI file name from template INI names
    metric_ini_name = os.path.splitext(
        os.path.basename(config.get('INPUTS', 'metric_ini')))[0]
    mc_ini_name = os.path.splitext(
        os.path.basename(config.get('INPUTS', 'monte_carlo_ini')))[0]

    # INI file is built as a function of year and tile_name
    metric_ini_fmt = '{}_{}_{}.ini'
    mc_ini_fmt = '{}_{}_{}.ini'

    # Only allow new terminal windows on Windows
    # Strings must be compared by value; "is not" tests object identity
    if os.name != 'nt':
        new_window_flag = False

    # Regular expressions (raw strings so "\d" and "\w" reach the re module)
    # Path/row are three digit, zero padded numbers (i.e. p045r043)
    tile_re = re.compile(r'p\d{3}r\d{3}', re.IGNORECASE)
    image_id_re = re.compile(
        r'^(LT04|LT05|LE07|LC08)_(?:\w{4})_(\d{3})(\d{3})_'
        r'(\d{4})(\d{2})(\d{2})_(?:\d{8})_(?:\d{2})_(?:\w{2})$')
    pixel_shp_re = re.compile(r'\w+_\w+\.shp$')

    # Check inputs folders/paths
    if not os.path.isdir(project_ws):
        logging.error('\n Folder {} does not exist'.format(project_ws))
        sys.exit()

    # Read keep list
    # Scenes are only filtered when a keep list path was set in the INI,
    #   otherwise an unset keep list would filter out every scene
    keep_filter_flag = bool(keep_list_path)
    if keep_filter_flag:
        logging.debug('\nReading scene keep list')
        with open(keep_list_path) as keep_list_f:
            image_keep_list = [
                line.strip() for line in keep_list_f
                if image_id_re.match(line.strip())]
    else:
        logging.debug('\nScene keep list not set in INI')
        image_keep_list = []

    mp_list = []
    for tile_name in sorted(tile_list):
        logging.debug('\nTile: {}'.format(tile_name))
        tile_ws = os.path.join(project_ws, str(year), tile_name)
        # Skip if either check fails, a missing folder would otherwise
        #   raise an exception in os.listdir() below
        if not os.path.isdir(tile_ws) or not tile_re.match(tile_name):
            logging.debug('  {} {} - invalid tile, skipping'.format(
                year, tile_name))
            continue

        # Check that there are image folders
        image_id_list = [
            image_id for image_id in sorted(os.listdir(tile_ws))
            if (image_id_re.match(image_id) and
                os.path.isdir(os.path.join(tile_ws, image_id)) and
                (not keep_filter_flag or image_id in image_keep_list))]
        if not image_id_list:
            logging.debug('  {} {} - no available images, skipping'.format(
                year, tile_name))
            continue
        logging.debug('  {} {}'.format(year, tile_name))

        # Check that there is an input file for the path/row
        # This is done before any existing outputs are removed so that a
        #   skipped tile is left untouched
        metric_ini_path = os.path.join(
            tile_ws, metric_ini_fmt.format(metric_ini_name, year, tile_name))
        mc_ini_path = os.path.join(
            tile_ws, mc_ini_fmt.format(mc_ini_name, year, tile_name))
        if not os.path.isfile(metric_ini_path):
            logging.warning('    METRIC Input file {} does not exist'.format(
                metric_ini_path))
            continue
        if not os.path.isfile(mc_ini_path):
            logging.warning(
                '    Monte Carlo Input file {} does not exist'.format(
                    mc_ini_path))
            continue

        for image_id in image_id_list:
            pixel_ws = os.path.join(tile_ws, image_id, 'PIXELS')
            if not os.path.isdir(pixel_ws):
                os.mkdir(pixel_ws)
            # Since the multipoint shapefile will be appended, delete it
            #  in the wrapper script
            if multipoint_flag:
                for pixel_file in os.listdir(pixel_ws):
                    if pixel_shp_re.match(pixel_file):
                        logging.info('\n Removing {}'.format(pixel_file))
                        os.remove(os.path.join(pixel_ws, pixel_file))

        # Setup command line argument
        call_args = [sys.executable, func_path,
                     '--metric_ini', metric_ini_path,
                     '--mc_ini', mc_ini_path,
                     '--groupsize', str(groupsize)]
        if cold_tgt_pct is not None and hot_tgt_pct is not None:
            call_args.extend(['-t', str(cold_tgt_pct), str(hot_tgt_pct)])
            # NOTE(review): blocksize is only passed along with the target
            #   percentages, confirm that this nesting is intentional
            if blocksize:
                call_args.extend(['--blocksize', str(blocksize)])
        if shapefile_flag:
            call_args.append('--shapefile')
        if multipoint_flag:
            call_args.append('--multipoint')
        if stats_flag:
            call_args.append('--stats')
        if overwrite_flag:
            call_args.append('--overwrite')
        if debug_flag:
            call_args.append('--debug')
        if no_file_logging:
            call_args.append('--no_file_logging')
        if no_final_plots:
            call_args.append('--no_final_plots')
        if no_temp_plots:
            call_args.append('--no_temp_plots')

        # Run all scenes for each Monte Carlo iteration
        for mc_iter in mc_iter_list:
            if mc_iter is not None:
                mc_args = ['-mc', str(mc_iter)]
            else:
                mc_args = []
            for image_id in image_id_list:
                image_folder = os.path.join(tile_ws, image_id)
                logging.debug('  {}'.format(os.path.basename(image_folder)))
                if mp_procs > 1:
                    # Queue the call, the pool is run after all tiles
                    mp_list.append([
                        call_args + mc_args, image_folder, delay,
                        new_window_flag])
                else:
                    subprocess.call(call_args + mc_args, cwd=image_folder)

    # Run the queued calls using multiprocessing
    if mp_list:
        pool = mp.Pool(mp_procs)
        results = pool.map(dripy.call_mp, mp_list, chunksize=1)
        pool.close()
        pool.join()
        del results, pool

    logging.debug('\nScript complete')
def _remove_options(config, section, *options):
    """Best-effort removal of one or more options from a config section

    Options that are not present are ignored (remove_option() returns
    False for them).  A missing section is also ignored so that callers
    can treat removal as optional cleanup of a template file.
    """
    for option in options:
        try:
            config.remove_option(section, option)
        except configparser.NoSectionError:
            pass


def _suffixed_ini_path(template_ini, output_ws, suffix):
    """Build the output path for a copy of a template INI file

    The template file name is kept, but '.ini' is replaced with
    '_<suffix>.ini' and the file is placed in output_ws.
    """
    return os.path.join(
        output_ws,
        os.path.basename(template_ini).replace(
            '.ini', '_{}.ini'.format(suffix)))


def main(ini_path, tile_list=None, overwrite_flag=False):
    """Prep Landsat path/row specific data

    Reads the project INI file, checks that the configured input
    folders/files exist, and then writes path/row specific copies of the
    METRIC, pixel rating, and Monte Carlo template INI files (one set per
    tile) plus a single interpolator INI file for the year.

    Args:
        ini_path (str): file path of the input parameters file
        tile_list (list): list of Landsat path/row (i.e. [p45r43, p45r33])
            This will override the tile list in the INI file
        overwrite_flag (bool): if True, remove and rebuild existing
            output INI files

    Returns:
        None

    Note:
        sys.exit() is called if the METRIC hourly weather type is invalid
        or if the Landfire input path does not exist.
    """
    logging.info('\nPrepare path/row INI files')

    # Open config file
    config = python_common.open_ini(ini_path)

    # Get input parameters
    logging.debug('  Reading Input File')
    year = config.getint('INPUTS', 'year')
    if tile_list is None:
        tile_list = python_common.read_param('tile_list', [], config, 'INPUTS')
    project_ws = config.get('INPUTS', 'project_folder')
    logging.debug('  Year: {}'.format(year))
    logging.debug('  Path/rows: {}'.format(', '.join(tile_list)))
    logging.debug('  Project: {}'.format(project_ws))

    ini_file_flag = python_common.read_param('ini_file_flag', True, config,
                                             'INPUTS')
    landsat_flag = python_common.read_param('landsat_flag', True, config,
                                            'INPUTS')
    ledaps_flag = python_common.read_param('ledaps_flag', False, config,
                                           'INPUTS')
    dem_flag = python_common.read_param('dem_flag', True, config, 'INPUTS')
    nlcd_flag = python_common.read_param('nlcd_flag', True, config, 'INPUTS')
    cdl_flag = python_common.read_param('cdl_flag', True, config, 'INPUTS')
    landfire_flag = python_common.read_param('landfire_flag', False, config,
                                             'INPUTS')
    field_flag = python_common.read_param('field_flag', False, config,
                                          'INPUTS')
    metric_flag = python_common.read_param('metric_flag', True, config,
                                           'INPUTS')
    monte_carlo_flag = python_common.read_param('monte_carlo_flag', False,
                                                config, 'INPUTS')
    interp_rasters_flag = python_common.read_param('interpolate_rasters_flag',
                                                   False, config, 'INPUTS')
    interp_tables_flag = python_common.read_param('interpolate_tables_flag',
                                                  False, config, 'INPUTS')
    # Several inputs are shared by the Monte Carlo and interpolator tools
    swb_inputs_flag = (
        monte_carlo_flag or interp_rasters_flag or interp_tables_flag)

    metric_hourly_weather = python_common.read_param('metric_hourly_weather',
                                                     'NLDAS', config, 'INPUTS')

    footprint_path = config.get('INPUTS', 'footprint_path')
    # For now, assume the UTM zone file is colocated with the footprints shapefile
    utm_path = python_common.read_param(
        'utm_path',
        os.path.join(os.path.dirname(footprint_path),
                     'wrs2_tile_utm_zones.json'), config, 'INPUTS')
    skip_list_path = python_common.read_param('skip_list_path', '', config,
                                              'INPUTS')

    # Ts and albedo corrections
    # The parameter names must not have trailing whitespace, otherwise the
    #   lookup cannot match the option name stored by the parser
    ts_correction_flag = python_common.read_param('Ts_correction_flag', True,
                                                  config, 'INPUTS')
    k_value = python_common.read_param('K_value', 2, config, 'INPUTS')
    albedo_correction_flag = python_common.read_param(
        'albedo_correction_flag', True, config, 'INPUTS')
    dense_veg_min_albedo = python_common.read_param('dense_veg_min_albedo',
                                                    0.18, config, 'INPUTS')

    # tile_gcs_buffer = read_param('tile_buffer', 0.1, config)

    # Template input files for scripts
    if metric_flag:
        metric_ini = config.get('INPUTS', 'metric_ini')
        pixel_rating_ini = config.get('INPUTS', 'pixel_rating_ini')
    if monte_carlo_flag:
        monte_carlo_ini = config.get('INPUTS', 'monte_carlo_ini')

    if interp_rasters_flag or interp_tables_flag:
        interpolate_folder = python_common.read_param('interpolate_folder',
                                                      'ET', config)
        interpolate_ini = config.get('INPUTS', 'interpolate_ini')
    if interp_rasters_flag:
        study_area_path = config.get('INPUTS', 'study_area_path')
        study_area_mask_flag = python_common.read_param(
            'study_area_mask_flag', True, config)
        study_area_snap = python_common.read_param('study_area_snap', (0, 0),
                                                   config)
        study_area_cellsize = python_common.read_param('study_area_cellsize',
                                                       30, config)
        study_area_buffer = python_common.read_param('study_area_buffer', 0,
                                                     config)
        study_area_proj = python_common.read_param('study_area_proj', '',
                                                   config)
    if interp_tables_flag:
        zones_path = config.get('INPUTS', 'zones_path')
        zones_name_field = python_common.read_param('zones_name_field', 'FID',
                                                    config)
        # zones_buffer = read_param('zones_buffer', 0, config)
        zones_snap = python_common.read_param('zones_snap', (0, 0), config)
        zones_cellsize = python_common.read_param('zones_cellsize', 30, config)
        # zones_proj = read_param('zones_proj', '', config)
        zones_mask = python_common.read_param('zones_mask', None, config)
        # zones_buffer and zones_proj are not currently implemented
        zones_buffer = None
        zones_proj = None

    # Input/output folder and file paths
    # Required options are read with config.get() so that a missing option
    #   raises immediately, even if the value is not used further below
    if landsat_flag:
        landsat_input_ws = config.get('INPUTS', 'landsat_input_folder')
    else:
        landsat_input_ws = None
    if ledaps_flag:
        ledaps_input_ws = config.get('INPUTS', 'ledaps_input_folder')
    else:
        ledaps_input_ws = None

    if dem_flag:
        dem_input_ws = config.get('INPUTS', 'dem_input_folder')
        dem_tile_fmt = config.get('INPUTS', 'dem_tile_fmt')
        dem_output_ws = config.get('INPUTS', 'dem_output_folder')
        dem_output_name = python_common.read_param('dem_output_name',
                                                   'dem.img', config)
        # dem_output_name = config.get('INPUTS', 'dem_output_name')
    else:
        dem_input_ws, dem_tile_fmt = None, None
        dem_output_ws, dem_output_name = None, None

    if nlcd_flag:
        nlcd_input_path = config.get('INPUTS', 'nlcd_input_path')
        nlcd_output_ws = config.get('INPUTS', 'nlcd_output_folder')
        nlcd_output_fmt = python_common.read_param('nlcd_output_fmt',
                                                   'nlcd_{:04d}.img', config)
    else:
        nlcd_input_path, nlcd_output_ws, nlcd_output_fmt = None, None, None

    if cdl_flag:
        cdl_input_path = config.get('INPUTS', 'cdl_input_path')
        cdl_ag_list = config.get('INPUTS', 'cdl_ag_list')
        cdl_ag_list = list(python_common.parse_int_set(cdl_ag_list))
        # default_cdl_ag_list = range(1,62) + range(66,78) + range(204,255)
        # cdl_ag_list = read_param(
        #    'cdl_ag_list', default_cdl_ag_list, config)
        # cdl_ag_list = list(map(int, cdl_ag_list))
        # cdl_non_ag_list = read_param(
        #    'cdl_non_ag_list', [], config)
        cdl_output_ws = config.get('INPUTS', 'cdl_output_folder')
        cdl_output_fmt = python_common.read_param('cdl_output_fmt',
                                                  'cdl_{:04d}.img', config)
        cdl_ag_output_fmt = python_common.read_param('cdl_ag_output_fmt',
                                                     'cdl_ag_{:04d}.img',
                                                     config)
    else:
        cdl_input_path, cdl_ag_list = None, None
        cdl_output_ws, cdl_output_fmt, cdl_ag_output_fmt = None, None, None

    if landfire_flag:
        landfire_input_path = config.get('INPUTS', 'landfire_input_path')
        landfire_ag_list = config.get('INPUTS', 'landfire_ag_list')
        landfire_ag_list = list(python_common.parse_int_set(landfire_ag_list))
        # default_landfire_ag_list = range(3960,4000)
        # landfire_ag_list = read_param(
        #    'landfire_ag_list', default_landfire_ag_list, config)
        # landfire_ag_list = list(map(int, landfire_ag_list))
        landfire_output_ws = config.get('INPUTS', 'landfire_output_folder')
        landfire_output_fmt = python_common.read_param('landfire_output_fmt',
                                                       'landfire_{:04d}.img',
                                                       config)
        landfire_ag_output_fmt = python_common.read_param(
            'landfire_ag_output_fmt', 'landfire_ag_{:04d}.img', config)
    else:
        landfire_input_path, landfire_ag_list = None, None
        landfire_output_ws = None
        landfire_output_fmt, landfire_ag_output_fmt = None, None

    if field_flag:
        field_input_path = config.get('INPUTS', 'field_input_path')
        field_output_ws = config.get('INPUTS', 'field_output_folder')
        field_output_fmt = python_common.read_param('field_output_fmt',
                                                    'fields_{:04d}.img',
                                                    config)
    else:
        field_input_path = None
        field_output_ws, field_output_fmt = None, None

    if monte_carlo_flag:
        etrf_training_path = config.get('INPUTS', 'etrf_training_path')
        # mc_iter_list = config.get('INPUTS', 'mc_iter_list')
        # mc_iter_list = list(python_common.parse_int_set(mc_iter_list))
    if swb_inputs_flag:
        etrf_input_ws = python_common.read_param('etrf_input_folder', None,
                                                 config)
        # if etrf_input_ws is None:
        #     etrf_input_ws = os.path.join(project_ws, year)
        etr_input_ws = config.get('INPUTS', 'etr_input_folder')
        ppt_input_ws = config.get('INPUTS', 'ppt_input_folder')
        etr_input_re = config.get('INPUTS', 'etr_input_re')
        ppt_input_re = config.get('INPUTS', 'ppt_input_re')
        awc_input_path = config.get('INPUTS', 'awc_input_path')
        spinup_days = python_common.read_param('swb_spinup_days', 30, config,
                                               'INPUTS')
        min_spinup_days = python_common.read_param('swb_min_spinup_days', 5,
                                                   config, 'INPUTS')

    # Weather data parameters
    if metric_flag:
        metric_hourly_weather_list = ['NLDAS', 'REFET']
        metric_hourly_weather = config.get('INPUTS',
                                           'metric_hourly_weather').upper()
        if metric_hourly_weather not in metric_hourly_weather_list:
            logging.error(
                ('\nERROR: The METRIC hourly weather type {} is invalid.' +
                 '\nERROR: Set metric_hourly_weather to {}').format(
                     metric_hourly_weather,
                     ','.join(metric_hourly_weather_list)))
            sys.exit()
        elif metric_hourly_weather == 'REFET':
            refet_params_path = os.path.normpath(
                config.get('INPUTS', 'refet_params_path'))
        elif metric_hourly_weather == 'NLDAS':
            # metric_hourly_re = config.get('INPUTS', 'metric_hourly_re')
            # metric_daily_re = config.get('INPUTS', 'metric_daily_re')
            metric_ea_input_ws = config.get('INPUTS', 'metric_ea_input_folder')
            metric_wind_input_ws = config.get('INPUTS',
                                              'metric_wind_input_folder')
            metric_etr_input_ws = config.get('INPUTS',
                                             'metric_etr_input_folder')
            # The Tair inputs are optional; if either option is missing or
            #   the flag cannot be parsed, disable the Tair calculation
            try:
                calc_metric_tair_flag = config.getboolean(
                    'INPUTS', 'calc_metric_tair_flag')
                metric_tair_input_ws = config.get('INPUTS',
                                                  'metric_tair_input_folder')
            except (configparser.Error, ValueError):
                calc_metric_tair_flag = False
                metric_tair_input_ws = ''

    # Check inputs folders/paths
    logging.info('\nChecking input folders/files')
    file_check(footprint_path)
    if landsat_flag:
        folder_check(landsat_input_ws)
    if ledaps_flag:
        folder_check(ledaps_input_ws)
    if dem_flag:
        folder_check(dem_input_ws)
    if nlcd_flag:
        file_check(nlcd_input_path)
    if cdl_flag:
        file_check(cdl_input_path)
    if landfire_flag:
        # Landfire will likely be an ESRI grid (set as a folder)
        if not (os.path.isdir(landfire_input_path)
                or os.path.isfile(landfire_input_path)):
            logging.error('  {} does not exist.'.format(landfire_input_path))
            sys.exit()
    if field_flag:
        file_check(field_input_path)
    if metric_flag:
        file_check(metric_ini)
        file_check(pixel_rating_ini)
    if swb_inputs_flag:
        if etrf_input_ws is not None:
            folder_check(etrf_input_ws)
        folder_check(etr_input_ws)
        folder_check(ppt_input_ws)
        file_check(awc_input_path)
    if monte_carlo_flag:
        file_check(monte_carlo_ini)
        file_check(etrf_training_path)
    if metric_flag:
        if metric_hourly_weather == 'REFET':
            file_check(refet_params_path)
        elif metric_hourly_weather == 'NLDAS':
            folder_check(metric_ea_input_ws)
            folder_check(metric_wind_input_ws)
            folder_check(metric_etr_input_ws)
            if calc_metric_tair_flag:
                folder_check(metric_tair_input_ws)
    if skip_list_path:
        file_check(skip_list_path)

    # Build output folders
    if not os.path.isdir(project_ws):
        os.makedirs(project_ws)

    # Path and row are zero padded to three digits (i.e. p045r033)
    tile_fmt = 'p{:03d}r{:03d}'

    # Set snap environment parameters
    snap_cs = 30
    snap_xmin, snap_ymin = (15, 15)
    env = gdc.env
    env.cellsize = snap_cs
    env.snap_xmin, env.snap_ymin = snap_xmin, snap_ymin

    # Use WGSS84 (EPSG 4326) for GCS spatial reference
    # Could also use NAD83 (EPSG 4269)
    # gcs_epsg = 4326
    # gcs_osr = epsg_osr(4326)
    # gcs_proj = osr_proj(gcs_osr)

    # Landsat Footprints (WRS2 Descending Polygons)
    logging.debug('\nFootprint (WRS2 descending should be GCS84):')
    tile_gcs_osr = gdc.feature_path_osr(footprint_path)
    logging.debug('  OSR: {}'.format(tile_gcs_osr))

    # Doublecheck that WRS2 descending shapefile is GCS84
    # if tile_gcs_osr != epsg_osr(4326):
    #     logging.error('  WRS2 is not GCS84')
    #     sys.exit()

    # Get geometry for each path/row
    tile_gcs_wkt_dict = path_row_wkt_func(footprint_path,
                                          path_field='PATH',
                                          row_field='ROW')

    # Get UTM zone for each path/row
    # DEADBEEF - Using "eval" is considered unsafe and should be changed
    # SECURITY: eval() executes whatever is in the UTM zone file, so the
    #   file must come from a trusted source.  The call is kept because the
    #   file format is not visible from here (json.load() or
    #   ast.literal_eval() would be the safe replacements to evaluate).
    with open(utm_path, 'r') as utm_f:
        tile_utm_zone_dict = eval(utm_f.read())

    # Check that each path/row extent and UTM zone exist
    # Problems are only logged here; the tiles are still processed below
    logging.info('\nChecking path/row list against footprint shapefile')
    for tile_name in sorted(tile_list):
        if tile_name not in tile_gcs_wkt_dict:
            logging.error(
                '  {} feature not in footprint shapefile'.format(tile_name))
        elif tile_name not in tile_utm_zone_dict:
            logging.error(
                '  {} UTM zone not in footprint shapefile'.format(tile_name))
        elif tile_utm_zone_dict[tile_name] == 0:
            logging.error(('  UTM zone is not set for {} in ' +
                           'footprint shapefile').format(tile_name))

    # Read RefET parameters
    # refet_params_path is only set when METRIC is enabled, so the METRIC
    #   flag must be checked before the weather type
    if metric_flag and metric_hourly_weather == 'REFET':
        refet_ws = os.path.dirname(refet_params_path)
        with open(refet_params_path, 'r') as input_f:
            lines = input_f.readlines()
        lines = [line.strip() for line in lines]
        lines = [line.split(',') for line in lines if line]
        # First non-empty row is the header
        columns = lines.pop(0)
        # Parameters are keyed by "<year>_<tile name>", then by the
        #   lowercase column name
        refet_params_dict = defaultdict(dict)
        for line in lines:
            tile_name = tile_fmt.format(int(line[columns.index('PATH')]),
                                        int(line[columns.index('ROW')]))
            yr_tile_name = '{}_{}'.format(line[columns.index('YEAR')],
                                          tile_name)
            for i, column in enumerate(columns):
                if column not in ['YEAR', 'PATH', 'ROW']:
                    refet_params_dict[yr_tile_name][column.lower()] = line[i]

    # Process input files for each year and path/row
    logging.info('\nBuilding path/row specific input files')
    for tile_name in tile_list:
        tile_output_ws = os.path.join(project_ws, str(year), tile_name)
        logging.info('{} {}'.format(year, tile_name))
        yr_tile_name = '{}_{}'.format(year, tile_name)
        if not os.path.isdir(tile_output_ws):
            os.makedirs(tile_output_ws)

        # File paths
        # Existing files are removed here (when overwriting) so that the
        #   "not os.path.isfile()" checks below trigger a rebuild
        if metric_flag:
            tile_metric_ini = _suffixed_ini_path(
                metric_ini, tile_output_ws, yr_tile_name)
            tile_pixel_rating_ini = _suffixed_ini_path(
                pixel_rating_ini, tile_output_ws, yr_tile_name)
            if overwrite_flag and os.path.isfile(tile_metric_ini):
                os.remove(tile_metric_ini)
            if overwrite_flag and os.path.isfile(tile_pixel_rating_ini):
                os.remove(tile_pixel_rating_ini)

        # Monte Carlo is independent of tile and year, but process
        #   with METRIC input file
        if monte_carlo_flag:
            tile_monte_carlo_ini = _suffixed_ini_path(
                monte_carlo_ini, tile_output_ws, yr_tile_name)
            if overwrite_flag and os.path.isfile(tile_monte_carlo_ini):
                os.remove(tile_monte_carlo_ini)

        if dem_flag:
            dem_output_path = os.path.join(dem_output_ws, tile_name,
                                           dem_output_name)
        if nlcd_flag:
            nlcd_output_path = os.path.join(nlcd_output_ws, tile_name,
                                            nlcd_output_fmt.format(year))
        if cdl_flag:
            cdl_ag_output_path = os.path.join(cdl_output_ws, tile_name,
                                              cdl_ag_output_fmt.format(year))
        if landfire_flag:
            # Use the "ag" format string to match the CDL ag raster path
            landfire_ag_output_path = os.path.join(
                landfire_output_ws, tile_name,
                landfire_ag_output_fmt.format(year))
        if field_flag:
            field_output_path = os.path.join(field_output_ws, tile_name,
                                             field_output_fmt.format(year))

        # Check that the path/row was in the RefET parameters file
        if (metric_flag and metric_hourly_weather == 'REFET'
                and yr_tile_name not in refet_params_dict):
            logging.error(
                ('    The year {} & path/row {} is not in the ' +
                 'RefET parameters csv, skipping').format(year, tile_name))
            continue

        if metric_flag and not os.path.isfile(tile_metric_ini):
            # DEADBEEF - This approach removes all formatting and comments
            tile_config = configparser.RawConfigParser()
            tile_config.read(metric_ini)
            # shutil.copy(metric_ini, tile_metric_ini)
            # config.read(tile_metric_ini)

            if metric_hourly_weather == 'REFET':
                # Add RefET options
                refet_params = refet_params_dict[yr_tile_name]
                tile_config.set('INPUTS', 'weather_data_source', 'REFET')
                tile_config.set(
                    'INPUTS', 'refet_file',
                    os.path.join(
                        refet_ws,
                        os.path.normpath(refet_params['refet_file'])))
                tile_config.set('INPUTS', 'gmt_offset',
                                refet_params['gmt_offset'])
                tile_config.set('INPUTS', 'datum', refet_params['datum'])
            elif metric_hourly_weather == 'NLDAS':
                # Add NLDAS options
                tile_config.set('INPUTS', 'weather_data_source', 'NLDAS')
                # Remove RefET options (datum is intentionally kept)
                _remove_options(tile_config, 'INPUTS',
                                'refet_file', 'gmt_offset')

            if dem_flag:
                tile_config.set('INPUTS', 'dem_raster', dem_output_path)
            else:
                _remove_options(tile_config, 'INPUTS', 'dem_raster')
                # config.set('INPUTS', 'dem_raster', 'None')

            if nlcd_flag:
                tile_config.set('INPUTS', 'landuse_raster', nlcd_output_path)
            else:
                _remove_options(tile_config, 'INPUTS', 'landuse_raster')
                # config.set('INPUTS', 'landuse_raster', 'None')

            logging.debug('  {}'.format(tile_metric_ini))
            with open(tile_metric_ini, 'w') as config_f:
                tile_config.write(config_f)

        if metric_flag and not os.path.isfile(tile_pixel_rating_ini):
            tile_config = configparser.RawConfigParser()
            tile_config.read(pixel_rating_ini)
            if nlcd_flag:
                tile_config.set('INPUTS', 'landuse_raster', nlcd_output_path)
            else:
                _remove_options(tile_config, 'INPUTS', 'landuse_raster')
                # config.set('INPUTS', 'landuse_raster', 'None')
            if cdl_flag:
                tile_config.set('INPUTS', 'apply_cdl_ag_mask', True)
                tile_config.set('INPUTS', 'cdl_ag_raster', cdl_ag_output_path)
            else:
                tile_config.set('INPUTS', 'apply_cdl_ag_mask', False)
                _remove_options(tile_config, 'INPUTS', 'cdl_ag_raster')
                # config.set('INPUTS', 'cdl_ag_raster', 'None')
            if field_flag:
                tile_config.set('INPUTS', 'apply_field_mask', True)
                tile_config.set('INPUTS', 'fields_raster', field_output_path)
            else:
                tile_config.set('INPUTS', 'apply_field_mask', False)
                _remove_options(tile_config, 'INPUTS', 'fields_raster')
                # config.set('INPUTS', 'fields_raster', 'None')
            # if landfire_flag:
            #     config.set('INPUTS', 'apply_landfire_ag_mask', True)
            #     config.set('INPUTS', 'landfire_ag_raster', cdl_ag_output_path)
            # else:
            #     config.set('INPUTS', 'apply_landfire_ag_mask', False)
            #     try: config.remove_option('INPUTS', 'landfire_ag_raster')
            #     except: pass
            #     # config.set('INPUTS', 'landfire_ag_raster', 'None')

            logging.debug('  {}'.format(tile_pixel_rating_ini))
            with open(tile_pixel_rating_ini, 'w') as config_f:
                tile_config.write(config_f)

        if monte_carlo_flag and not os.path.isfile(tile_monte_carlo_ini):
            tile_config = configparser.RawConfigParser()
            tile_config.read(monte_carlo_ini)
            tile_config.set('INPUTS', 'etrf_training_path', etrf_training_path)
            tile_config.set('INPUTS', 'etr_ws', etr_input_ws)
            tile_config.set('INPUTS', 'ppt_ws', ppt_input_ws)
            tile_config.set('INPUTS', 'etr_re', etr_input_re)
            tile_config.set('INPUTS', 'ppt_re', ppt_input_re)
            tile_config.set('INPUTS', 'awc_path', awc_input_path)
            tile_config.set('INPUTS', 'swb_spinup_days', spinup_days)
            tile_config.set('INPUTS', 'swb_min_spinup_days', min_spinup_days)

            logging.debug('  {}'.format(tile_monte_carlo_ini))
            with open(tile_monte_carlo_ini, 'w') as config_f:
                tile_config.write(config_f)

    # Interpolator input file
    if interp_rasters_flag or interp_tables_flag:
        logging.info('\nBuilding interpolator input files')
        year_ws = os.path.join(project_ws, str(year))
        year_interpolator_ini = _suffixed_ini_path(
            interpolate_ini, year_ws,
            '{}_{}'.format(year, interpolate_folder.lower()))
        if overwrite_flag and os.path.isfile(year_interpolator_ini):
            os.remove(year_interpolator_ini)
        if not os.path.isfile(year_interpolator_ini):
            # The year folder is normally built with the tile folders above,
            #   but it will not exist yet if the tile list is empty
            if not os.path.isdir(year_ws):
                os.makedirs(year_ws)

            # First copy the template config file to the year folder
            shutil.copy(interpolate_ini, year_interpolator_ini)

            # Open the existing config file and update the values
            # DEADBEEF - This approach removes all formatting and comments
            interp_config = configparser.RawConfigParser()
            interp_config.read(year_interpolator_ini)
            interp_config.set('INPUTS', 'folder_name', interpolate_folder)
            interp_config.set('INPUTS', 'tile_list', ', '.join(tile_list))
            if interp_rasters_flag:
                interp_config.set('INPUTS', 'study_area_path', study_area_path)
                interp_config.set('INPUTS', 'study_area_mask_flag',
                                  study_area_mask_flag)
                interp_config.set('INPUTS', 'study_area_snap',
                                  ', '.join(map(str, study_area_snap)))
                interp_config.set('INPUTS', 'study_area_cellsize',
                                  study_area_cellsize)
                interp_config.set('INPUTS', 'study_area_buffer',
                                  study_area_buffer)
                if study_area_proj:
                    interp_config.set('INPUTS', 'study_area_proj',
                                      study_area_proj)
                else:
                    _remove_options(interp_config, 'INPUTS',
                                    'study_area_proj')
            if interp_tables_flag:
                interp_config.set('INPUTS', 'zones_path', zones_path)
                interp_config.set('INPUTS', 'zones_snap',
                                  ', '.join(map(str, zones_snap)))
                interp_config.set('INPUTS', 'zones_cellsize', zones_cellsize)
                interp_config.set('INPUTS', 'zones_name_field',
                                  zones_name_field)
                # zones_buffer is not currently implemented
                if zones_buffer:
                    interp_config.set('INPUTS', 'zones_buffer', zones_buffer)
                else:
                    _remove_options(interp_config, 'INPUTS', 'zones_buffer')
                # zones proj., cellsize, and snap are not needed or
                #   read in if zones_mask is set
                # zones_proj is not currently implemented
                if zones_mask:
                    interp_config.set('INPUTS', 'zones_mask', zones_mask)
                    _remove_options(
                        interp_config, 'INPUTS',
                        'zones_proj', 'zones_cellsize', 'zones_snap')
                # elif zones_proj:
                #     config.set('INPUTS', 'zones_proj', zones_proj)
                #     try:
                #         config.remove_option('INPUTS', 'zones_mask')
                #     except:
                #         pass
                else:
                    _remove_options(interp_config, 'INPUTS',
                                    'zones_proj', 'zones_mask')
            interp_config.set('INPUTS', 'year', year)
            interp_config.set('INPUTS', 'footprint_path', footprint_path)
            if etrf_input_ws is not None:
                interp_config.set('INPUTS', 'etrf_input_folder', etrf_input_ws)
            interp_config.set('INPUTS', 'etr_input_folder', etr_input_ws)
            interp_config.set('INPUTS', 'etr_input_re', etr_input_re)
            interp_config.set('INPUTS', 'ppt_input_folder', ppt_input_ws)
            interp_config.set('INPUTS', 'ppt_input_re', ppt_input_re)
            # DEADBEEF - add check for SWB flag
            interp_config.set('INPUTS', 'awc_input_path', awc_input_path)
            interp_config.set('INPUTS', 'swb_spinup_days', spinup_days)
            interp_config.set('INPUTS', 'swb_min_spinup_days', min_spinup_days)

            # Albedo and Ts correction
            interp_config.set('INPUTS', 'Ts_correction_flag',
                              ts_correction_flag)
            interp_config.set('INPUTS', 'K_value', k_value)
            interp_config.set('INPUTS', 'albedo_correction_flag',
                              albedo_correction_flag)
            interp_config.set('INPUTS', 'dense_veg_min_albedo',
                              dense_veg_min_albedo)

            logging.debug('  {}'.format(year_interpolator_ini))
            with open(year_interpolator_ini, 'w') as config_f:
                interp_config.write(config_f)

    logging.debug('\nScript complete')
def main(grb_ws=os.getcwd(),
         ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(),
         etr_flag=False,
         eto_flag=False,
         landsat_ws=None,
         start_date=None,
         end_date=None,
         times_str='',
         extent_path=None,
         output_extent=None,
         daily_flag=True,
         stats_flag=True,
         overwrite_flag=False):
    """Compute hourly ETr/ETo from NLDAS data

    Walks grb_ws looking for YYYY/DOY sub folders of hourly NLDAS GRB files.
    For each day folder in the date range a 24 band raster is written
    (band number = UTC hour + 1) for ETr and/or ETo and, if daily_flag is
    set, a daily total raster is written as well.

    Args:
        grb_ws (str): folder of NLDAS GRB files
        ancillary_ws (str): folder of ancillary rasters
        output_ws (str): folder of output rasters
        etr_flag (bool): if True, compute alfalfa reference ET (ETr)
        eto_flag (bool): if True, compute grass reference ET (ETo).
            If neither etr_flag nor eto_flag is set, ETr is computed.
        landsat_ws (str): folder of Landsat scenes or tar.gz files.
            If set, only dates found in the Landsat IDs are processed.
        start_date (str): ISO format date (YYYY-MM-DD).
            Defaults to 2017-01-01 if not set or not parseable.
        end_date (str): ISO format date (YYYY-MM-DD)
            Defaults to 2017-12-31 if not set or not parseable.
        times_str (str): comma separated values and/or ranges of UTC hours
            (i.e. "1, 2, 5-8")
            Parsed with python_common.parse_int_set()
        extent_path (str): file path defining the output extent
        output_extent (list): decimal degrees values defining output extent
        daily_flag (bool): if True, save daily ETr/ETo sum raster.
            Default is True
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nComputing NLDAS hourly ETr/ETo')
    np.seterr(invalid='ignore')

    # Compute ETr and/or ETo
    if not etr_flag and not eto_flag:
        logging.info('  ETo/ETr flag(s) not set, defaulting to ETr')
        etr_flag = True

    # If a date is not set (None) or can't be parsed, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2017, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date:   {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2017, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))

    # Only process a specific hours
    if not times_str:
        time_list = range(0, 24, 1)
    else:
        time_list = list(parse_int_set(times_str))
    # Times are compared as "HH00" strings against the GRB file name
    time_list = ['{:02d}00'.format(t) for t in time_list]

    etr_folder = 'etr'
    eto_folder = 'eto'
    hour_fmt = '{}_{:04d}{:02d}{:02d}_hourly_nldas.img'
    # hour_fmt = '{}_{:04d}{:02d}{:02d}_{4:04d}_nldas.img'
    day_fmt = '{}_{:04d}{:02d}{:02d}_nldas.img'
    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    # Raw strings avoid invalid escape sequence warnings for "\d"
    input_re = re.compile(
        r'NLDAS_FORA0125_H.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})'
        r'(?P<DAY>\d{2}).(?P<TIME>\d{4}).002.grb$')

    # Folder names must be exactly 4 digits (year) or 3 digits (DOY)
    year_re = re.compile(r'\d{4}$')
    doy_re = re.compile(r'\d{3}$')

    # Assume NLDAS is NAD83
    # input_epsg = 'EPSG:4269'

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')
    elev_path = os.path.join(ancillary_ws, 'nldas_elev.img')
    lat_path = os.path.join(ancillary_ws, 'nldas_lat.img')
    lon_path = os.path.join(ancillary_ws, 'nldas_lon.img')

    # Build a date list from landsat_ws scene folders or tar.gz files
    date_list = []
    if landsat_ws is not None and os.path.isdir(landsat_ws):
        logging.info('\nReading dates from Landsat IDs')
        logging.info('  {}'.format(landsat_ws))
        landsat_re = re.compile(
            r'^(?:LT04|LT05|LE07|LC08)_(?:\d{3})(?:\d{3})_'
            r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})')

        def landsat_date(name):
            """Return the ISO date in a Landsat ID or None if not parseable"""
            landsat_match = landsat_re.match(name)
            if landsat_match is None:
                return None
            try:
                return dt.datetime.strptime(
                    '_'.join(landsat_match.groups()),
                    '%Y_%m_%d').date().isoformat()
            except ValueError:
                # Digits matched but are not a valid calendar date
                return None

        for root, dirs, files in os.walk(landsat_ws, topdown=True):
            # If root matches, don't explore subfolders
            root_date = landsat_date(os.path.basename(root))
            if root_date is not None:
                date_list.append(root_date)
                dirs[:] = []

            for file_name in files:
                file_date = landsat_date(file_name)
                if file_date is not None:
                    date_list.append(file_date)
        date_list = sorted(set(date_list))
    # elif landsat_ws is not None and os.path.isfile(landsat_ws):
    #     with open(landsat_ws) as landsat_f:

    # This allows GDAL to throw Python Exceptions
    # gdal.UseExceptions()
    # mem_driver = gdal.GetDriverByName('MEM')

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    nldas_osr = gdc.raster_ds_osr(nldas_ds)
    nldas_proj = gdc.osr_proj(nldas_osr)
    nldas_cs = gdc.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = gdc.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug('  Projection: {}'.format(nldas_proj))
    logging.debug('  Cellsize: {}'.format(nldas_cs))
    logging.debug('  Geo: {}'.format(nldas_geo))
    logging.debug('  Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    # output_extent takes precedence over extent_path if both are set
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        nldas_extent = gdc.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        # Log the snapped extent (the input extent was logged above)
        logging.debug('  Extent: {}'.format(nldas_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            nldas_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = gdc.project_extent(nldas_extent, extent_osr, nldas_osr,
                                          extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Read the NLDAS mask array if present
    # NOTE: mask_array is not referenced again in this function
    if mask_path and os.path.isfile(mask_path):
        mask_array, mask_nodata = gdc.raster_to_array(mask_path,
                                                      mask_extent=nldas_extent,
                                                      fill_value=0,
                                                      return_nodata=True)
        mask_array = mask_array != mask_nodata
    else:
        mask_array = None

    # Read ancillary arrays (or subsets?)
    elev_array = gdc.raster_to_array(elev_path,
                                     mask_extent=nldas_extent,
                                     return_nodata=False)
    # pair_array = et_common.air_pressure_func(elev_array)
    lat_array = gdc.raster_to_array(lat_path,
                                    mask_extent=nldas_extent,
                                    return_nodata=False)
    lon_array = gdc.raster_to_array(lon_path,
                                    mask_extent=nldas_extent,
                                    return_nodata=False)

    # Hourly RefET functions expects lat/lon in radians
    lat_array *= (math.pi / 180)
    lon_array *= (math.pi / 180)

    # Build output folder
    etr_ws = os.path.join(output_ws, etr_folder)
    eto_ws = os.path.join(output_ws, eto_folder)
    if etr_flag and not os.path.isdir(etr_ws):
        os.makedirs(etr_ws)
    if eto_flag and not os.path.isdir(eto_ws):
        os.makedirs(eto_ws)

    # DEADBEEF - Instead of processing all available files, the following
    #   code will process files for target dates
    # for input_dt in date_range(start_dt, end_dt + dt.timedelta(1)):
    #     logging.info(input_dt.date())

    # Iterate all available files and check dates if necessary
    # Each sub folder in the main folder has all imagery for 1 day
    #   (in UTC time)
    # The path for each subfolder is the /YYYY/DOY
    # Errors are collected per input file and reported at the end
    errors = defaultdict(list)
    for root, folders, files in os.walk(grb_ws):
        root_split = os.path.normpath(root).split(os.sep)

        # If the year/doy is outside the range, skip
        # The length check guards against single component relative paths
        if (len(root_split) >= 2 and year_re.match(root_split[-2])
                and doy_re.match(root_split[-1])):
            root_dt = dt.datetime.strptime(
                '{}_{}'.format(root_split[-2], root_split[-1]), '%Y_%j')
            logging.info('{}'.format(root_dt.date()))
            if ((start_dt is not None and root_dt < start_dt)
                    or (end_dt is not None and root_dt > end_dt)):
                continue
            elif date_list and root_dt.date().isoformat() not in date_list:
                continue
        # If the year is outside the range, don't search subfolders
        elif year_re.match(root_split[-1]):
            root_year = int(root_split[-1])
            logging.info('Year: {}'.format(root_year))
            if ((start_dt is not None and root_year < start_dt.year)
                    or (end_dt is not None and root_year > end_dt.year)):
                folders[:] = []
            else:
                # Sorting in place makes os.walk visit the DOY folders in order
                folders[:] = sorted(folders)
            continue
        else:
            continue
        logging.debug('  {}'.format(root))

        # Start off assuming every file needs to be processed
        day_skip_flag = False

        # Build output folders if necessary
        etr_year_ws = os.path.join(etr_ws, str(root_dt.year))
        eto_year_ws = os.path.join(eto_ws, str(root_dt.year))
        if etr_flag and not os.path.isdir(etr_year_ws):
            os.makedirs(etr_year_ws)
        if eto_flag and not os.path.isdir(eto_year_ws):
            os.makedirs(eto_year_ws)

        # Build daily total paths
        etr_day_path = os.path.join(
            etr_year_ws,
            day_fmt.format('etr', root_dt.year, root_dt.month, root_dt.day))
        eto_day_path = os.path.join(
            eto_year_ws,
            day_fmt.format('eto', root_dt.year, root_dt.month, root_dt.day))
        etr_hour_path = os.path.join(
            etr_year_ws,
            hour_fmt.format('etr', root_dt.year, root_dt.month, root_dt.day))
        eto_hour_path = os.path.join(
            eto_year_ws,
            hour_fmt.format('eto', root_dt.year, root_dt.month, root_dt.day))
        # logging.debug('  {}'.format(etr_hour_path))

        # If daily ETr/ETo files are present, day can be skipped
        if not overwrite_flag and daily_flag:
            if etr_flag and not os.path.isfile(etr_day_path):
                pass
            elif eto_flag and not os.path.isfile(eto_day_path):
                pass
            else:
                day_skip_flag = True

        # If the hour and daily files don't need to be made, skip the day
        # NOTE(review): when daily_flag is False, day_skip_flag is never set,
        #   so existing hourly rasters are rebuilt even if overwrite_flag is
        #   False - confirm this is intended
        if not overwrite_flag:
            if etr_flag and not os.path.isfile(etr_hour_path):
                pass
            elif eto_flag and not os.path.isfile(eto_hour_path):
                pass
            elif day_skip_flag:
                logging.debug('  File(s) already exist, skipping')
                continue

        # Create a single raster for each day with 24 bands
        # Each time step will be stored in a separate band
        if etr_flag:
            logging.debug('  {}'.format(etr_day_path))
            gdc.build_empty_raster(etr_hour_path,
                                   band_cnt=24,
                                   output_dtype=np.float32,
                                   output_proj=nldas_proj,
                                   output_cs=nldas_cs,
                                   output_extent=nldas_extent,
                                   output_fill_flag=True)
        if eto_flag:
            logging.debug('  {}'.format(eto_day_path))
            gdc.build_empty_raster(eto_hour_path,
                                   band_cnt=24,
                                   output_dtype=np.float32,
                                   output_proj=nldas_proj,
                                   output_cs=nldas_cs,
                                   output_extent=nldas_extent,
                                   output_fill_flag=True)

        # Sum all ETr/ETo images in each folder to generate a UTC day total
        # The totals stay as the int 0 if no hourly file is processed
        etr_day_array = 0
        eto_day_array = 0

        # Process each hour file
        for input_name in sorted(files):
            logging.info('  {}'.format(input_name))
            input_match = input_re.match(input_name)
            if input_match is None:
                logging.debug('    Regular expression didn\'t match, skipping')
                continue
            input_dt = dt.datetime(int(input_match.group('YEAR')),
                                   int(input_match.group('MONTH')),
                                   int(input_match.group('DAY')))
            input_doy = int(input_dt.strftime('%j'))
            time_str = input_match.group('TIME')
            # Bands are 1 based, so UTC hour 00 is written to band 1
            band_num = int(time_str[:2]) + 1
            # if start_dt is not None and input_dt < start_dt:
            #     continue
            # elif end_dt is not None and input_dt > end_dt:
            #     continue
            # elif date_list and input_dt.date().isoformat() not in date_list:
            #     continue
            # When computing daily totals every hour must be processed,
            #   even hours that are not in the time list
            if not daily_flag and time_str not in time_list:
                logging.debug('    Time not in list and not daily, skipping')
                continue

            input_path = os.path.join(root, input_name)
            logging.debug('    Time: {} {}'.format(input_dt.date(), time_str))
            logging.debug('    Band: {}'.format(band_num))

            # Determine band numbering/naming
            try:
                input_band_dict = grib_band_names(input_path)
            except RuntimeError as e:
                errors[input_path].append(e)
                logging.error(' RuntimeError: {} Skipping: {}'.format(
                    e, input_path))
                continue

            # Read input bands
            input_ds = gdal.Open(input_path)

            # Temperature should be in C for et_common.refet_hourly_func()
            if 'Temperature [K]' in input_band_dict.keys():
                temp_band_units = 'K'
                temp_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['Temperature [K]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
            elif 'Temperature [C]' in input_band_dict.keys():
                temp_band_units = 'C'
                temp_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['Temperature [C]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
            else:
                logging.error('Unknown Temperature units, skipping')
                logging.error('  {}'.format(input_band_dict.keys()))
                # Release the dataset before moving to the next file
                input_ds = None
                continue

            # DEADBEEF - Having issue with T appearing to be C but labeled as K
            # Try to determine temperature units from values
            # The units are guessed from whichever reference value
            #   (20 for C, 293 for K) is closest to the array mean
            temp_mean = float(np.nanmean(temp_array))
            temp_units_dict = {20: 'C', 293: 'K'}
            temp_array_units = temp_units_dict[min(
                temp_units_dict, key=lambda x: abs(x - temp_mean))]
            if temp_array_units == 'K' and temp_band_units == 'K':
                logging.debug('  Converting temperature from K to C')
                temp_array -= 273.15
            elif temp_array_units == 'C' and temp_band_units == 'C':
                pass
            elif temp_array_units == 'C' and temp_band_units == 'K':
                logging.debug((
                    '  Temperature units are K in the GRB band name, ' +
                    'but values appear to be C\n    Mean temperature: {:.2f}\n'
                    + '  Values will NOT be adjusted').format(temp_mean))
            elif temp_array_units == 'K' and temp_band_units == 'C':
                logging.debug((
                    '  Temperature units are C in the GRB band name, ' +
                    'but values appear to be K\n    Mean temperature: {:.2f}\n'
                    +
                    '  Values will be adjusted from K to C').format(temp_mean))
                temp_array -= 273.15
            try:
                sph_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['Specific humidity [kg/kg]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
                rs_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict[
                        'Downward shortwave radiation flux [W/m^2]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
                wind_u_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['u-component of wind [m/s]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
                wind_v_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['v-component of wind [m/s]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
                input_ds = None
            except KeyError as e:
                errors[input_path].append(e)
                logging.error(' KeyError: {} Skipping: {}'.format(
                    e, input_ds.GetDescription()))
                # Release the dataset before moving to the next file
                input_ds = None
                continue

            rs_array *= 0.0036  # W m-2 to MJ m-2 hr-1
            # Wind speed is the magnitude of the u/v components
            wind_array = np.sqrt(wind_u_array**2 + wind_v_array**2)
            del wind_u_array, wind_v_array

            # ETr
            if etr_flag:
                etr_array = et_common.refet_hourly_func(temp_array,
                                                        sph_array,
                                                        rs_array,
                                                        wind_array,
                                                        zw=10,
                                                        elev=elev_array,
                                                        lat=lat_array,
                                                        lon=lon_array,
                                                        doy=input_doy,
                                                        time=int(time_str) /
                                                        100,
                                                        ref_type='ETR')
                if daily_flag:
                    etr_day_array += etr_array
                if time_str in time_list:
                    gdc.array_to_comp_raster(etr_array.astype(np.float32),
                                             etr_hour_path,
                                             band=band_num,
                                             stats_flag=False)
                    del etr_array

            # ETo
            if eto_flag:
                eto_array = et_common.refet_hourly_func(temp_array,
                                                        sph_array,
                                                        rs_array,
                                                        wind_array,
                                                        zw=10,
                                                        elev=elev_array,
                                                        lat=lat_array,
                                                        lon=lon_array,
                                                        doy=input_doy,
                                                        time=int(time_str) /
                                                        100,
                                                        ref_type='ETO')
                if daily_flag:
                    eto_day_array += eto_array
                if time_str in time_list:
                    gdc.array_to_comp_raster(eto_array.astype(np.float32),
                                             eto_hour_path,
                                             band=band_num,
                                             stats_flag=False)
                    del eto_array

            del temp_array, sph_array, rs_array, wind_array

        if stats_flag and etr_flag:
            gdc.raster_statistics(etr_hour_path)
        if stats_flag and eto_flag:
            gdc.raster_statistics(eto_hour_path)

        # Save the projected ETr/ETo as 32-bit floats
        if not day_skip_flag and daily_flag:
            if etr_flag:
                try:
                    gdc.array_to_raster(etr_day_array.astype(np.float32),
                                        etr_day_path,
                                        output_geo=nldas_geo,
                                        output_proj=nldas_proj,
                                        stats_flag=stats_flag)
                except AttributeError:
                    # The total is still the int 0 (no hourly data was
                    #   summed), so there is nothing to write
                    pass
            if eto_flag:
                try:
                    gdc.array_to_raster(eto_day_array.astype(np.float32),
                                        eto_day_path,
                                        output_geo=nldas_geo,
                                        output_proj=nldas_proj,
                                        stats_flag=stats_flag)
                except AttributeError:
                    # The total is still the int 0 (no hourly data was
                    #   summed), so there is nothing to write
                    pass

        del etr_day_array, eto_day_array

    if len(errors) > 0:
        logging.info('\nThe following errors were encountered:')
        for key, value in errors.items():
            logging.error(' Filepath: {}, error: {}'.format(key, value))

    logging.debug('\nScript Complete')
Example #6
0
def main(grb_ws=os.getcwd(),
         ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(),
         landsat_ws=None,
         start_date=None,
         end_date=None,
         times_str='',
         extent_path=None,
         output_extent=None,
         stats_flag=True,
         overwrite_flag=False):
    """Extract hourly NLDAS vapour pressure rasters

    Walks grb_ws looking for YYYY/DOY sub folders of hourly NLDAS GRB files.
    For each day folder in the date range a 24 band raster is written
    (band number = UTC hour + 1) holding the vapour pressure computed from
    the specific humidity band and the elevation derived air pressure.

    Args:
        grb_ws (str): folder of NLDAS GRB files
        ancillary_ws (str): folder of ancillary rasters
        output_ws (str): folder of output rasters
        landsat_ws (str): folder of Landsat scenes or tar.gz files.
            If set, only dates found in the Landsat IDs are processed.
        start_date (str): ISO format date (YYYY-MM-DD).
            Defaults to 2017-01-01 if not set or not parseable.
        end_date (str): ISO format date (YYYY-MM-DD).
            Defaults to 2017-12-31 if not set or not parseable.
        times_str (str): comma separated values and/or ranges of UTC hours
            (i.e. "1, 2, 5-8")
            Parsed with python_common.parse_int_set()
        extent_path (str): file path defining the output extent
        output_extent (list): decimal degrees values defining output extent
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nExtracting NLDAS vapour pressure rasters')

    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    # Raw strings avoid invalid escape sequence warnings for "\d"
    input_re = re.compile(
        r'NLDAS_FORA0125_H.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})'
        r'(?P<DAY>\d{2}).(?P<TIME>\d{4}).002.grb$')

    # Folder names must be exactly 4 digits (year) or 3 digits (DOY)
    year_re = re.compile(r'\d{4}$')
    doy_re = re.compile(r'\d{3}$')

    output_folder = 'ea'
    output_fmt = 'ea_{:04d}{:02d}{:02d}_hourly_nldas.img'
    # output_fmt = 'ea_{:04d}{:02d}{:02d}_{:04d}_nldas.img'

    # If a date is not set (None) or can't be parsed, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2017, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date:   {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2017, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))

    # Only process a specific hours
    if not times_str:
        time_list = range(0, 24, 1)
    else:
        time_list = list(parse_int_set(times_str))
    # Times are compared as "HH00" strings against the GRB file name
    time_list = ['{:02d}00'.format(t) for t in time_list]

    # Assume NLDAS is NAD83
    # input_epsg = 'EPSG:4269'

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')
    elev_path = os.path.join(ancillary_ws, 'nldas_elev.img')

    # Build a date list from landsat_ws scene folders or tar.gz files
    date_list = []
    if landsat_ws is not None and os.path.isdir(landsat_ws):
        logging.info('\nReading dates from Landsat IDs')
        logging.info('  {}'.format(landsat_ws))
        landsat_re = re.compile(
            r'^(?:LT04|LT05|LE07|LC08)_(?:\d{3})(?:\d{3})_'
            r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})')

        def landsat_date(name):
            """Return the ISO date in a Landsat ID or None if not parseable"""
            landsat_match = landsat_re.match(name)
            if landsat_match is None:
                return None
            try:
                return dt.datetime.strptime(
                    '_'.join(landsat_match.groups()),
                    '%Y_%m_%d').date().isoformat()
            except ValueError:
                # Digits matched but are not a valid calendar date
                return None

        for root, dirs, files in os.walk(landsat_ws, topdown=True):
            # If root matches, don't explore subfolders
            root_date = landsat_date(os.path.basename(root))
            if root_date is not None:
                date_list.append(root_date)
                dirs[:] = []

            for file_name in files:
                file_date = landsat_date(file_name)
                if file_date is not None:
                    date_list.append(file_date)
        date_list = sorted(set(date_list))
    # elif landsat_ws is not None and os.path.isfile(landsat_ws):
    #     with open(landsat_ws) as landsat_f:

    # This allows GDAL to throw Python Exceptions
    # gdal.UseExceptions()
    # mem_driver = gdal.GetDriverByName('MEM')

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    nldas_osr = gdc.raster_ds_osr(nldas_ds)
    nldas_proj = gdc.osr_proj(nldas_osr)
    nldas_cs = gdc.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = gdc.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug('  Projection: {}'.format(nldas_proj))
    logging.debug('  Cellsize: {}'.format(nldas_cs))
    logging.debug('  Geo: {}'.format(nldas_geo))
    logging.debug('  Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    # output_extent takes precedence over extent_path if both are set
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        nldas_extent = gdc.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        # Log the snapped extent (the input extent was logged above)
        logging.debug('  Extent: {}'.format(nldas_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            nldas_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = gdc.project_extent(nldas_extent, extent_osr, nldas_osr,
                                          extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Read the NLDAS mask array if present
    # NOTE: mask_array is not referenced again in this function
    if mask_path and os.path.isfile(mask_path):
        mask_array, mask_nodata = gdc.raster_to_array(mask_path,
                                                      mask_extent=nldas_extent,
                                                      fill_value=0,
                                                      return_nodata=True)
        mask_array = mask_array != mask_nodata
    else:
        mask_array = None

    # Read elevation arrays (or subsets?)
    elev_array = gdc.raster_to_array(elev_path,
                                     mask_extent=nldas_extent,
                                     return_nodata=False)
    # Air pressure only depends on elevation, so compute it once up front
    pair_array = et_common.air_pressure_func(elev_array)

    # Build output folder
    var_ws = os.path.join(output_ws, output_folder)
    if not os.path.isdir(var_ws):
        os.makedirs(var_ws)

    # Each sub folder in the main folder has all imagery for 1 day
    # The path for each subfolder is the /YYYY/DOY

    # This approach will process files for target dates
    # for input_dt in date_range(start_dt, end_dt + dt.timedelta(1)):
    #     logging.info(input_dt.date())

    # Iterate all available files and check dates if necessary
    for root, folders, files in os.walk(grb_ws):
        root_split = os.path.normpath(root).split(os.sep)

        # If the year/doy is outside the range, skip
        # The length check guards against single component relative paths
        if (len(root_split) >= 2 and year_re.match(root_split[-2])
                and doy_re.match(root_split[-1])):
            root_dt = dt.datetime.strptime(
                '{}_{}'.format(root_split[-2], root_split[-1]), '%Y_%j')
            logging.info('{}'.format(root_dt.date()))
            if ((start_dt is not None and root_dt < start_dt)
                    or (end_dt is not None and root_dt > end_dt)):
                continue
            elif date_list and root_dt.date().isoformat() not in date_list:
                continue
        # If the year is outside the range, don't search subfolders
        elif year_re.match(root_split[-1]):
            root_year = int(root_split[-1])
            logging.info('Year: {}'.format(root_year))
            if ((start_dt is not None and root_year < start_dt.year)
                    or (end_dt is not None and root_year > end_dt.year)):
                folders[:] = []
            else:
                # Sorting in place makes os.walk visit the DOY folders in order
                folders[:] = sorted(folders)
            continue
        else:
            continue

        # Create a single raster for each day with 24 bands
        # Each time step will be stored in a separate band
        output_name = output_fmt.format(root_dt.year, root_dt.month,
                                        root_dt.day)
        output_path = os.path.join(var_ws, str(root_dt.year), output_name)
        logging.debug('  {}'.format(output_path))
        if os.path.isfile(output_path):
            if not overwrite_flag:
                logging.debug('    File already exists, skipping')
                continue
            else:
                logging.debug('    File already exists, removing existing')
                os.remove(output_path)
        logging.debug('  {}'.format(root))
        if not os.path.isdir(os.path.dirname(output_path)):
            os.makedirs(os.path.dirname(output_path))
        gdc.build_empty_raster(output_path,
                               band_cnt=24,
                               output_dtype=np.float32,
                               output_proj=nldas_proj,
                               output_cs=nldas_cs,
                               output_extent=nldas_extent,
                               output_fill_flag=True)

        # Iterate through hourly files
        for input_name in sorted(files):
            logging.info('  {}'.format(input_name))
            input_path = os.path.join(root, input_name)
            input_match = input_re.match(input_name)
            if input_match is None:
                logging.debug('  Regular expression didn\'t match, skipping')
                continue
            input_dt = dt.datetime(int(input_match.group('YEAR')),
                                   int(input_match.group('MONTH')),
                                   int(input_match.group('DAY')))
            time_str = input_match.group('TIME')
            # Bands are 1 based, so UTC hour 00 is written to band 1
            band_num = int(time_str[:2]) + 1
            # if start_dt is not None and input_dt < start_dt:
            #     continue
            # elif end_dt is not None and input_dt > end_dt:
            #     continue
            # elif date_list and input_dt.date().isoformat() not in date_list:
            #     continue
            if time_str not in time_list:
                logging.debug('    Time not in list, skipping')
                continue
            logging.debug('    Time: {} {}'.format(input_dt.date(), time_str))
            logging.debug('    Band: {}'.format(band_num))

            # Determine band numbering/naming
            input_band_dict = grib_band_names(input_path)

            # Compute vapour pressure from specific humidity
            input_ds = gdal.Open(input_path)
            sph_array = gdc.raster_ds_to_array(
                input_ds,
                band=input_band_dict['Specific humidity [kg/kg]'],
                mask_extent=nldas_extent,
                return_nodata=False)
            # The dataset is no longer needed once the band has been read
            input_ds = None
            ea_array = (sph_array * pair_array) / (0.622 + 0.378 * sph_array)

            # Save the projected array as 32-bit floats
            gdc.array_to_comp_raster(ea_array.astype(np.float32),
                                     output_path,
                                     band=band_num)
            # gdc.block_to_raster(
            #     ea_array.astype(np.float32), output_path, band=band)
            # gdc.array_to_raster(
            #     ea_array.astype(np.float32), output_path,
            #     output_geo=nldas_geo, output_proj=nldas_proj,
            #     stats_flag=stats_flag)

            del sph_array

        if stats_flag:
            gdc.raster_statistics(output_path)

    logging.debug('\nScript Complete')
Example #7
0
def main(ini_path,
         rasters_flag=None,
         tables_flag=None,
         mc_iter_str='',
         tile_list=None,
         pyramids_flag=True,
         stats_flag=True,
         overwrite_flag=False,
         mp_procs=1,
         delay=0,
         debug_flag=False,
         no_file_logging=False):
    """Run interpolator for all Landsat scenes

    Reads the project INI file, builds the path of the year specific
    interpolator INI file, and then runs the raster and/or table
    interpolator scripts in a subprocess once per Monte Carlo iteration.

    Parameters
    ----------
    ini_path : str
        File path of the input parameters file.
    rasters_flag : bool, optional
        If True, override INI and interpolate rasters.
    tables_flag : bool, optional
        If True, override INI and interpolate zone tables.
    mc_iter_str : str, optional
        MonteCarlo iteration list and/or range.
    tile_list : list, optional
        Landsat path/rows to process (i.e. [p045r043, p045r033]).
        This will override the tile list in the INI file.
    pyramids_flag : bool, optional
        If True, compute raster pyramids (the default is True).
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).
    mp_procs : int, optional
        Number of cpu cores to use (the default is 1).
    delay : float, optional
        Max random delay starting function in seconds (the default is 0).
    debug_flag : bool, optional
        If True, enable debug level logging (the default is False).
    no_file_logging : bool, optional
        If True, don't write logging to file (the default is False).

    Returns
    -------
    bool or None
        False if both interpolator flags are False or if the year specific
        INI file does not exist, otherwise None.

    Notes
    -----
    Calls sys.exit() if the project folder does not exist.

    """
    logging.info('\nRunning Interpolator')

    # Open config file
    config = open_ini(ini_path)

    # Get input parameters
    logging.debug('  Reading Input File')
    year = config.getint('INPUTS', 'year')
    if tile_list is None:
        tile_list = read_param('tile_list', [], config, 'INPUTS')
    project_ws = config.get('INPUTS', 'project_folder')
    logging.debug('  Year: {}'.format(year))
    logging.debug('  Path/rows: {}'.format(', '.join(tile_list)))
    logging.debug('  Project: {}'.format(project_ws))

    interpolate_folder = config.get('INPUTS', 'interpolate_folder')
    logging.debug('  Folder: {}'.format(interpolate_folder))

    if rasters_flag is None and tables_flag is None:
        # Neither flag was set by the caller, read both from the INI
        logging.info('  Reading interpolator flags from INI file')
        rasters_flag = read_param(
            'interpolate_rasters_flag', True, config, 'INPUTS')
        tables_flag = read_param(
            'interpolate_tables_flag', True, config, 'INPUTS')
    elif rasters_flag is False and tables_flag is False:
        # If both flags were set false, for now, exit the script
        # It may make more sense to assume the user wants to interpolate
        #   something (i.e. default to rasters_flag=True)
        logging.error('Raster and table interpolator flags are both False\n')
        logging.error('  Exiting the script')
        return False

    if rasters_flag:
        rasters_func_path = config.get('INPUTS', 'interpolate_rasters_func')
    if tables_flag:
        tables_func_path = config.get('INPUTS', 'interpolate_tables_func')

    # For now, get mc_iter list from command line, not from project file
    mc_iter_list = list(parse_int_set(mc_iter_str))

    # Need something in mc_iter_list to iterate over
    if not mc_iter_list:
        mc_iter_list = [None]

    # For now build INI file name from template INI names
    ini_name = os.path.splitext(
        os.path.basename(config.get('INPUTS', 'interpolate_ini')))[0]

    # INI file is built as a function of year
    ini_fmt = '{}_{}_{}.ini'

    # Check inputs folders/paths
    if not os.path.isdir(project_ws):
        logging.error('\n Folder {} does not exist'.format(project_ws))
        sys.exit()

    # Check that there is an input file for the year and folder
    year_ws = os.path.join(project_ws, str(year))
    ini_path = os.path.join(
        year_ws, ini_fmt.format(ini_name, str(year),
                                interpolate_folder.lower()))
    # os.path.join() of a non-empty path is always truthy, so the file
    #   must be checked with isfile() for this guard to ever trigger
    if not os.path.isfile(ini_path):
        logging.warning('    Input file does not exist\n  {}'.format(ini_path))
        return False

    # Command line options that do not change between iterations
    # Option order matches what was previously passed to the scripts
    raster_only_args = []
    if pyramids_flag:
        raster_only_args.append('--pyramids')
    if stats_flag:
        raster_only_args.append('--stats')

    shared_args = []
    if overwrite_flag:
        shared_args.append('--overwrite')
    if debug_flag:
        shared_args.append('--debug')
    if delay > 0:
        shared_args.extend(['--delay', str(delay)])
    if no_file_logging:
        shared_args.append('--no_file_logging')
    if mp_procs > 1:
        shared_args.extend(['-mp', str(mp_procs)])

    # Run Interpolater for each Monte Carlo iteration
    for mc_iter in sorted(mc_iter_list):
        logging.debug('  Year: {} Iteration: {}'.format(str(year), mc_iter))
        mc_args = ['-mc', str(mc_iter)] if mc_iter is not None else []

        if rasters_flag:
            rasters_args = (
                ['python', rasters_func_path, year_ws, '-i', ini_path] +
                mc_args + raster_only_args + shared_args)
            return_code = subprocess.call(rasters_args, cwd=year_ws)
            if return_code != 0:
                logging.error(
                    '  Raster interpolator exited with code {}'.format(
                        return_code))
        if tables_flag:
            tables_args = (
                ['python', tables_func_path, year_ws, '-i', ini_path] +
                mc_args + shared_args)
            return_code = subprocess.call(tables_args, cwd=year_ws)
            if return_code != 0:
                logging.error(
                    '  Table interpolator exited with code {}'.format(
                        return_code))

    logging.debug('\nScript complete')
Example #8
0
def main(csv_path,
         output_folder,
         fid_list='',
         bin_min=0,
         bin_max=5,
         bin_size=0.25,
         start_dt=None,
         end_dt=None,
         plots='all'):
    """Create Summary Histogram Plots from pymetric zonal csv output files

    Acreage (bar) and field count histograms of total and net ET are
    written to the output folder.  When a start and end date are given, one
    set of plots is made for that date range.  Otherwise plots are made for
    the full dataset ("annual") and, if the csv has a MONTH column, for the
    growing season months.

    Args:
        csv_path (str): zonal stats file path
        output_folder (str): Folder path where files will be saved
                            (created if it doesn't exist)
        fid_list (str): list or range of FIDs to skip
                        Parsed with dripy.parse_int_set()
        bin_min (float): Histogram Minimum (default: 0)
        bin_max (float): Histogram Max (default: 5)
        bin_size (float): Histogram bin size (default: 0.25)
        start_dt : datetime (start date; optional)
        end_dt : datetime (end date; optional)
        plots (str): Output plot options: all, acreage, or field (default: all)
    Returns:
        None

    Calls sys.exit() if the csv cannot be read, if only one of the dates is
    set, if the end date is before the start date, if the date filter
    removes all data, or if a date range is requested for a csv without a
    DATE column.
    """
    logging.info('\nReading input csv file: {}'.format(csv_path))

    # An empty path can never be read
    if not csv_path:
        logging.error('ERROR: csv file does not exist')
        sys.exit()
    # Attempt to read csv_file
    try:
        input_df = pd.read_csv(csv_path, sep=',')
    except Exception:
        logging.error('Error reading file. Check csv path.')
        sys.exit()

    # Create Output Folder if it doesn't exist
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    # Filter FIDs based on fid_list (default is to skip nothing)
    if fid_list:
        fid_skiplist = sorted(dripy.parse_int_set(fid_list))
        logging.info('Skipping FIDs: {}'.format(fid_skiplist))
        # Copy so that the new columns added below are not written to a view
        input_df = input_df[~input_df['FID'].isin(fid_skiplist)].copy()

    if (start_dt and not end_dt) or (end_dt and not start_dt):
        logging.error('\nPlease Specify Both Start and End Date:'
                      '\nStart Date: {}'
                      '\nEnd Date: {}'.format(start_dt, end_dt))
        sys.exit()

    if (start_dt and end_dt) and (end_dt < start_dt):
        logging.error('End date cannot be before start date. Exiting.')
        sys.exit()

    # Filter dataset if start and end dates are specified
    if start_dt and end_dt:
        if 'DATE' not in input_df.columns:
            logging.error('Cannot Apply Custom Date Range On Monthly OR Annual'
                          ' Datasets. \nUse Daily Output. Exiting.')
            sys.exit()
        input_df['DATE'] = pd.to_datetime(input_df['DATE'])
        logging.info('\nFiltering By Date. Start: {:%Y-%m-%d}, '
                     'End: {:%Y-%m-%d}'.format(start_dt, end_dt))
        input_df = input_df[(input_df['DATE'] >= start_dt)
                            & (input_df['DATE'] <= end_dt)].copy()
        if input_df.empty:
            logging.error('Date Filter Removed All Data. Exiting.')
            sys.exit()

    # Unit Conversions
    pix2acre = 0.222395  # 30x30m pixel to acres; From Google
    mm2ft = 0.00328084  # From Google

    # Add Acres
    input_df['Area_acres'] = input_df['PIXELS'] * pix2acre
    # Add FT Fields
    input_df['ET_FT'] = input_df['ET_MM'] * mm2ft
    input_df['ETR_FT'] = input_df['ETR_MM'] * mm2ft
    # Daily Volume Field
    input_df['Volume_acft'] = input_df['Area_acres'] * input_df['ET_FT']
    # Net ET Field
    input_df['NetET_mm'] = input_df['ET_MM'] - input_df['PPT_MM']
    input_df['NetET_FT'] = input_df['NetET_mm'] * mm2ft
    input_df['NetVolume_acft'] = input_df['Area_acres'] * input_df['NetET_FT']

    # Growing Season Start/End Months (inclusive)
    start_month = 4
    end_month = 10

    # Dictionary to control agg of each variable
    agg_funcs = {
        'FID': 'mean',
        'YEAR': 'mean',
        'PIXELS': 'mean',
        'NDVI': 'mean',
        'ETRF': 'mean',
        'ETR_MM': 'sum',
        'ET_MM': 'sum',
        'PPT_MM': 'sum',
        'Area_acres': 'mean',
        'ET_FT': 'sum',
        'ETR_FT': 'sum',
        'NetET_FT': 'sum',
        'Volume_acft': 'sum',
        'NetVolume_acft': 'sum',
    }

    # Growing Season Grouped Dataframe (only if the csv has a MONTH column)
    gs_grp_df = None
    if 'MONTH' in input_df.columns:
        gs_df = input_df[(input_df['MONTH'] >= start_month)
                         & (input_df['MONTH'] <= end_month)]
        gs_grp_df = gs_df.groupby('FID', as_index=True).agg(agg_funcs)

    # Annual Grouped Dataframe
    ann_grp_df = input_df.groupby('FID', as_index=True).agg(agg_funcs)

    def hist_bin_edges():
        """Return the histogram bin edges from bin_min to bin_max"""
        # np.linspace() needs an integer number of samples; the division
        #   returns a float, so round it to the nearest edge count
        edge_count = int(round((bin_max - bin_min) / bin_size)) + 1
        return np.linspace(bin_min, bin_max, edge_count)

    def annotation_stats(grp_df, vol_var):
        """Return (year, mean rate, total area, total volume) for a plot"""
        year = grp_df['YEAR'].mean()
        total_area = grp_df['Area_acres'].sum()
        total_vol = grp_df[vol_var].sum()
        mean_rate = total_vol / total_area.round(1)
        return year, mean_rate, total_area, total_vol

    def decorate_and_save(fig, ax, plot_stats, title, xlab, ylab, file_name):
        """Add labels, mean line and annotation box, then save the figure"""
        year, mean_rate, total_area, total_vol = plot_stats
        font_size = 12
        ann_font_size = 10

        ax.set_title(title, size=font_size)
        ax.set_xlabel(xlab, size=font_size)
        ax.set_ylabel(ylab, size=font_size)
        ax.set_xticks(np.arange(0, bin_max + (2 * bin_size), 2 * bin_size))
        ax.tick_params(axis='x', labelsize=font_size)
        ax.tick_params(axis='y', labelsize=font_size)
        # Shift ymax to leave space for the annotation box
        ymin, ymax = ax.get_ylim()
        ax.set_ylim((ymin, ymax + ymax * 0.3))
        # Add mean vertical line
        ax.axvline(mean_rate, color='gray', linestyle='dashed', linewidth=1)

        # Add Annotation Text Box
        antext = ('Year {:.0f}\n' + 'Mean ET = {:.1f} ft\n' +
                  'Total Area = {:.1f} acres\n' +
                  'ET Volume = {:.1f} ac-ft').format(
                      year, mean_rate, total_area, total_vol)
        at = AnchoredText(antext,
                          prop=dict(size=ann_font_size),
                          frameon=True,
                          loc=2)
        at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
        ax.add_artist(at)

        # Save Figure
        fig.tight_layout(pad=3)
        fig.savefig(os.path.join(output_folder, file_name), dpi=300)
        fig.clf()
        plt.close(fig)

    # Field Count Histogram Function
    def field_count_hist(grp_df, rate_var, vol_var, title, xlab, filedesc):
        plot_stats = annotation_stats(grp_df, vol_var)
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.hist(grp_df[rate_var], bins=hist_bin_edges(), align='mid',
                edgecolor='black')
        decorate_and_save(
            fig, ax, plot_stats, title, xlab, 'Field Count',
            '{:.0f}_{}_Fields'.format(plot_stats[0], filedesc))
        return True

    # Acreage histogram (Bar Plot)
    def acreage_histogram(grp_df, rate_var, vol_var, title, xlab, filedesc):
        plot_stats = annotation_stats(grp_df, vol_var)

        # Sum the field acreage that falls in each ET bin
        et_area_hist, et_bins, _ = stats.binned_statistic(
            grp_df[rate_var], grp_df['Area_acres'], 'sum', hist_bin_edges())

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.bar(et_bins[:-1],
               et_area_hist,
               width=bin_size,
               edgecolor='black',
               align='edge',
               color='r')
        decorate_and_save(
            fig, ax, plot_stats, title, xlab, 'Acreage',
            '{:.0f}_{}_Acreage'.format(plot_stats[0], filedesc))
        return True

    logging.info('\nCreating Summary Histogram Plots.')
    if plots in ['acreage', 'field']:
        logging.info('Only outputting {} plots.'.format(plots))
    elif plots != 'all':
        logging.warning(
            'Unsupported plots option: {}. No plots will be created.'.format(
                plots))

    # Each entry: (grouped data, ET title, Net ET title,
    #              ET file description, Net ET file description)
    if start_dt and end_dt:
        # Custom date range plots
        date_range = '{:%Y-%m-%d} to {:%Y-%m-%d}'.format(start_dt, end_dt)
        plot_sets = [
            (ann_grp_df, 'Total ET: ' + date_range, 'Net ET: ' + date_range,
             'TotalET', 'NetET'),
        ]
    else:
        # Default Annual and Growing Season Plots if no start/end date
        plot_sets = [
            (ann_grp_df, 'Annual ET', 'Annual Net ET',
             'Ann_TotalET', 'Ann_NetET'),
        ]
        if gs_grp_df is not None:
            plot_sets.append(
                (gs_grp_df, 'Growing Season ET', 'Growing Season Net ET',
                 'GS_TotalET', 'GS_NetET'))

    for grp_df, et_title, net_title, et_desc, net_desc in plot_sets:
        if plots in ['all', 'acreage']:
            acreage_histogram(grp_df, 'ET_FT', 'Volume_acft', et_title,
                              'Total ET (Feet)', et_desc)
            acreage_histogram(grp_df, 'NetET_FT', 'NetVolume_acft', net_title,
                              'Net ET (Feet)', net_desc)
        if plots in ['all', 'field']:
            field_count_hist(grp_df, 'ET_FT', 'Volume_acft', et_title,
                             'Total ET (Feet)', et_desc)
            field_count_hist(grp_df, 'NetET_FT', 'NetVolume_acft', net_title,
                             'Net ET (Feet)', net_desc)