Beispiel #1
0
def main(start_dt, end_dt, netcdf_ws, ancillary_ws, output_ws,
         variables=['etr', 'pr'], extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract GRIDMET variables from yearly NetCDF files to daily rasters

    For every requested variable and every matching "<var>_<year>.nc" file
    in the NetCDF folder, a single multi-band raster is written where the
    band number is the day of year.

    Parameters
    ----------
    start_dt : datetime
        Start date.  If None, no lower date limit is applied.
    end_dt : datetime
        End date.  If None, no upper date limit is applied.
    netcdf_ws : str
        Folder of GRIDMET netcdf files.
    ancillary_ws : str
        Folder of ancillary rasters (must contain 'gridmet_elev.img').
    output_ws : str
        Folder of output rasters.
    variables : list, optional
        GRIDMET variables to extract (the default is ['etr', 'pr']).
        Choices: 'eto', 'etr', 'pr', 'srad', 'sph', 'tmmn', 'tmmx', 'vs'
        Set as ['all'] to process all variables.
        The default list is never modified by this function.
    extent_path : str, optional
        File path defining the output extent (shapefile or raster).
        Only used when output_extent is None.
    output_extent : list, optional
        Decimal degrees values defining output extent.
    stats_flag : bool, optional
        If True, compute raster statistics for every raster that is written
        (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    None
        False is returned instead if extent_path is set but is not a file.

    Notes
    -----
    The script exits (sys.exit) if the variables parameter is empty,
    is not a list, or contains an unsupported variable name.

    """
    logging.info('\nExtracting GRIDMET variables')
    logging.debug('  Start date: {}'.format(start_dt))
    logging.debug('  End date:   {}'.format(end_dt))

    # Hard coding GRIDMET row count for now
    # It is needed to flip the row offsets (see slicing indices below)
    gridmet_full_rows = 585

    # GRIDMET elevation raster defines the full grid (extent/cellsize/proj)
    elev_raster = os.path.join(ancillary_ws, 'gridmet_elev.img')

    # GRIDMET rasters to extract
    data_full_list = ['eto', 'etr', 'pr', 'srad', 'sph', 'tmmn', 'tmmx', 'vs']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif not isinstance(variables, list):
        # DEADBEEF - I could try converting comma separated strings to lists?
        logging.error('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif 'all' in variables:
        # The docstring always advertised ['all']; expand it here instead of
        #   rejecting it as an invalid variable name
        logging.info('\nProcessing all variables\n  {}'.format(
            ','.join(data_full_list)))
        var_list = data_full_list[:]
    elif not set(variables).issubset(set(data_full_list)):
        logging.error('\nERROR: variables parameter is invalid\n  {}'.format(
            variables))
        sys.exit()
    else:
        var_list = variables[:]

    output_fmt = '{}_{}_daily_gridmet.img'
    # Raw string so the regex escapes are not treated as string escapes
    # The dot is escaped so only a literal ".nc" extension matches
    gridmet_re = re.compile(r'(?P<VAR>\w+)_(?P<YEAR>\d{4})\.nc$')

    # GRIDMET band name dictionary (short variable name -> NetCDF variable)
    gridmet_band_dict = {
        'eto': 'potential_evapotranspiration',
        'etr': 'potential_evapotranspiration',
        'pr': 'precipitation_amount',
        'srad': 'surface_downwelling_shortwave_flux_in_air',
        'sph': 'specific_humidity',
        'tmmn': 'air_temperature',
        'tmmx': 'air_temperature',
        'vs': 'wind_speed',
    }

    # Get extent/geo from elevation raster
    gridmet_ds = gdal.Open(elev_raster)
    gridmet_osr = drigo.raster_ds_osr(gridmet_ds)
    gridmet_proj = drigo.osr_proj(gridmet_osr)
    gridmet_cs = drigo.raster_ds_cellsize(gridmet_ds, x_only=True)
    gridmet_extent = drigo.raster_ds_extent(gridmet_ds)
    gridmet_full_geo = gridmet_extent.geo(gridmet_cs)
    gridmet_x, gridmet_y = gridmet_extent.origin()
    # Drop the reference to the GDAL dataset now that it is no longer needed
    gridmet_ds = None
    logging.debug('  Projection: {}'.format(gridmet_proj))
    logging.debug('  Cellsize: {}'.format(gridmet_cs))
    logging.debug('  Geo: {}'.format(gridmet_full_geo))
    logging.debug('  Extent: {}'.format(gridmet_extent))

    # Subset data to a smaller extent
    # An explicit output_extent takes precedence over extent_path
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        gridmet_extent = drigo.Extent(output_extent)
        gridmet_extent.adjust_to_snap(
            'EXPAND', gridmet_x, gridmet_y, gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug('  Geo: {}'.format(gridmet_geo))
        logging.debug('  Extent: {}'.format(gridmet_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if not os.path.isfile(extent_path):
            logging.error(
                '\nThe extent object does not exist, exiting\n'
                '  {}'.format(extent_path))
            return False
        elif extent_path.lower().endswith('.shp'):
            gridmet_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            gridmet_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        gridmet_extent = drigo.project_extent(
            gridmet_extent, extent_osr, gridmet_osr, extent_cs)
        gridmet_extent.adjust_to_snap(
            'EXPAND', gridmet_x, gridmet_y, gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug('  Geo: {}'.format(gridmet_geo))
        logging.debug('  Extent: {}'.format(gridmet_extent))
    else:
        gridmet_geo = gridmet_full_geo

    # Get indices for slicing/clipping input arrays
    g_i, g_j = drigo.array_geo_offsets(
        gridmet_full_geo, gridmet_geo, cs=gridmet_cs)
    g_rows, g_cols = gridmet_extent.shape(cs=gridmet_cs)

    # Flip row indices since GRIDMET arrays are flipped up/down
    row_a = gridmet_full_rows - (g_j + g_rows)
    row_b = gridmet_full_rows - g_j
    col_a, col_b = g_i, g_i + g_cols

    # Process each variable
    logging.info("")
    for input_var in var_list:
        logging.info("\nVariable: {}".format(input_var))

        # Rename variables to match cimis
        output_var = 'ppt' if input_var == 'pr' else input_var

        # Build output folder
        var_ws = os.path.join(output_ws, output_var)
        if not os.path.isdir(var_ws):
            os.makedirs(var_ws)

        # Process each file in the input workspace
        for input_name in sorted(os.listdir(netcdf_ws)):
            input_match = gridmet_re.match(input_name)
            if not input_match:
                logging.debug("{}".format(input_name))
                logging.debug('  Regular expression didn\'t match, skipping')
                continue
            elif input_match.group('VAR') != input_var:
                logging.debug("{}".format(input_name))
                logging.debug('  Variable didn\'t match, skipping')
                continue
            logging.info("{}".format(input_name))

            year_str = input_match.group('YEAR')
            logging.info("  {}".format(year_str))
            year_int = int(year_str)
            if start_dt is not None and year_int < start_dt.year:
                logging.debug('    Before start date, skipping')
                continue
            elif end_dt is not None and year_int > end_dt.year:
                logging.debug('    After end date, skipping')
                continue

            # Build input file path
            input_raster = os.path.join(netcdf_ws, input_name)

            # Create a single raster for each year
            # Each day will be stored in a separate band (band == DOY)
            # 366 bands are allocated so that leap years also fit
            output_path = os.path.join(
                var_ws, output_fmt.format(output_var, year_str))
            logging.debug('  {}'.format(output_path))
            if os.path.isfile(output_path):
                if not overwrite_flag:
                    logging.debug('    File already exists, skipping')
                    continue
                logging.debug('    File already exists, removing existing')
                os.remove(output_path)
            drigo.build_empty_raster(
                output_path, band_cnt=366, output_dtype=np.float32,
                output_proj=gridmet_proj, output_cs=gridmet_cs,
                output_extent=gridmet_extent, output_fill_flag=True)

            # Read in the GRIDMET NetCDF file
            # Immediately clip input array to save memory
            # The context manager closes the file even if the read fails
            with netCDF4.Dataset(input_raster, 'r') as input_nc_f:
                input_nc = input_nc_f.variables[
                    gridmet_band_dict[input_var]][
                    :, row_a: row_b, col_a: col_b].copy()
            # Undo the up/down flip of the row axis
            input_nc = np.flip(input_nc, 1)

            # A numpy array is returned when slicing a masked array
            #   if there are no masked pixels
            # This is a hack to force the numpy array back to a masked array
            if not isinstance(input_nc, np.ma.MaskedArray):
                input_nc = np.ma.MaskedArray(
                    input_nc, np.zeros(input_nc.shape, dtype=bool))

            # Check all valid dates in the year
            year_dates = _utils.date_range(
                dt.datetime(year_int, 1, 1), dt.datetime(year_int + 1, 1, 1))
            for date_dt in year_dates:
                if start_dt is not None and date_dt < start_dt:
                    logging.debug('  {} - before start date, skipping'.format(
                        date_dt.date()))
                    continue
                elif end_dt is not None and date_dt > end_dt:
                    logging.debug('  {} - after end date, skipping'.format(
                        date_dt.date()))
                    continue
                logging.info('  {}'.format(date_dt.date()))

                # DOY is 1 based (band number), array index is 0 based
                doy = int(date_dt.strftime('%j'))
                doy_i = doy - 1

                # Arrays are read as masked array with a fill value of -9999
                # Convert to basic numpy array arrays with nan values
                try:
                    input_full_ma = input_nc[doy_i, :, :]
                except IndexError:
                    logging.info('    date not in netcdf, skipping')
                    continue
                input_full_array = input_full_ma.data.astype(np.float32)
                input_full_nodata = float(input_full_ma.fill_value)
                input_full_array[input_full_array == input_full_nodata] = np.nan

                # Since inputs are netcdf, need to create GDAL raster
                #   datasets in order to use gdal_common functions
                # Create an in memory dataset of the clipped array
                input_full_ds = drigo.array_to_mem_ds(
                    input_full_array, output_geo=gridmet_geo,
                    output_proj=gridmet_proj)

                # Then extract the subset from the in memory dataset
                output_array = drigo.raster_ds_to_array(
                    input_full_ds, 1, mask_extent=gridmet_extent,
                    return_nodata=False)

                # Convert Kelvin to Celsius
                if input_var in ['tmmx', 'tmmn']:
                    output_array -= 273.15

                # Save the projected array as 32-bit floats
                drigo.array_to_comp_raster(
                    output_array.astype(np.float32), output_path,
                    band=doy, stats_flag=False)
                del output_array

            # Compute statistics for every raster that was written
            # Doing this here (instead of once after the file loop) covers
            #   all years and avoids using an undefined/stale output path
            #   when no file matched or the last file was skipped
            if stats_flag:
                drigo.raster_statistics(output_path)

    logging.debug('\nScript Complete')
def main(netcdf_ws=os.getcwd(), ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(), variables=['prcp'],
         daily_flag=False, monthly_flag=True, annual_flag=False,
         start_year=1981, end_year=2010,
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Generate DAYMET climatology rasters from yearly NetCDF files

    Daily values are read from every matching
    "daymet_v3_<var>_<year>_na.nc4" file between the start and end year and
    are accumulated into daily (DOY), monthly and/or annual mean rasters.

    Parameters
    ----------
    netcdf_ws : str
        Folder of DAYMET netcdf files.
    ancillary_ws : str
        Folder of ancillary rasters (must contain 'daymet_mask.img').
    output_ws : str
        Folder of output rasters.
    variables : list, optional
        DAYMET variables to process ('prcp', 'tmmn', 'tmmx').
        Set as ['all'] to process all variables.
        The default list is never modified by this function.
    daily_flag : bool, optional
        If True, compute daily (DOY) climatologies.
    monthly_flag : bool, optional
        If True, compute monthly climatologies.
    annual_flag : bool, optional
        If True, compute annual climatologies.
    start_year : int, optional
        Climatology start year (1981 is used if the value is not valid).
    end_year : int, optional
        Climatology end year (2010 is used if the value is not valid).
    extent_path : str, optional
        File path of a raster defining the output extent.
        Only used when output_extent is None.
    output_extent : list, optional
        Decimal degrees values defining output extent.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).
        Currently not used by this function.

    Returns
    -------
    None

    Notes
    -----
    The script exits (sys.exit) if the variables parameter is empty,
    is not a list, or contains an unsupported variable name.

    """
    logging.info('\nGenerating DAYMET climatologies')

    daily_fmt = 'daymet_{var}_30yr_normal_{doy:03d}.img'
    monthly_fmt = 'daymet_{var}_30yr_normal_{month:02d}.img'
    annual_fmt = 'daymet_{var}_30yr_normal.img'

    # If a year is not set (or not usable), process 1981-2010 climatology
    # Only the errors datetime raises for bad year values are caught
    try:
        start_dt = dt.datetime(start_year, 1, 1)
        logging.debug('  Start date: {}'.format(start_dt))
    except (TypeError, ValueError, OverflowError):
        start_dt = dt.datetime(1981, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime(end_year, 12, 31)
        logging.debug('  End date:   {}'.format(end_dt))
    except (TypeError, ValueError, OverflowError):
        end_dt = dt.datetime(2010, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))

    # Get DAYMET spatial reference from an ancillary raster
    mask_raster = os.path.join(ancillary_ws, 'daymet_mask.img')

    # Raw string so the regex escapes are not treated as string escapes
    # The dot is escaped so only a literal ".nc4" extension matches
    daymet_re = re.compile(
        r'daymet_v3_(?P<VAR>\w+)_(?P<YEAR>\d{4})_na\.nc4$')

    # DAYMET rasters to extract
    var_full_list = ['prcp', 'tmmn', 'tmmx']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif not isinstance(variables, list):
        # DEADBEEF - I could try converting comma separated strings to lists?
        logging.error('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif 'all' in variables:
        # Informational message, not an error
        logging.info('\nDownloading all variables\n  {}'.format(
            ','.join(var_full_list)))
        var_list = var_full_list[:]
    elif not set(variables).issubset(set(var_full_list)):
        logging.error('\nERROR: variables parameter is invalid\n  {}'.format(
            variables))
        sys.exit()
    else:
        var_list = variables[:]

    # Get extent/geo from mask raster
    daymet_ds = gdal.Open(mask_raster)
    daymet_osr = drigo.raster_ds_osr(daymet_ds)
    daymet_proj = drigo.osr_proj(daymet_osr)
    daymet_cs = drigo.raster_ds_cellsize(daymet_ds, x_only=True)
    daymet_extent = drigo.raster_ds_extent(daymet_ds)
    daymet_geo = daymet_extent.geo(daymet_cs)
    daymet_x, daymet_y = daymet_extent.origin()
    # Drop the reference to the GDAL dataset now that it is no longer needed
    daymet_ds = None
    logging.debug('  Projection: {}'.format(daymet_proj))
    logging.debug('  Cellsize: {}'.format(daymet_cs))
    logging.debug('  Geo: {}'.format(daymet_geo))
    logging.debug('  Extent: {}'.format(daymet_extent))
    logging.debug('  Origin: {} {}'.format(daymet_x, daymet_y))

    # Subset data to a smaller extent
    # An explicit output_extent takes precedence over extent_path
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        # Assume input extent is in decimal degrees
        output_extent = drigo.project_extent(
            drigo.Extent(output_extent), drigo.epsg_osr(4326), daymet_osr, 0.001)
        output_extent = drigo.intersect_extents([daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug('  Geo: {}'.format(output_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        output_extent = drigo.project_extent(
            drigo.raster_path_extent(extent_path),
            drigo.raster_path_osr(extent_path), daymet_osr,
            drigo.raster_path_cellsize(extent_path, x_only=True))
        output_extent = drigo.intersect_extents([daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug('  Geo: {}'.format(output_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    else:
        output_extent = daymet_extent.copy()
        output_geo = daymet_geo[:]
    xi, yi = drigo.array_geo_offsets(daymet_geo, output_geo, daymet_cs)
    output_rows, output_cols = output_extent.shape(cs=daymet_cs)
    logging.debug('  Shape: {} {}'.format(output_rows, output_cols))
    logging.debug('  Offsets: {} {} (x y)'.format(xi, yi))

    # Process each variable
    for input_var in var_list:
        logging.info("\nVariable: {}".format(input_var))

        # Rename variables to match cimis
        output_var = 'ppt' if input_var == 'prcp' else input_var
        logging.debug("Output name: {}".format(output_var))

        # Build output folders
        var_ws = os.path.join(output_ws, output_var)
        if not os.path.isdir(var_ws):
            os.makedirs(var_ws)
        if daily_flag:
            # Built before the file loop so the path is always defined when
            #   the daily rasters are written, even if no file matched
            daily_ws = os.path.join(var_ws, 'daily')
            if not os.path.isdir(daily_ws):
                os.makedirs(daily_ws)

        # Build output arrays
        logging.debug('  Building arrays')
        if daily_flag:
            # Counts are per DOY, i.e. at most one per processed year
            daily_sum = np.full(
                (365, output_rows, output_cols), 0, np.float64)
            daily_count = np.full(
                (365, output_rows, output_cols), 0, np.uint8)
        if monthly_flag:
            # Counts are per month, i.e. at most one per processed year
            monthly_sum = np.full(
                (12, output_rows, output_cols), 0, np.float64)
            monthly_count = np.full(
                (12, output_rows, output_cols), 0, np.uint8)
        if annual_flag:
            # The annual count is incremented for every day of every year
            #   so an 8-bit counter would wrap around after 255 days
            annual_sum = np.full(
                (output_rows, output_cols), 0, np.float64)
            annual_count = np.full(
                (output_rows, output_cols), 0, np.uint32)

        # Process each file/year separately
        for input_name in sorted(os.listdir(netcdf_ws)):
            logging.debug("  {}".format(input_name))
            input_match = daymet_re.match(input_name)
            if not input_match:
                logging.debug('  Regular expression didn\'t match, skipping')
                continue
            elif input_match.group('VAR') != input_var:
                logging.debug('  Variable didn\'t match, skipping')
                continue
            year_str = input_match.group('YEAR')
            logging.info("  Year: {}".format(year_str))
            year_int = int(year_str)
            if start_dt is not None and year_int < start_dt.year:
                logging.debug('    Before start date, skipping')
                continue
            elif end_dt is not None and year_int > end_dt.year:
                logging.debug('    After end date, skipping')
                continue

            # Build input file path
            input_raster = os.path.join(netcdf_ws, input_name)
            if not os.path.isfile(input_raster):
                logging.debug(
                    '  Input raster doesn\'t exist, skipping    {}'.format(
                        input_raster))
                continue

            # Per-year monthly accumulators
            if monthly_flag:
                monthly_temp_sum = np.full(
                    (12, output_rows, output_cols), 0, np.float64)
                monthly_temp_count = np.full(
                    (12, output_rows, output_cols), 0, np.uint8)

            # Check all valid dates in the year
            year_dates = _utils.date_range(
                dt.datetime(year_int, 1, 1), dt.datetime(year_int + 1, 1, 1))

            # Read in the DAYMET NetCDF file
            # The context manager closes the file even if a read fails
            with netCDF4.Dataset(input_raster, 'r') as input_nc_f:
                input_nc_var = input_nc_f.variables[input_var]

                for date_dt in year_dates:
                    logging.debug('  {}'.format(date_dt.date()))

                    # DOY is 1 based, array indices are 0 based
                    doy_i = int(date_dt.strftime('%j')) - 1
                    month_i = date_dt.month - 1

                    # Arrays are being read as masked array with a -9999
                    #   fill value
                    # Convert to basic numpy array arrays with nan values
                    try:
                        input_ma = input_nc_var[
                            doy_i, yi: yi + output_rows, xi: xi + output_cols]
                    except IndexError:
                        logging.info('    date not in netcdf, skipping')
                        continue

                    # A numpy array is returned when slicing a masked array
                    #   if there are no masked pixels
                    # Force it back to a masked array so that the fill
                    #   value and data attributes are available
                    if not isinstance(input_ma, np.ma.MaskedArray):
                        input_ma = np.ma.MaskedArray(
                            input_ma, np.zeros(input_ma.shape, dtype=bool))

                    input_nodata = float(input_ma.fill_value)
                    output_array = input_ma.data.astype(np.float32)
                    output_array[output_array == input_nodata] = np.nan
                    output_mask = np.isfinite(output_array)

                    # Convert Kelvin to Celsius
                    # NOTE(review): 'tmax'/'tmin' are not in var_full_list,
                    #   so this branch cannot be reached with the current
                    #   validation.  Left unchanged since the units of the
                    #   source data can't be confirmed from this file.
                    if input_var in ['tmax', 'tmin']:
                        output_array -= 273.15

                    # Save values
                    if daily_flag:
                        daily_sum[doy_i, :, :] += output_array
                        daily_count[doy_i, :, :] += output_mask
                    if monthly_flag:
                        monthly_temp_sum[month_i, :, :] += output_array
                        monthly_temp_count[month_i, :, :] += output_mask
                    if annual_flag:
                        annual_sum[:, :] += output_array
                        annual_count[:, :] += output_mask

                    # Cleanup
                    del input_ma, output_array, output_mask

            # Compute mean monthly for the year
            if monthly_flag:
                if input_var == 'prcp':
                    # Precipitation is a monthly total
                    monthly_sum += monthly_temp_sum
                else:
                    # Other variables are a monthly mean of the daily values
                    monthly_sum += monthly_temp_sum / monthly_temp_count
                # Count the year separately for each month, only where that
                #   month actually had data (collapsing the mask over the
                #   month axis counted the year for every month)
                monthly_count += (monthly_temp_count > 0)
                del monthly_temp_sum, monthly_temp_count

        # Save the projected climatology arrays
        if daily_flag:
            for doy_i in range(daily_sum.shape[0]):
                daily_name = daily_fmt.format(
                    var=output_var, start=start_year, end=end_year,
                    doy=doy_i + 1)
                daily_path = os.path.join(daily_ws, daily_name)
                drigo.array_to_raster(
                    daily_sum[doy_i, :, :] / daily_count[doy_i, :, :],
                    daily_path, output_geo=output_geo,
                    output_proj=daymet_proj, stats_flag=stats_flag)
            del daily_sum, daily_count
        if monthly_flag:
            for month_i in range(monthly_sum.shape[0]):
                monthly_name = monthly_fmt.format(
                    var=output_var, start=start_year, end=end_year,
                    month=month_i + 1)
                monthly_path = os.path.join(var_ws, monthly_name)
                drigo.array_to_raster(
                    monthly_sum[month_i, :, :] / monthly_count[month_i, :, :],
                    monthly_path, output_geo=output_geo,
                    output_proj=daymet_proj, stats_flag=stats_flag)
            del monthly_sum, monthly_count
        if annual_flag:
            annual_name = annual_fmt.format(
                var=output_var, start=start_year, end=end_year)
            annual_path = os.path.join(var_ws, annual_name)
            drigo.array_to_raster(
                annual_sum / annual_count, annual_path,
                output_geo=output_geo, output_proj=daymet_proj,
                stats_flag=stats_flag)
            del annual_sum, annual_count

    logging.debug('\nScript Complete')
Beispiel #3
0
def main(netcdf_ws=os.getcwd(),
         ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(),
         variables=['prcp'],
         start_date=None,
         end_date=None,
         extent_path=None,
         output_extent=None,
         stats_flag=True,
         overwrite_flag=False):
    """Extract DAYMET temperature

    Parameters
    ----------
    netcdf_ws : str
        Folder of DAYMET netcdf files.
    ancillary_ws : str
        Folder of ancillary rasters.
    output_ws : str
        Folder of output rasters.
    variables : list, optional
        DAYMET variables to download ('prcp', 'srad', 'vp', 'tmmn', 'tmmx').
        Set as ['all'] to process all variables.
    start_date : str, optional
        ISO format date (YYYY-MM-DD).
    end_date : str, optional
        ISO format date (YYYY-MM-DD).
    extent_path : str, optional
        File path defining the output extent.
    output_extent : list, optional
        Decimal degrees values defining output extent.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    None

    """
    logging.info('\nExtracting DAYMET variables')

    # If a date is not set, process 2015
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except:
        start_dt = dt.datetime(2015, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date:   {}'.format(end_dt))
    except:
        end_dt = dt.datetime(2015, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))

    # Get DAYMET spatial reference from an ancillary raster
    mask_raster = os.path.join(ancillary_ws, 'daymet_mask.img')

    daymet_re = re.compile('daymet_v3_(?P<VAR>\w+)_(?P<YEAR>\d{4})_na.nc4$')

    # DAYMET rasters to extract
    var_full_list = ['prcp', 'srad', 'vp', 'tmmn', 'tmmx']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif type(variables) is not list:
        # DEADBEEF - I could try converting comma separated strings to lists?
        logging.warning('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif 'all' in variables:
        logging.error('\nDownloading all variables\n  {}'.format(
            ','.join(var_full_list)))
        var_list = var_full_list[:]
    elif not set(variables).issubset(set(var_full_list)):
        logging.error(
            '\nERROR: variables parameter is invalid\n  {}'.format(variables))
        sys.exit()
    else:
        var_list = variables[:]

    # DAYMET band name dictionary
    # daymet_band_dict = dict()
    # daymet_band_dict['prcp'] = 'precipitation_amount'
    # daymet_band_dict['srad'] = 'surface_downwelling_shortwave_flux_in_air'
    # daymet_band_dict['sph'] = 'specific_humidity'
    # daymet_band_dict['tmin'] = 'air_temperature'
    # daymet_band_dict['tmax'] = 'air_temperature'

    # Get extent/geo from mask raster
    daymet_ds = gdal.Open(mask_raster)
    daymet_osr = drigo.raster_ds_osr(daymet_ds)
    daymet_proj = drigo.osr_proj(daymet_osr)
    daymet_cs = drigo.raster_ds_cellsize(daymet_ds, x_only=True)
    daymet_extent = drigo.raster_ds_extent(daymet_ds)
    daymet_geo = daymet_extent.geo(daymet_cs)
    daymet_x, daymet_y = daymet_extent.origin()
    daymet_ds = None
    logging.debug('  Projection: {}'.format(daymet_proj))
    logging.debug('  Cellsize: {}'.format(daymet_cs))
    logging.debug('  Geo: {}'.format(daymet_geo))
    logging.debug('  Extent: {}'.format(daymet_extent))
    logging.debug('  Origin: {} {}'.format(daymet_x, daymet_y))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        # Assume input extent is in decimal degrees
        output_extent = drigo.project_extent(drigo.Extent(output_extent),
                                             drigo.epsg_osr(4326), daymet_osr,
                                             0.001)
        output_extent = drigo.intersect_extents([daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug('  Geo: {}'.format(output_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            output_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            output_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        output_extent = drigo.project_extent(output_extent, extent_osr,
                                             daymet_osr, extent_cs)
        output_extent = drigo.intersect_extents([daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug('  Geo: {}'.format(output_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    else:
        output_extent = daymet_extent.copy()
        output_geo = daymet_geo[:]
    # output_shape = output_extent.shape(cs=daymet_cs)
    xi, yi = drigo.array_geo_offsets(daymet_geo, output_geo, daymet_cs)
    output_rows, output_cols = output_extent.shape(daymet_cs)
    logging.debug('  Shape: {} {}'.format(output_rows, output_cols))
    logging.debug('  Offsets: {} {} (x y)'.format(xi, yi))

    # Process each variable
    for input_var in var_list:
        logging.info("\nVariable: {}".format(input_var))

        # Rename variables to match cimis
        if input_var == 'prcp':
            output_var = 'ppt'
        else:
            output_var = input_var

        # Build output folder
        var_ws = os.path.join(output_ws, output_var)
        if not os.path.isdir(var_ws):
            os.makedirs(var_ws)

        # Process each file in the input workspace
        for input_name in sorted(os.listdir(netcdf_ws)):
            logging.debug("{}".format(input_name))
            input_match = daymet_re.match(input_name)
            if not input_match:
                logging.debug('  Regular expression didn\'t match, skipping')
                continue
            elif input_match.group('VAR') != input_var:
                logging.debug('  Variable didn\'t match, skipping')
                continue
            year_str = input_match.group('YEAR')
            logging.info("  Year: {}".format(year_str))
            year_int = int(year_str)
            year_days = int(dt.datetime(year_int, 12, 31).strftime('%j'))
            if start_dt is not None and year_int < start_dt.year:
                logging.debug('    Before start date, skipping')
                continue
            elif end_dt is not None and year_int > end_dt.year:
                logging.debug('    After end date, skipping')
                continue

            # Build input file path
            input_raster = os.path.join(netcdf_ws, input_name)
            # if not os.path.isfile(input_raster):
            #     logging.debug(
            #         '    Input raster doesn\'t exist, skipping    {}'.format(
            #             input_raster))
            #     continue

            # Build output folder
            output_year_ws = os.path.join(var_ws, year_str)
            if not os.path.isdir(output_year_ws):
                os.makedirs(output_year_ws)

            # Read in the DAYMET NetCDF file
            input_nc_f = netCDF4.Dataset(input_raster, 'r')
            # logging.debug(input_nc_f.variables)

            # Check all valid dates in the year
            year_dates = _utils.date_range(dt.datetime(year_int, 1, 1),
                                           dt.datetime(year_int + 1, 1, 1))
            for date_dt in year_dates:
                if start_dt is not None and date_dt < start_dt:
                    logging.debug('  {} - before start date, skipping'.format(
                        date_dt.date()))
                    continue
                elif end_dt is not None and date_dt > end_dt:
                    logging.debug('  {} - after end date, skipping'.format(
                        date_dt.date()))
                    continue
                else:
                    logging.info('  {}'.format(date_dt.date()))

                output_path = os.path.join(
                    output_year_ws,
                    '{}_{}_daymet.img'.format(output_var,
                                              date_dt.strftime('%Y%m%d')))
                if os.path.isfile(output_path):
                    logging.debug('    {}'.format(output_path))
                    if not overwrite_flag:
                        logging.debug('    File already exists, skipping')
                        continue
                    else:
                        logging.debug(
                            '    File already exists, removing existing')
                        os.remove(output_path)

                doy = int(date_dt.strftime('%j'))
                doy_i = range(1, year_days + 1).index(doy)

                # Arrays are being read as masked array with a fill value of -9999
                # Convert to basic numpy array arrays with nan values
                try:
                    input_ma = input_nc_f.variables[input_var][doy_i, yi:yi +
                                                               output_rows,
                                                               xi:xi +
                                                               output_cols]
                except IndexError:
                    logging.info('    date not in netcdf, skipping')
                    continue
                input_nodata = float(input_ma.fill_value)
                output_array = input_ma.data.astype(np.float32)
                output_array[output_array == input_nodata] = np.nan

                # Convert Kelvin to Celsius
                if input_var in ['tmax', 'tmin']:
                    output_array -= 273.15

                # Save the array as 32-bit floats
                drigo.array_to_raster(output_array.astype(np.float32),
                                      output_path,
                                      output_geo=output_geo,
                                      output_proj=daymet_proj,
                                      stats_flag=stats_flag)
                del input_ma, output_array
            input_nc_f.close()
            del input_nc_f

    logging.debug('\nScript Complete')
def main(netcdf_ws=os.getcwd(),
         ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(),
         etr_flag=False,
         eto_flag=False,
         start_date=None,
         end_date=None,
         extent_path=None,
         output_extent=None,
         stats_flag=True,
         overwrite_flag=False):
    """Extract daily GRIDMET ETr/ETo into yearly multi-band rasters

    The values are read from the "eto_YYYY.nc" / "etr_YYYY.nc" files in
    netcdf_ws, clipped to the requested extent and written one day per band
    (band number = day of year) to a 366 band raster per year in the "eto"
    and "etr" subfolders of output_ws.

    Parameters
    ----------
    netcdf_ws : str
        Folder of GRIDMET netcdf files.
    ancillary_ws : str
        Folder of ancillary rasters ("gridmet_elev.img", "gridmet_lat.img").
    output_ws : str
        Folder of output rasters.
    etr_flag : bool, optional
        If True, extract alfalfa reference ET (ETr) (the default is False).
    eto_flag : bool, optional
        If True, extract grass reference ET (ETo) (the default is False).
        If neither flag is set, ETr is processed.
    start_date : str, optional
        ISO format date (YYYY-MM-DD).  If missing or not parseable,
        2017-01-01 is used.
    end_date : str, optional
        ISO format date (YYYY-MM-DD).  If missing or not parseable,
        2017-12-31 is used.
    extent_path : str, optional
        File path defining the output extent.
    output_extent : list, optional
        Decimal degrees values defining output extent.
        Takes precedence over extent_path.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    None
        False is returned instead if extent_path is set but is not a file.

    Notes
    -----
    The workspace defaults are evaluated once, when the function is defined,
    so they refer to the working directory at import time.

    The script exits (sys.exit) if the ancillary elevation or latitude
    raster is all nodata within the output extent.

    """
    logging.info('\nComputing GRIDMET ETo/ETr')
    np.seterr(invalid='ignore')

    # Compute ETr and/or ETo
    if not etr_flag and not eto_flag:
        logging.info('  ETo/ETr flag(s) not set, defaulting to ETr')
        etr_flag = True

    # If a date is not set, process 2017
    # strptime raises TypeError for None and ValueError for a malformed
    #   string; anything else should not be silently swallowed
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2017, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date:   {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2017, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))

    # GRIDMET ancillary rasters
    elev_raster = os.path.join(ancillary_ws, 'gridmet_elev.img')
    lat_raster = os.path.join(ancillary_ws, 'gridmet_lat.img')

    # Requested products, always processed in this order
    var_list = [
        var for var, flag in (('eto', eto_flag), ('etr', etr_flag)) if flag]
    var_label = {'eto': 'ETo', 'etr': 'ETr'}
    var_ws = {var: os.path.join(output_ws, var) for var in var_list}
    raster_fmt = {
        'eto': 'eto_{}_daily_gridmet.img',
        'etr': 'etr_{}_daily_gridmet.img',
    }

    # GRIDMET band name dictionary
    gridmet_band_dict = {
        'eto': 'potential_evapotranspiration',
        'etr': 'potential_evapotranspiration',
    }

    # Get extent/geo from elevation raster
    gridmet_ds = gdal.Open(elev_raster)
    gridmet_osr = drigo.raster_ds_osr(gridmet_ds)
    gridmet_proj = drigo.osr_proj(gridmet_osr)
    gridmet_cs = drigo.raster_ds_cellsize(gridmet_ds, x_only=True)
    gridmet_extent = drigo.raster_ds_extent(gridmet_ds)
    gridmet_full_geo = gridmet_extent.geo(gridmet_cs)
    gridmet_x, gridmet_y = gridmet_extent.origin()
    gridmet_ds = None
    logging.debug('  Projection: {}'.format(gridmet_proj))
    logging.debug('  Cellsize: {}'.format(gridmet_cs))
    logging.debug('  Geo: {}'.format(gridmet_full_geo))
    logging.debug('  Extent: {}'.format(gridmet_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        gridmet_extent = drigo.Extent(output_extent)
        gridmet_extent.adjust_to_snap('EXPAND', gridmet_x, gridmet_y,
                                      gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug('  Geo: {}'.format(gridmet_geo))
        # Log the snapped extent (the raw input was already logged above)
        logging.debug('  Extent: {}'.format(gridmet_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if not os.path.isfile(extent_path):
            logging.error('\nThe extent object not exist, exiting\n'
                          '  {}'.format(extent_path))
            return False
        elif extent_path.lower().endswith('.shp'):
            gridmet_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            gridmet_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        gridmet_extent = drigo.project_extent(gridmet_extent, extent_osr,
                                              gridmet_osr, extent_cs)
        gridmet_extent.adjust_to_snap('EXPAND', gridmet_x, gridmet_y,
                                      gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug('  Geo: {}'.format(gridmet_geo))
        logging.debug('  Extent: {}'.format(gridmet_extent))
    else:
        gridmet_geo = gridmet_full_geo

    # Get indices for slicing/clipping input arrays
    g_i, g_j = drigo.array_geo_offsets(gridmet_full_geo,
                                       gridmet_geo,
                                       cs=gridmet_cs)
    g_rows, g_cols = gridmet_extent.shape(cs=gridmet_cs)

    # Flip row indices since GRIDMET arrays are flipped up/down
    # Hard coding GRIDMET row count for now
    gridmet_rows = 585
    row_a, row_b = gridmet_rows - (g_j + g_rows), gridmet_rows - g_j
    col_a, col_b = g_i, g_i + g_cols

    # Read the elevation and latitude arrays
    # These are only used to check that the extent has ancillary data
    elev_array = drigo.raster_to_array(elev_raster,
                                       mask_extent=gridmet_extent,
                                       return_nodata=False)
    lat_array = drigo.raster_to_array(lat_raster,
                                      mask_extent=gridmet_extent,
                                      return_nodata=False)

    # Check elevation and latitude arrays
    if np.all(np.isnan(elev_array)):
        logging.error('\nERROR: The elevation array is all nodata, exiting\n')
        sys.exit()
    elif np.all(np.isnan(lat_array)):
        logging.error('\nERROR: The latitude array is all nodata, exiting\n')
        sys.exit()
    del elev_array, lat_array

    # Build output folders
    for var in var_list:
        if not os.path.isdir(var_ws[var]):
            os.makedirs(var_ws[var])

    # Process each year separately
    for year_int in range(start_dt.year, end_dt.year + 1):
        year_str = str(year_int)
        logging.info("\nYear: {}".format(year_str))

        # Build input file paths
        # Skip the year if any requested NetCDF is missing
        nc_paths = {
            var: os.path.join(netcdf_ws, '{}_{}.nc'.format(var, year_str))
            for var in var_list}
        missing_var = next(
            (var for var in var_list if not os.path.isfile(nc_paths[var])),
            None)
        if missing_var is not None:
            logging.debug('  {} NetCDF doesn\'t exist\n    {}'.format(
                var_label[missing_var], nc_paths[missing_var]))
            continue

        # Create a single raster for each year with 366 bands
        # Each day will be stored in a separate band
        # DEADBEEF - Need to find a way to test if the raster was not
        #   rebuilt and pass logging debug message to user
        rasters = dict()
        for var in var_list:
            rasters[var] = os.path.join(
                var_ws[var], raster_fmt[var].format(year_str))
            if overwrite_flag or not os.path.isfile(rasters[var]):
                logging.debug('  {}'.format(rasters[var]))
                drigo.build_empty_raster(rasters[var],
                                         band_cnt=366,
                                         output_dtype=np.float32,
                                         output_proj=gridmet_proj,
                                         output_cs=gridmet_cs,
                                         output_extent=gridmet_extent,
                                         output_fill_flag=True)

        # Read in the GRIDMET NetCDF files
        # Immediately clip input arrays to save memory
        logging.info('  Reading NetCDFs into memory')
        nc_arrays = dict()
        for var in var_list:
            logging.debug("    {}".format(nc_paths[var]))
            nc_f = netCDF4.Dataset(nc_paths[var], 'r')
            try:
                nc_array = nc_f.variables[gridmet_band_dict[var]][
                    :, row_a:row_b, col_a:col_b].copy()
            finally:
                # Always release the file handle, even if the read fails
                nc_f.close()
            del nc_f
            nc_array = np.flip(nc_array, 1)

            # A numpy array is returned when slicing a masked array
            #   if there are no masked pixels
            # This is a hack to force the numpy array back to a masked array
            if not isinstance(nc_array, np.ma.core.MaskedArray):
                nc_array = np.ma.core.MaskedArray(
                    nc_array, np.zeros(nc_array.shape, dtype=bool))
            nc_arrays[var] = nc_array
            del nc_array

        # Check all valid dates in the year
        year_dates = _utils.date_range(dt.datetime(year_int, 1, 1),
                                       dt.datetime(year_int + 1, 1, 1))
        for date_dt in year_dates:
            if date_dt < start_dt:
                logging.debug('  {} - before start date, skipping'.format(
                    date_dt.date()))
                continue
            elif date_dt > end_dt:
                logging.debug('  {} - after end date, skipping'.format(
                    date_dt.date()))
                continue
            else:
                logging.info('  {}'.format(date_dt.date()))

            # Day of year is the (1 based) output band number
            #   and the (0 based) index along the first NetCDF axis
            doy = int(date_dt.strftime('%j'))
            doy_i = doy - 1

            # Each product is handled independently so that a date missing
            #   from one NetCDF does not skip the other product
            for var in var_list:
                try:
                    day_ma = nc_arrays[var][doy_i, :, :]
                except IndexError:
                    logging.info('    date not in netcdf, skipping')
                    continue

                # Convert the masked array to a basic numpy array
                #   with nan in place of the fill value
                day_array = day_ma.data.astype(np.float32)
                day_nodata = float(day_ma.fill_value)
                day_array[day_array == day_nodata] = np.nan

                # Since inputs are netcdf, need to create GDAL raster
                #   datasets in order to use gdal_common functions
                # Create an in memory dataset of the clipped array
                day_ds = drigo.array_to_mem_ds(day_array,
                                               output_geo=gridmet_geo,
                                               output_proj=gridmet_proj)

                # Then extract the subset from the in memory dataset
                day_array = drigo.raster_ds_to_array(
                    day_ds, 1, mask_extent=gridmet_extent,
                    return_nodata=False)

                # Save
                drigo.array_to_comp_raster(day_array.astype(np.float32),
                                           rasters[var],
                                           band=doy,
                                           stats_flag=False)

                # Cleanup
                del day_ma, day_ds, day_array

        del nc_arrays

        if stats_flag:
            for var in var_list:
                drigo.raster_statistics(rasters[var])

        # NOTE: A disabled (commented out) alternative that computed ETo/ETr
        #   from the component GRIDMET variables (tmmn, tmmx, sph, srad, vs)
        #   with refet.Daily was removed from here.  It also needed the
        #   elevation array, the latitude array in radians, and the wind
        #   measurement height.

    logging.debug('\nScript Complete')
Beispiel #5
0
def main(start_dt,
         end_dt,
         netcdf_ws,
         ancillary_ws,
         output_ws,
         extent_path=None,
         output_extent=None,
         stats_flag=True,
         overwrite_flag=False):
    """Extract daily DAYMET vapor pressure rasters

    Each DAYMET 'vp' netcdf file found in netcdf_ws is read one day at a
    time, converted to actual vapor pressure (ea) in kPa, and written to
    <output_ws>/ea/<YYYY>/ea_<YYYYMMDD>_daymet.img.

    Parameters
    ----------
    start_dt : datetime
        Start date.  If None, no lower date limit is applied.
    end_dt : datetime
        End date.  If None, no upper date limit is applied.
    netcdf_ws : str
        Folder of DAYMET netcdf files.
    ancillary_ws : str
        Folder of ancillary rasters (must contain 'daymet_mask.img').
    output_ws : str
        Folder of output rasters.
    extent_path : str, optional
        File path (shapefile or raster) defining the output extent.
        Only used if output_extent is not set.
    output_extent : list, optional
        Decimal degrees values defining output extent.
        Takes precedence over extent_path.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    None

    """
    logging.info('\nExtracting DAYMET vapor pressure')
    logging.debug('  Start date: {}'.format(start_dt))
    logging.debug('  End date:   {}'.format(end_dt))

    # Get DAYMET spatial reference from an ancillary raster
    mask_raster = os.path.join(ancillary_ws, 'daymet_mask.img')

    # Raw string so that \w and \d are regex classes, not string escapes
    daymet_re = re.compile(
        r'daymet_v3_(?P<VAR>\w+)_(?P<YEAR>\d{4})_na\.nc4$')

    # Get extent/geo from mask raster
    daymet_ds = gdal.Open(mask_raster)
    daymet_osr = drigo.raster_ds_osr(daymet_ds)
    daymet_proj = drigo.osr_proj(daymet_osr)
    daymet_cs = drigo.raster_ds_cellsize(daymet_ds, x_only=True)
    daymet_extent = drigo.raster_ds_extent(daymet_ds)
    daymet_geo = daymet_extent.geo(daymet_cs)
    daymet_x, daymet_y = daymet_extent.origin()
    daymet_ds = None
    logging.debug('  Projection: {}'.format(daymet_proj))
    logging.debug('  Cellsize: {}'.format(daymet_cs))
    logging.debug('  Geo: {}'.format(daymet_geo))
    logging.debug('  Extent: {}'.format(daymet_extent))
    logging.debug('  Origin: {} {}'.format(daymet_x, daymet_y))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        # Assume input extent is in decimal degrees
        output_extent = drigo.project_extent(drigo.Extent(output_extent),
                                             drigo.epsg_osr(4326), daymet_osr,
                                             0.001)
        output_extent = drigo.intersect_extents([daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug('  Geo: {}'.format(output_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            output_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            output_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        output_extent = drigo.project_extent(output_extent, extent_osr,
                                             daymet_osr, extent_cs)
        output_extent = drigo.intersect_extents([daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug('  Geo: {}'.format(output_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    else:
        # No subset requested, use the full DAYMET grid
        output_extent = daymet_extent.copy()
        output_geo = daymet_geo[:]

    # Row/column offsets of the output window within the full DAYMET grid
    xi, yi = drigo.array_geo_offsets(daymet_geo, output_geo, daymet_cs)
    output_rows, output_cols = output_extent.shape(daymet_cs)
    logging.debug('  Shape: {} {}'.format(output_rows, output_cols))
    logging.debug('  Offsets: {} {} (x y)'.format(xi, yi))

    # Process each variable
    input_var = 'vp'
    output_var = 'ea'
    logging.info("\nVariable: {}".format(input_var))

    # Build output folder
    var_ws = os.path.join(output_ws, output_var)
    if not os.path.isdir(var_ws):
        os.makedirs(var_ws)

    # Process each file in the input workspace
    for input_name in sorted(os.listdir(netcdf_ws)):
        logging.debug("{}".format(input_name))
        input_match = daymet_re.match(input_name)
        if not input_match:
            logging.debug('  Regular expression didn\'t match, skipping')
            continue
        elif input_match.group('VAR') != input_var:
            logging.debug('  Variable didn\'t match, skipping')
            continue
        year_str = input_match.group('YEAR')
        logging.info("  Year: {}".format(year_str))
        year_int = int(year_str)
        if start_dt is not None and year_int < start_dt.year:
            logging.debug('    Before start date, skipping')
            continue
        elif end_dt is not None and year_int > end_dt.year:
            logging.debug('    After end date, skipping')
            continue

        # Build input file path
        input_raster = os.path.join(netcdf_ws, input_name)

        # Build output folder
        output_year_ws = os.path.join(var_ws, year_str)
        if not os.path.isdir(output_year_ws):
            os.makedirs(output_year_ws)

        # Read in the DAYMET NetCDF file
        input_nc_f = netCDF4.Dataset(input_raster, 'r')
        try:
            # Check all valid dates in the year
            year_dates = _utils.date_range(dt.datetime(year_int, 1, 1),
                                           dt.datetime(year_int + 1, 1, 1))
            for date_dt in year_dates:
                if start_dt is not None and date_dt < start_dt:
                    logging.debug('  {} - before start date, skipping'.format(
                        date_dt.date()))
                    continue
                elif end_dt is not None and date_dt > end_dt:
                    logging.debug('  {} - after end date, skipping'.format(
                        date_dt.date()))
                    continue
                else:
                    logging.info('  {}'.format(date_dt.date()))

                output_path = os.path.join(
                    output_year_ws,
                    '{}_{}_daymet.img'.format(output_var,
                                              date_dt.strftime('%Y%m%d')))
                if os.path.isfile(output_path):
                    logging.debug('    {}'.format(output_path))
                    if not overwrite_flag:
                        logging.debug('    File already exists, skipping')
                        continue
                    else:
                        logging.debug(
                            '    File already exists, removing existing')
                        os.remove(output_path)

                # Zero based index of the day along the netcdf time axis
                doy_i = int(date_dt.strftime('%j')) - 1

                # Arrays are being read as masked array with a fill value
                # Convert to basic numpy array arrays with nan values
                try:
                    input_ma = input_nc_f.variables[input_var][
                        doy_i, yi:yi + output_rows, xi:xi + output_cols]
                except IndexError:
                    # The file has fewer days than the calendar year
                    logging.info('    date not in netcdf, skipping')
                    continue
                input_nodata = float(input_ma.fill_value)
                vp_array = input_ma.data.astype(np.float32)
                vp_array[vp_array == input_nodata] = np.nan

                # DAYMET "vp" is water vapor pressure in Pa (see the Daymet
                #   documentation), not specific humidity, so ea [kPa] is
                #   only a unit conversion and air pressure is not needed
                ea_array = vp_array / 1000.0

                # Save the array as 32-bit floats
                drigo.array_to_raster(ea_array.astype(np.float32),
                                      output_path,
                                      output_geo=output_geo,
                                      output_proj=daymet_proj,
                                      stats_flag=stats_flag)

                del input_ma, ea_array, vp_array
        finally:
            # Always release the netcdf file handle, even on errors
            input_nc_f.close()
        del input_nc_f

    logging.debug('\nScript Complete')