def main(ini_path, overwrite_flag=True):
    """Generate Beamer ETg summary tables

    For every zone in the zones shapefile, the per-zone Landsat daily CSV is
    filtered using the INI settings, merged with GRIDMET ETo/PPT totals
    aggregated over the GRIDMET month range (hardcoded below to the water
    year, October through September), and ETg/ET columns are computed.
    The per-scene values are written to a "daily" Excel workbook and the
    per-zone/per-year aggregates to an "annual" Excel workbook, with one
    sheet per zone in each.

    Args:
        ini_path (str): path of the INI config file (read with inputs.read)
        overwrite_flag (bool): if True, overwrite existing tables
            Default is True (for now)

    Returns:
        True if overwrite_flag is False and both output workbooks already
        exist, False if no zone produced any data (nothing is written),
        otherwise None.

    Notes:
        The script exits (sys.exit) if the ETo or PPT unit combination
        in the INI is not supported.
    """
    logging.info('\nGenerate Beamer ETg summary tables')

    # TODO: Eventually get the daily/annual output field lists from the INI
    #   (like ini['BEAMER']['landsat_products'])

    # For unit conversion
    eto_fields = [
        'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI',
        'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI',
        'ETO'
    ]
    ppt_fields = ['PPT']

    # Statistic suffixes shared by the ETSTAR_*, ETG_* and ET_* columns
    stat_suffixes = ['MEAN', 'LPI', 'UPI', 'LCI', 'UCI']

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='BEAMER')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='TABLES')

    # Hardcode GRIDMET month range to the water year
    ini['SUMMARY']['gridmet_start_month'] = 10
    ini['SUMMARY']['gridmet_end_month'] = 9

    # Output paths
    output_daily_path = os.path.join(
        ini['SUMMARY']['output_ws'],
        ini['BEAMER']['output_name'].replace('.csv', '_daily.xlsx'))
    output_annual_path = os.path.join(
        ini['SUMMARY']['output_ws'],
        ini['BEAMER']['output_name'].replace('.csv', '_annual.xlsx'))

    # Check if files already exist
    if overwrite_flag:
        if os.path.isfile(output_daily_path):
            os.remove(output_daily_path)
        if os.path.isfile(output_annual_path):
            os.remove(output_annual_path)
    elif (os.path.isfile(output_daily_path) and
            os.path.isfile(output_annual_path)):
        logging.info('\nOutput files already exist and '
                     'overwrite is False, exiting')
        return True

    # Start/end year
    year_list = list(
        range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(
        utils.wrapped_range(ini['INPUTS']['start_month'],
                            ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(
        utils.wrapped_range(ini['INPUTS']['start_doy'],
                            ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(
        utils.month_range(gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(', '.join(
        map(str, gridmet_months))))

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'],
        zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)

    # Filter features by FID before merging geometries
    # (the first item of each zone object is the zone FID)
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']
        ]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']
        ]

    logging.info('\nProcessing zones')
    zone_df_dict = {}
    for zone_fid, zone_name, _ in zone_geom_list:
        zone_name = zone_name.replace(' ', '_')
        logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid))

        zone_stats_ws = os.path.join(
            ini['ZONAL_STATS']['output_ws'], zone_name)
        if not os.path.isdir(zone_stats_ws):
            logging.debug(
                ' Folder {} does not exist, skipping'.format(zone_stats_ws))
            continue

        # Input paths
        landsat_daily_path = os.path.join(
            zone_stats_ws, '{}_landsat_daily.csv'.format(zone_name))
        gridmet_daily_path = os.path.join(
            zone_stats_ws, '{}_gridmet_daily.csv'.format(zone_name))
        gridmet_monthly_path = os.path.join(
            zone_stats_ws, '{}_gridmet_monthly.csv'.format(zone_name))

        if not os.path.isfile(landsat_daily_path):
            logging.error(' Landsat daily CSV does not exist, skipping zone')
            continue
        elif (not os.path.isfile(gridmet_daily_path) and
                not os.path.isfile(gridmet_monthly_path)):
            # DEADBEEF - Eventually support generating only Landsat figures
            logging.error(
                ' GRIDMET daily or monthly CSV does not exist, skipping zone')
            continue

        logging.debug(' Reading Landsat CSV')
        landsat_df = pd.read_csv(landsat_daily_path)

        # QA field should have been written in zonal stats code
        # Eventually this block can be removed
        # (set before any filtering so the column is added to the frame
        #  that was read and not to a filtered slice)
        if 'QA' not in landsat_df.columns.values:
            landsat_df['QA'] = 0

        logging.debug(' Filtering Landsat dataframe')
        landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0]

        if year_list:
            landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)]
        if month_list:
            landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)]
        if doy_list:
            landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)]

        # Assume the default is for these to be True and only filter if False
        if not ini['INPUTS']['landsat4_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT04']
        if not ini['INPUTS']['landsat5_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT05']
        if not ini['INPUTS']['landsat7_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LE07']
        if not ini['INPUTS']['landsat8_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LC08']

        if ini['INPUTS']['path_keep_list']:
            landsat_df = landsat_df[landsat_df['PATH'].isin(
                ini['INPUTS']['path_keep_list'])]
        if (ini['INPUTS']['row_keep_list'] and
                ini['INPUTS']['row_keep_list'] != ['XXX']):
            landsat_df = landsat_df[landsat_df['ROW'].isin(
                ini['INPUTS']['row_keep_list'])]

        # The SCENE_ID values can contain an XXX placeholder that the
        # keep/skip lists do not, so XXX is replaced with the zero padded
        # ROW value before comparing.  The mask is applied by position
        # (.values) since the helper series has a fresh index.
        if ini['INPUTS']['scene_id_keep_list']:
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])
            ])
            landsat_df = landsat_df[scene_id_df.isin(
                ini['INPUTS']['scene_id_keep_list']).values]
        if ini['INPUTS']['scene_id_skip_list']:
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])
            ])
            landsat_df = landsat_df[np.logical_not(
                scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)]

        # Filter by QA/QC value
        if ini['SUMMARY']['max_qa'] >= 0 and not landsat_df.empty:
            logging.debug(' Maximum QA: {0}'.format(
                ini['SUMMARY']['max_qa']))
            landsat_df = landsat_df[
                landsat_df['QA'] <= ini['SUMMARY']['max_qa']]

        # Filter by average cloud score
        if ini['SUMMARY']['max_cloud_score'] < 100 and not landsat_df.empty:
            logging.debug(' Maximum cloud score: {0}'.format(
                ini['SUMMARY']['max_cloud_score']))
            landsat_df = landsat_df[
                landsat_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

        # Filter by Fmask percentage
        # (FMASK_PCT is kept as a column, so it ends up in the daily table)
        if ini['SUMMARY']['max_fmask_pct'] < 100 and not landsat_df.empty:
            landsat_df['FMASK_PCT'] = 100 * (
                landsat_df['FMASK_COUNT'] / landsat_df['FMASK_TOTAL'])
            logging.debug(' Max Fmask threshold: {}'.format(
                ini['SUMMARY']['max_fmask_pct']))
            landsat_df = landsat_df[
                landsat_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

        # Filter low count SLC-off images
        if ini['SUMMARY']['min_slc_off_pct'] > 0 and not landsat_df.empty:
            logging.debug(' Mininum SLC-off threshold: {}%'.format(
                ini['SUMMARY']['min_slc_off_pct']))
            # Landsat 7 scenes after DOY 151 of 2003 are treated as SLC-off
            slc_off_mask = (
                (landsat_df['PLATFORM'] == 'LE07') &
                ((landsat_df['YEAR'] >= 2004) |
                 ((landsat_df['YEAR'] == 2003) & (landsat_df['DOY'] > 151))))
            slc_off_pct = 100 * (
                landsat_df['PIXEL_COUNT'] / landsat_df['PIXEL_TOTAL'])
            # Only the SLC-off scenes are subject to the threshold
            landsat_df = landsat_df[
                ((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) &
                 slc_off_mask) |
                (~slc_off_mask)]

        if landsat_df.empty:
            logging.error(
                ' Empty Landsat dataframe after filtering, skipping zone')
            continue

        # Aggregate GRIDMET (to water year)
        # At least one of the two files exists (checked above),
        #   the monthly file is preferred
        if os.path.isfile(gridmet_monthly_path):
            logging.debug(' Reading montly GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_monthly_path)
        elif os.path.isfile(gridmet_daily_path):
            logging.debug(' Reading daily GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_daily_path)

        logging.debug(' Computing GRIDMET summaries')
        # Summarize GRIDMET for target months year
        start_in_fall = gridmet_start_month in [10, 11, 12]
        end_in_fall = gridmet_end_month in [10, 11, 12]
        if start_in_fall and end_in_fall:
            month_mask = (
                (gridmet_df['MONTH'] >= gridmet_start_month) &
                (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
        elif start_in_fall and not end_in_fall:
            # Range wraps the calendar year, the fall months are grouped
            #   with the following year
            month_mask = gridmet_df['MONTH'] >= gridmet_start_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
            month_mask = gridmet_df['MONTH'] <= gridmet_end_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        else:
            month_mask = (
                (gridmet_df['MONTH'] >= gridmet_start_month) &
                (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        # GROUP_YEAR for rows not in the GRIDMET month range will be NAN
        gridmet_df = gridmet_df[~pd.isnull(gridmet_df['GROUP_YEAR'])]

        if year_list:
            gridmet_df = gridmet_df[gridmet_df['GROUP_YEAR'].isin(year_list)]

        if gridmet_df.empty:
            logging.error(
                ' Empty GRIDMET dataframe after filtering by year')
            continue

        # Group GRIDMET data by user specified range (default is water year)
        gridmet_group_df = gridmet_df \
            .groupby(['ZONE_NAME', 'ZONE_FID', 'GROUP_YEAR']) \
            .agg({'ETO': 'sum', 'PPT': 'sum'}) \
            .reset_index() \
            .sort_values(by='GROUP_YEAR')
        # Rename wasn't working when chained...
        gridmet_group_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True)
        gridmet_group_df['YEAR'] = gridmet_group_df['YEAR'].astype(int)

        # Merge Landsat and GRIDMET collections
        zone_df = landsat_df.merge(
            gridmet_group_df, on=['ZONE_NAME', 'ZONE_FID', 'YEAR'])
        if zone_df is None or zone_df.empty:
            logging.info(' Empty zone dataframe, not generating figures')
            continue

        # Compute ETg
        eto_minus_ppt = zone_df['ETO'] - zone_df['PPT']
        for suffix in stat_suffixes:
            zone_df['ETG_' + suffix] = (
                zone_df['ETSTAR_' + suffix] * eto_minus_ppt)

        # Compute ET
        # (separate loop so the ETG_* columns all come before the ET_* ones)
        for suffix in stat_suffixes:
            zone_df['ET_' + suffix] = (
                zone_df['ETG_' + suffix] + zone_df['PPT'])

        # Append zone dataframes
        zone_df_dict[zone_name] = zone_df

    # Without any zone data there is nothing to write
    # (pd.concat() raises on an empty list and a workbook needs a sheet)
    if not zone_df_dict:
        logging.error('\nNo zone data to summarize, no tables were written')
        return False

    # Export each zone to a separate tab
    if not os.path.isfile(output_daily_path):
        logging.info('\nWriting daily values to Excel')
        # The context manager saves and closes the workbook
        with ExcelWriter(output_daily_path) as excel_f:
            for zone_name in sorted(zone_df_dict):
                logging.info(' {}'.format(zone_name))
                zone_df_dict[zone_name].to_excel(
                    excel_f, sheet_name=zone_name, index=False,
                    float_format='%.4f')

    if not os.path.isfile(output_annual_path):
        logging.info('\nComputing annual summaries')
        annual_df = pd.concat(list(zone_df_dict.values())) \
            .groupby(['ZONE_NAME', 'YEAR']) \
            .agg({
                'PIXEL_COUNT': ['count', 'mean'],
                'PIXEL_TOTAL': ['mean'],
                'FMASK_COUNT': 'mean',
                'FMASK_TOTAL': 'mean',
                'CLOUD_SCORE': 'mean',
                'ETSTAR_COUNT': 'mean',
                'NDVI_TOA': 'mean',
                'NDWI_TOA': 'mean',
                'ALBEDO_SUR': 'mean',
                'TS': 'mean',
                'EVI_SUR': ['mean', 'median', 'min', 'max'],
                'ETSTAR_MEAN': 'mean',
                'ETG_MEAN': 'mean',
                'ETG_LPI': 'mean',
                'ETG_UPI': 'mean',
                'ETG_LCI': 'mean',
                'ETG_UCI': 'mean',
                'ET_MEAN': 'mean',
                'ET_LPI': 'mean',
                'ET_UPI': 'mean',
                'ET_LCI': 'mean',
                'ET_UCI': 'mean',
                'ETO': 'mean',
                'PPT': 'mean'
            })
        # Flatten the (field, statistic) column index to FIELD_statistic
        annual_df.columns = annual_df.columns.map('_'.join)
        annual_df = annual_df.rename(columns={
            'PIXEL_COUNT_count': 'SCENE_COUNT',
            'PIXEL_COUNT_mean': 'PIXEL_COUNT',
            'EVI_SUR_mean': 'EVI_SUR_MEAN',
            'EVI_SUR_median': 'EVI_SUR_MEDIAN',
            'EVI_SUR_min': 'EVI_SUR_MIN',
            'EVI_SUR_max': 'EVI_SUR_MAX'
        })
        # All of the remaining statistics are means, drop the suffix
        annual_df.rename(
            columns=lambda x: str(x).replace('_mean', ''), inplace=True)

        # The builtin int is used since the np.int alias was removed
        #   from NumPy
        count_fields = [
            'SCENE_COUNT', 'PIXEL_COUNT', 'PIXEL_TOTAL',
            'FMASK_COUNT', 'FMASK_TOTAL', 'ETSTAR_COUNT']
        for count_field in count_fields:
            annual_df[count_field] = annual_df[count_field].astype(int)
        annual_df = annual_df.reset_index()

        # Convert ETo and PPT units
        # Only millimeter inputs are supported, the divisors are the
        #   number of millimeters per output unit
        unit_divisors = {'in': 25.4, 'ft': 12 * 25.4}
        unit_fields = [('eto_units', eto_fields), ('ppt_units', ppt_fields)]
        for units_key, fields in unit_fields:
            input_units = ini['BEAMER'][units_key]
            output_units = ini['TABLES'][units_key]
            if input_units == 'mm' and output_units == 'mm':
                continue
            elif input_units == 'mm' and output_units in unit_divisors:
                annual_df[fields] /= unit_divisors[output_units]
            else:
                logging.error(
                    ('\nERROR: Input units {} and output units {} are not ' +
                     'currently supported, exiting').format(
                        input_units, output_units))
                sys.exit()

        logging.info('\nWriting annual values to Excel')
        # NOTE(review): the sheets are selected by comparing ZONE_NAME to
        #   the zone name with spaces replaced by underscores, this assumes
        #   the CSV zone names are stored in that form - confirm
        with ExcelWriter(output_annual_path) as excel_f:
            for zone_name in sorted(zone_df_dict):
                logging.info(' {}'.format(zone_name))
                zone_df = annual_df[annual_df['ZONE_NAME'] == zone_name]
                zone_df.to_excel(
                    excel_f, sheet_name=zone_name, index=False,
                    float_format='%.4f')
def main(ini_path, show_flag=False, overwrite_flag=True):
    """Generate Bokeh figures

    The zonal stats CSV is filtered using the INI settings and, for each
    zone, two HTML files are written to the "figures" folder of the summary
    workspace: the plot variables against day of year
    ("<zone>_timeseries_doy.html") and against date
    ("<zone>_timeseries_date.html").  The points are split by QA value so
    that each QA value can be toggled from the legend.

    Bokeh issues:
    Adjust y range based on non-muted data
        https://stackoverflow.com/questions/43620837/how-to-get-bokeh-to-dynamically-adjust-y-range-when-panning
    Linked interactive legends so that there is only one legend for the gridplot
    Maybe hide or mute QA values above max (instead of filtering them in advance)

    Args:
        ini_path (str): path of the INI config file (read with inputs.read)
        show_flag (bool): if True, show the figures in the browser.
            Default is False.
        overwrite_flag (bool): if True, overwrite existing tables.
            Default is True (for now)
            This flag is currently not used, existing figure files are
            always removed and rewritten.

    Returns:
        False if the dataframe is empty after filtering, otherwise None.

    Notes:
        The script exits (sys.exit) if one of the plot variables is not a
        column of the CSV.
    """
    logging.info('\nGenerate interactive timeseries figures')

    # Eventually read from INI
    plot_var_list = ['NDVI_TOA', 'ALBEDO_SUR', 'TS', 'NDWI_TOA', 'EVI_SUR']

    output_folder = 'figures'

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='FIGURES')
    inputs.parse_section(ini, section='BEAMER')

    # Output paths
    output_ws = os.path.join(ini['SUMMARY']['output_ws'], output_folder)
    if not os.path.isdir(output_ws):
        os.makedirs(output_ws)

    # Start/end year
    year_list = list(
        range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(
        utils.wrapped_range(ini['INPUTS']['start_month'],
                            ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(
        utils.wrapped_range(ini['INPUTS']['start_doy'],
                            ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    # (only reported in the log, GRIDMET data is not used for these figures)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(
        utils.month_range(gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(', '.join(
        map(str, gridmet_months))))

    # Read in the zonal stats CSV
    logging.debug(' Reading zonal stats CSV file')
    input_df = pd.read_csv(
        os.path.join(ini['ZONAL_STATS']['output_ws'],
                     ini['BEAMER']['output_name']))
    logging.debug(input_df.head())

    # The QA column is needed for the QA filter, the colors and the legend
    # Default it to 0 (as the summary tables script does) instead of
    #   failing with a KeyError when it was not written to the CSV
    if 'QA' not in input_df.columns.values:
        logging.warning(
            ' QA field not present in CSV, setting QA to 0 for all scenes')
        input_df['QA'] = 0

    logging.debug(' Filtering Landsat dataframe')
    input_df = input_df[input_df['PIXEL_COUNT'] > 0]

    if ini['INPUTS']['fid_keep_list']:
        input_df = input_df[input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_keep_list'])]
    if ini['INPUTS']['fid_skip_list']:
        input_df = input_df[~input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_skip_list'])]

    if year_list:
        input_df = input_df[input_df['YEAR'].isin(year_list)]
    if month_list:
        input_df = input_df[input_df['MONTH'].isin(month_list)]
    if doy_list:
        input_df = input_df[input_df['DOY'].isin(doy_list)]

    if ini['INPUTS']['path_keep_list']:
        input_df = input_df[input_df['PATH'].isin(
            ini['INPUTS']['path_keep_list'])]
    if (ini['INPUTS']['row_keep_list'] and
            ini['INPUTS']['row_keep_list'] != ['XXX']):
        input_df = input_df[input_df['ROW'].isin(
            ini['INPUTS']['row_keep_list'])]

    # Assume the default is for these to be True and only filter if False
    if not ini['INPUTS']['landsat4_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LT04']
    if not ini['INPUTS']['landsat5_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LT05']
    if not ini['INPUTS']['landsat7_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LE07']
    if not ini['INPUTS']['landsat8_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LC08']

    # The SCENE_ID values can contain an XXX placeholder that the keep/skip
    # lists do not, so XXX is replaced with the zero padded ROW value before
    # comparing.  The mask is applied by position (.values) since the helper
    # series has a fresh index.
    if ini['INPUTS']['scene_id_keep_list']:
        scene_id_df = pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])
        ])
        input_df = input_df[scene_id_df.isin(
            ini['INPUTS']['scene_id_keep_list']).values]
    if ini['INPUTS']['scene_id_skip_list']:
        scene_id_df = pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])
        ])
        input_df = input_df[np.logical_not(
            scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)]

    # Filter by QA/QC value
    if ini['SUMMARY']['max_qa'] >= 0 and not input_df.empty:
        logging.debug(' Maximum QA: {0}'.format(ini['SUMMARY']['max_qa']))
        input_df = input_df[input_df['QA'] <= ini['SUMMARY']['max_qa']]

    # First filter by average cloud score
    if ini['SUMMARY']['max_cloud_score'] < 100 and not input_df.empty:
        logging.debug(' Maximum cloud score: {0}'.format(
            ini['SUMMARY']['max_cloud_score']))
        input_df = input_df[
            input_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

    # Filter by Fmask percentage
    if ini['SUMMARY']['max_fmask_pct'] < 100 and not input_df.empty:
        input_df['FMASK_PCT'] = 100 * (
            input_df['FMASK_COUNT'] / input_df['FMASK_TOTAL'])
        logging.debug(' Max Fmask threshold: {}'.format(
            ini['SUMMARY']['max_fmask_pct']))
        input_df = input_df[
            input_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

    # Filter low count SLC-off images
    if ini['SUMMARY']['min_slc_off_pct'] > 0 and not input_df.empty:
        logging.debug(' Mininum SLC-off threshold: {}%'.format(
            ini['SUMMARY']['min_slc_off_pct']))
        # Landsat 7 scenes after DOY 151 of 2003 are treated as SLC-off
        slc_off_mask = (
            (input_df['PLATFORM'] == 'LE07') &
            ((input_df['YEAR'] >= 2004) |
             ((input_df['YEAR'] == 2003) & (input_df['DOY'] > 151))))
        slc_off_pct = 100 * (
            input_df['PIXEL_COUNT'] / input_df['PIXEL_TOTAL'])
        # Only the SLC-off scenes are subject to the threshold
        input_df = input_df[
            ((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) &
             slc_off_mask) |
            (~slc_off_mask)]

    if input_df.empty:
        logging.error(' Empty dataframe after filtering, exiting')
        return False

    # The legend is only added when more than one QA level can be present
    show_legend = ini['SUMMARY']['max_qa'] > 0

    # Try to not allow more than 4 plots in a column
    # (always use at least one column so short variable lists still plot)
    grid_ncols = max(1, len(plot_var_list) // 3)

    # Process each zone separately
    logging.debug(input_df.head())
    zone_name_list = sorted(set(input_df['ZONE_NAME'].values))
    for zone_name in zone_name_list:
        logging.info('ZONE: {}'.format(zone_name))
        # The names are currently stored in the CSV as spaces
        zone_output_name = zone_name.replace(' ', '_')
        zone_df = input_df[input_df['ZONE_NAME'] == zone_name]
        if zone_df.empty:
            logging.info(' Empty zone dataframe, skipping zone')
            continue

        # Output file paths
        output_doy_path = os.path.join(
            output_ws, '{}_timeseries_doy.html'.format(zone_output_name))
        output_date_path = os.path.join(
            output_ws, '{}_timeseries_date.html'.format(zone_output_name))

        # Check that plot variables are present
        for plot_var in plot_var_list:
            if plot_var not in zone_df.columns.values:
                logging.error(' The variable {} does not exist in the '
                              'dataframe'.format(plot_var))
                sys.exit()

        # Compute colors for each QA value
        # (viridis RGBA values, scaled to 0-255 and written as hex strings)
        logging.debug(' Building column data source')
        qa_values = sorted(set(zone_df['QA'].values))
        qa_rgba = 255 * cm.viridis(mpl.colors.Normalize()(qa_values))
        colors = {
            qa: "#%02x%02x%02x" % (int(r), int(g), int(b))
            for qa, (r, g, b, _) in zip(qa_values, qa_rgba)
        }
        logging.debug(' QA values: {}'.format(', '.join(map(str, qa_values))))

        # Unpack the data by QA type to support interactive legends
        sources = dict()
        for qa_value in qa_values:
            qa_df = zone_df[zone_df['QA'] == qa_value]
            qa_dates = pd.to_datetime(qa_df['DATE'])
            qa_data = {
                'INDEX': list(range(len(qa_df.index))),
                'PLATFORM': qa_df['PLATFORM'],
                'DATE': qa_dates,
                'DATE_STR': qa_dates.map(lambda x: x.strftime('%Y-%m-%d')),
                'DOY': qa_df['DOY'].values,
                'QA': qa_df['QA'].values,
                'COLOR': [colors[qa] for qa in qa_df['QA'].values]
            }
            for plot_var in plot_var_list:
                if plot_var in qa_df.columns.values:
                    qa_data.update({plot_var: qa_df[plot_var].values})
            sources[qa_value] = bokeh.models.ColumnDataSource(qa_data)

        # The tooltip fields must be columns of the data sources,
        #   the formatted date is stored in DATE_STR
        tooltips = [("LANDSAT", "@PLATFORM"), ("DATE", "@DATE_STR"),
                    ("DOY", "@DOY")]

        # Selection
        hover_circle = Circle(fill_color='#ff0000', line_color='#ff0000')
        selected_circle = Circle(fill_color='COLOR', line_color='COLOR')
        nonselected_circle = Circle(fill_color='#aaaaaa', line_color='#aaaaaa')

        # The DOY and DATE figures only differ in the x field, the x axis
        #   type and the lower bound on the TS y range
        # (x field, output path, extra figure arguments, log message)
        plot_configs = [
            ('DOY', output_doy_path, dict(),
             ' Building DOY timeseries figure'),
            ('DATE', output_date_path, dict(x_axis_type="datetime"),
             ' Building date timeseries figure'),
        ]
        for x_field, output_path, extra_args, log_msg in plot_configs:
            logging.debug(log_msg)
            if os.path.isfile(output_path):
                os.remove(output_path)
            output_file(output_path, title=zone_name)

            figure_args = dict(
                plot_width=750, plot_height=250, title=None,
                tools="xwheel_zoom,xpan,xbox_zoom,reset,box_select",
                active_scroll="xwheel_zoom")
            figure_args.update(extra_args)

            plot_args = dict(size=4, alpha=0.9, color='COLOR')
            if show_legend:
                plot_args['legend'] = 'QA'

            figures = []
            for plot_var in plot_var_list:
                if figures:
                    # Link the x range to the previous figure so that all
                    #   of the figures pan and zoom together
                    f = figure(x_range=figures[-1].x_range,
                               y_axis_label=plot_var, **figure_args)
                else:
                    f = figure(y_axis_label=plot_var, **figure_args)

                if x_field == 'DATE' and plot_var == 'TS':
                    f.y_range.bounds = (270, None)

                for qa, source in sorted(sources.items()):
                    r = f.circle(x_field, plot_var, source=source,
                                 **plot_args)
                    r.hover_glyph = hover_circle
                    r.selection_glyph = selected_circle
                    r.nonselection_glyph = nonselected_circle
                    r.muted_glyph = nonselected_circle

                    # DEADBEEF - This will display high QA points as muted
                    # if qa > ini['SUMMARY']['max_qa']:
                    #     r.muted = True
                    #     # r.visible = False

                f.add_tools(bokeh.models.HoverTool(tooltips=tooltips))

                f.legend.location = "top_left"
                f.legend.click_policy = "hide"
                f.legend.orientation = "horizontal"

                figures.append(f)

            p = gridplot(figures, ncols=grid_ncols,
                         sizing_mode='stretch_both')

            if show_flag:
                show(p)
            save(p)

        # Pause after each iteration if show is True
        if show_flag:
            input('Press ENTER to continue')
def main(ini_path, show_flag=False, overwrite_flag=False):
    """Generate Beamer ETg summary figures

    The Beamer zonal stats CSV is read, filtered using the INPUTS and
    SUMMARY settings of the control file, averaged to annual values for
    each zone, converted to the FIGURES output units, and plotted as PNG
    files in the "figures" folder of the SUMMARY output workspace
    (EVI, ETo, ET and ETg against water year PPT, plus an ET/ETo versus
    PPT scatter plot).

    Args:
        ini_path (str): file path of the control (INI) file
        show_flag (bool): accepted for interface compatibility; it is not
            used by this function. Default is False.
        overwrite_flag (bool): if True, overwrite existing figures.
            Default is False.

    Returns:
        False if the dataframe is empty after filtering, otherwise None.

    Raises:
        SystemExit: if the requested ETo or PPT unit conversion is not
            supported (only mm -> mm/in/ft is handled).
    """
    logging.info('\nGenerate Beamer ETg summary figures')

    ncolors = [
        '#348ABD', '#7A68A6', '#A60628', '#467821',
        '#CF4457', '#188487', '#E24A33']
    xtick_fs = 8
    ytick_fs = 8
    xlabel_fs = 8
    ylabel_fs = 8
    ms = 2
    figsize = (3.0, 2.5)
    output_folder = 'figures'

    # For unit conversion
    eto_fields = [
        'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI',
        'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI',
        'WY_ETO']
    ppt_fields = ['WY_PPT']

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='FIGURES')
    inputs.parse_section(ini, section='BEAMER')

    # Output paths
    output_ws = os.path.join(ini['SUMMARY']['output_ws'], output_folder)
    if not os.path.isdir(output_ws):
        os.makedirs(output_ws)

    # Start/end year
    year_list = list(range(
        ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(utils.wrapped_range(
        ini['INPUTS']['start_month'], ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(utils.wrapped_range(
        ini['INPUTS']['start_doy'], ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(utils.month_range(
        gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(
        ', '.join(map(str, gridmet_months))))

    # Read in the zonal stats CSV
    logging.debug(' Reading zonal stats CSV file')
    input_df = pd.read_csv(os.path.join(
        ini['ZONAL_STATS']['output_ws'], ini['BEAMER']['output_name']))
    logging.debug(input_df.head())

    logging.debug(' Filtering Landsat dataframe')
    input_df = input_df[input_df['PIXEL_COUNT'] > 0]

    if ini['INPUTS']['fid_keep_list']:
        input_df = input_df[input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_keep_list'])]
    if ini['INPUTS']['fid_skip_list']:
        input_df = input_df[~input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_skip_list'])]

    if year_list:
        input_df = input_df[input_df['YEAR'].isin(year_list)]
    if month_list:
        input_df = input_df[input_df['MONTH'].isin(month_list)]
    if doy_list:
        input_df = input_df[input_df['DOY'].isin(doy_list)]

    if ini['INPUTS']['path_keep_list']:
        input_df = input_df[
            input_df['PATH'].isin(ini['INPUTS']['path_keep_list'])]
    # A keep list holding only the 'XXX' placeholder is treated as no filter
    if (ini['INPUTS']['row_keep_list'] and
            ini['INPUTS']['row_keep_list'] != ['XXX']):
        input_df = input_df[
            input_df['ROW'].isin(ini['INPUTS']['row_keep_list'])]

    # Assume the default is for these to be True and only filter if False
    if not ini['INPUTS']['landsat4_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LT04']
    if not ini['INPUTS']['landsat5_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LT05']
    if not ini['INPUTS']['landsat7_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LE07']
    if not ini['INPUTS']['landsat8_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LC08']

    # Filtering directly on SCENE_ID won't work: the SCENE_ID values have
    #   XXX for the row but the keep/skip list values don't.
    if ini['INPUTS']['scene_id_keep_list']:
        # Replace XXX with primary ROW value for checking keep list SCENE_ID
        scene_id_df = pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])])
        input_df = input_df[scene_id_df.isin(
            ini['INPUTS']['scene_id_keep_list']).values]
    if ini['INPUTS']['scene_id_skip_list']:
        # Replace XXX with primary ROW value for checking skip list SCENE_ID
        scene_id_df = pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])])
        input_df = input_df[np.logical_not(scene_id_df.isin(
            ini['INPUTS']['scene_id_skip_list']).values)]

    # Filter by QA/QC value
    if ini['SUMMARY']['max_qa'] >= 0 and not input_df.empty:
        logging.debug(' Maximum QA: {0}'.format(
            ini['SUMMARY']['max_qa']))
        input_df = input_df[input_df['QA'] <= ini['SUMMARY']['max_qa']]

    # First filter by average cloud score
    if ini['SUMMARY']['max_cloud_score'] < 100 and not input_df.empty:
        logging.debug(' Maximum cloud score: {0}'.format(
            ini['SUMMARY']['max_cloud_score']))
        input_df = input_df[
            input_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

    # Filter by Fmask percentage
    if ini['SUMMARY']['max_fmask_pct'] < 100 and not input_df.empty:
        # Kept as a local series (instead of a new column on the filtered
        #   dataframe) to avoid pandas chained assignment warnings.
        fmask_pct = 100 * (
            input_df['FMASK_COUNT'] / input_df['FMASK_TOTAL'])
        logging.debug(' Max Fmask threshold: {}'.format(
            ini['SUMMARY']['max_fmask_pct']))
        input_df = input_df[fmask_pct <= ini['SUMMARY']['max_fmask_pct']]

    # Filter low count SLC-off images
    if ini['SUMMARY']['min_slc_off_pct'] > 0 and not input_df.empty:
        logging.debug(' Mininum SLC-off threshold: {}%'.format(
            ini['SUMMARY']['min_slc_off_pct']))
        # Landsat 7 scenes in 2004 or later, or in 2003 after DOY 151
        slc_off_mask = (
            (input_df['PLATFORM'] == 'LE07') &
            ((input_df['YEAR'] >= 2004) |
             ((input_df['YEAR'] == 2003) & (input_df['DOY'] > 151))))
        slc_off_pct = 100 * (
            input_df['PIXEL_COUNT'] / input_df['PIXEL_TOTAL'])
        # Only the SLC-off scenes are subject to the threshold
        input_df = input_df[
            ((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) &
             slc_off_mask) |
            (~slc_off_mask)]

    if input_df.empty:
        logging.error(' Empty dataframe after filtering, exiting')
        return False

    def _start_ppt_figure(annual_zone_df, axes_rect, year_limits, ppt_limits):
        """Open a figure with PPT on the right axis; return (ax1, twin ax2)"""
        fig = plt.figure(figsize=figsize)
        ax1 = fig.add_axes(axes_rect)
        ax1.set_xlabel('Year', fontsize=xlabel_fs)
        ax2 = ax1.twinx()
        ax1.plot(
            annual_zone_df['YEAR'].values, annual_zone_df['WY_PPT'],
            marker='o', c='0.5', ms=ms, label='WY PPT')
        ax1.yaxis.tick_right()
        ax1.yaxis.set_label_position("right")
        ax1.set_xlim(year_limits)
        ax1.set_ylim(ppt_limits)
        ax1.tick_params(axis='y', labelsize=ytick_fs)
        ax1.tick_params(axis='x', labelsize=xtick_fs)
        # Must be a bool, the string 'off' is truthy in current Matplotlib
        ax1.tick_params(axis='x', which='both', top=False)
        ax1.xaxis.set_minor_locator(MultipleLocator(1))
        for tick in ax1.get_xticklabels():
            tick.set_rotation(45)
            tick.set_ha('right')
        ax1.set_ylabel(
            'PPT [{}/yr]'.format(ini['FIGURES']['ppt_units']),
            fontsize=ylabel_fs)
        return ax1, ax2

    def _finish_ppt_figure(ax1, ax2, figure_path, color, label, y_limits,
                           y_label):
        """Style the left (ax2) axis, add the legend, then save and close"""
        # Dummy point on ax1 so the ax2 series shows up in the ax1 legend
        ax1.plot(0, 0, marker='o', c=color, ms=ms, label=label)
        ax2.yaxis.tick_left()
        ax2.yaxis.set_label_position("left")
        ax2.set_ylim(y_limits)
        ax2.tick_params(axis='y', labelsize=ytick_fs)
        ax2.set_ylabel(y_label, fontsize=ylabel_fs)
        ax1.legend(
            loc='upper right', frameon=False, fontsize=6, numpoints=1)
        if overwrite_flag or not os.path.isfile(figure_path):
            plt.savefig(figure_path, dpi=300)
        plt.close()

    # Process each zone separately
    logging.debug(input_df.head())
    zone_name_list = sorted(set(input_df['ZONE_NAME'].values))
    for zone_name in zone_name_list:
        logging.info('ZONE: {}'.format(zone_name))
        # The names are currently stored in the CSV with spaces
        zone_output_name = zone_name.replace(' ', '_')
        zone_df = input_df[input_df['ZONE_NAME'] == zone_name]
        if zone_df.empty:
            logging.info(' Empty zone dataframe, skipping zone')
            continue

        logging.debug(' Computing annual summaries')
        annual_df = zone_df \
            .groupby(['ZONE_NAME', 'YEAR']) \
            .agg({
                'PIXEL_COUNT': ['count', 'mean'],
                'PIXEL_TOTAL': ['mean'],
                'FMASK_COUNT': 'mean',
                'FMASK_TOTAL': 'mean',
                'CLOUD_SCORE': 'mean',
                'ETSTAR_COUNT': 'mean',
                'NDVI_TOA': 'mean',
                'NDWI_TOA': 'mean',
                'ALBEDO_SUR': 'mean',
                'TS': 'mean',
                'EVI_SUR': 'mean',
                'ETSTAR_MEAN': 'mean',
                'ETG_MEAN': 'mean',
                'ETG_LPI': 'mean',
                'ETG_UPI': 'mean',
                'ETG_LCI': 'mean',
                'ETG_UCI': 'mean',
                'ET_MEAN': 'mean',
                'ET_LPI': 'mean',
                'ET_UPI': 'mean',
                'ET_LCI': 'mean',
                'ET_UCI': 'mean',
                'WY_ETO': 'mean',
                'WY_PPT': 'mean'
            })
        # Flatten the (field, statistic) column levels to "FIELD_statistic"
        annual_df.columns = annual_df.columns.map('_'.join)
        annual_df = annual_df.rename(columns={
            'PIXEL_COUNT_count': 'SCENE_COUNT',
            'PIXEL_COUNT_mean': 'PIXEL_COUNT'})
        annual_df.rename(
            columns=lambda x: str(x).replace('_mean', ''), inplace=True)
        # The builtin int is used since the np.int alias was removed from
        #   NumPy (np.int was only ever an alias for the builtin).
        for count_field in [
                'SCENE_COUNT', 'PIXEL_COUNT', 'PIXEL_TOTAL',
                'FMASK_COUNT', 'FMASK_TOTAL', 'ETSTAR_COUNT']:
            annual_df[count_field] = annual_df[count_field].astype(int)
        annual_df = annual_df.reset_index()

        # Convert ETo units
        if (ini['BEAMER']['eto_units'] == 'mm' and
                ini['FIGURES']['eto_units'] == 'mm'):
            pass
        elif (ini['BEAMER']['eto_units'] == 'mm' and
                ini['FIGURES']['eto_units'] == 'in'):
            annual_df[eto_fields] /= (25.4)
        elif (ini['BEAMER']['eto_units'] == 'mm' and
                ini['FIGURES']['eto_units'] == 'ft'):
            annual_df[eto_fields] /= (12 * 25.4)
        else:
            logging.error(
                ('\nERROR: Input units {} and output units {} are not ' +
                 'currently supported, exiting').format(
                    ini['BEAMER']['eto_units'], ini['FIGURES']['eto_units']))
            sys.exit()

        # Convert PPT units
        if (ini['BEAMER']['ppt_units'] == 'mm' and
                ini['FIGURES']['ppt_units'] == 'mm'):
            pass
        elif (ini['BEAMER']['ppt_units'] == 'mm' and
                ini['FIGURES']['ppt_units'] == 'in'):
            annual_df[ppt_fields] /= (25.4)
        elif (ini['BEAMER']['ppt_units'] == 'mm' and
                ini['FIGURES']['ppt_units'] == 'ft'):
            annual_df[ppt_fields] /= (12 * 25.4)
        else:
            logging.error(
                ('\nERROR: Input units {} and output units {} are not '
                 'currently supported, exiting').format(
                    ini['BEAMER']['ppt_units'], ini['FIGURES']['ppt_units']))
            sys.exit()

        logging.debug(' Generating figures')
        # From here on zone_df holds the annual values for the zone
        zone_df = annual_df[annual_df['ZONE_NAME'] == zone_name]
        year_min, year_max = min(zone_df['YEAR']), max(zone_df['YEAR'])
        year_limits = [year_min - 1, year_max + 1]

        # Set default PPT min/max scaling
        ppt_min = 0
        if ini['FIGURES']['ppt_units'] == 'mm':
            ppt_max = 100 * math.ceil((max(zone_df['WY_PPT']) + 100) / 100)
        elif ini['FIGURES']['ppt_units'] == 'ft':
            ppt_max = 0.2 * math.ceil((max(zone_df['WY_PPT']) + 0.1) / 0.2)
        else:
            ppt_max = 1.2 * max(zone_df['WY_PPT'])
        ppt_limits = [ppt_min, ppt_max]

        logging.debug(' EVI vs PPT')
        figure_path = os.path.join(
            output_ws, '{}_evi.png'.format(zone_output_name))
        ax1, ax2 = _start_ppt_figure(
            zone_df, [0.20, 0.21, 0.65, 0.75], year_limits, ppt_limits)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['EVI_SUR'].values,
            marker='o', c=ncolors[0], ms=ms, label='EVI')
        _finish_ppt_figure(
            ax1, ax2, figure_path, ncolors[0], 'EVI',
            [0.05 * math.floor((min(zone_df['EVI_SUR']) - 0.01) / 0.05),
             0.05 * math.ceil((max(zone_df['EVI_SUR']) + 0.01) / 0.05)],
            'EVI [dimensionless]')

        logging.debug(' ETo vs PPT')
        figure_path = os.path.join(
            output_ws, '{}_eto.png'.format(zone_output_name))
        ax1, ax2 = _start_ppt_figure(
            zone_df, [0.18, 0.21, 0.67, 0.75], year_limits, ppt_limits)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['WY_ETO'].values,
            marker='o', c=ncolors[1], ms=ms, label='ETo')
        _finish_ppt_figure(
            ax1, ax2, figure_path, ncolors[1], 'ETo',
            [max(0, 0.9 * min(zone_df['WY_ETO'])),
             1.1 * max(zone_df['WY_ETO'])],
            'ETo [{}/yr]'.format(ini['FIGURES']['eto_units']))

        logging.debug(' ET vs PPT')
        figure_path = os.path.join(
            output_ws, '{}_et.png'.format(zone_output_name))
        ax1, ax2 = _start_ppt_figure(
            zone_df, [0.18, 0.21, 0.67, 0.75], year_limits, ppt_limits)
        # Upper and lower confidence intervals as thin unmarked lines
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ET_UCI'].values,
            marker='', c=ncolors[2], alpha=0.5, lw=0.75)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ET_LCI'].values,
            marker='', c=ncolors[2], alpha=0.5, lw=0.75)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ET_MEAN'].values,
            marker='o', c=ncolors[2], ms=ms, label='ET')
        _finish_ppt_figure(
            ax1, ax2, figure_path, ncolors[2], 'ET',
            [max(0, 0.9 * min(zone_df['ET_LCI'])),
             1.1 * max(zone_df['ET_UCI'])],
            'ET [{}/yr]'.format(ini['FIGURES']['eto_units']))

        logging.debug(' ETg vs PPT')
        figure_path = os.path.join(
            output_ws, '{}_etg.png'.format(zone_output_name))
        ax1, ax2 = _start_ppt_figure(
            zone_df, [0.18, 0.21, 0.67, 0.75], year_limits, ppt_limits)
        # Upper and lower confidence intervals as thin unmarked lines
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ETG_UCI'].values,
            marker='', c=ncolors[3], alpha=0.5, lw=0.75)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ETG_LCI'].values,
            marker='', c=ncolors[3], alpha=0.5, lw=0.75)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ETG_MEAN'].values,
            marker='o', c=ncolors[3], ms=ms, label='ETg')
        _finish_ppt_figure(
            ax1, ax2, figure_path, ncolors[3], 'ETg',
            [max(0, 0.9 * min(zone_df['ETG_LCI'])),
             1.1 * max(zone_df['ETG_UCI'])],
            'ETg [{}/yr]'.format(ini['FIGURES']['eto_units']))

        logging.debug(' Complimentary')
        figure_path = os.path.join(
            output_ws, '{}_complimentary.png'.format(zone_output_name))
        fig = plt.figure(figsize=(3, 2.5))
        ax = fig.add_axes([0.18, 0.16, 0.78, 0.80])
        ax.plot(
            zone_df['WY_PPT'].values, zone_df['WY_ETO'].values,
            linestyle='', marker='o', c=ncolors[1], ms=3, label='ETo')
        ax.plot(
            zone_df['WY_PPT'].values, zone_df['ET_MEAN'].values,
            linestyle='', marker='o', c=ncolors[2], ms=3, label='ET')
        ax.set_xlim(ppt_limits)
        ax.set_ylim([0, 1.2 * max(zone_df['WY_ETO'])])
        ax.tick_params(axis='y', labelsize=ytick_fs)
        ax.tick_params(axis='x', labelsize=xtick_fs)
        # Must be bools, the string 'off' is truthy in current Matplotlib
        ax.tick_params(axis='x', which='both', top=False)
        ax.tick_params(axis='y', which='both', right=False)
        ax.set_xlabel('PPT [{}/yr]'.format(
            ini['FIGURES']['ppt_units']), fontsize=xlabel_fs)
        ax.set_ylabel('ET and ETo [{}/yr]'.format(
            ini['FIGURES']['eto_units']), fontsize=ylabel_fs)
        ax.legend(loc='upper right', frameon=False, fontsize=6, numpoints=1)
        if overwrite_flag or not os.path.isfile(figure_path):
            plt.savefig(figure_path, dpi=300)
        plt.close()
        del fig, ax
def main(ini_path=None, overwrite_flag=True, show_flag=False):
    """Generate summary figures

    For each zone in the zones shapefile, the Landsat daily zonal stats CSV
    is filtered using the INPUTS and SUMMARY settings and averaged to
    annual values, the GRIDMET CSV (monthly if present, otherwise daily)
    is summed over the configured month range, the two tables are merged
    by zone and year, the annual tables are written as CSV files, and the
    timeseries, scatter and complementary figures listed in the FIGURES
    section are generated.

    Args:
        ini_path (str): file path of the control file
        overwrite_flag (bool): if True, overwrite existing figures
        show_flag (bool): if True, show figures as they are being built

    Returns:
        False if a requested figure band is not supported, otherwise None.
    """
    logging.info('\nGenerate summary figures')

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='FIGURES')

    # Band options
    band_list = [
        'albedo_sur', 'cloud_score', 'eto', 'evi_sur',
        'fmask_count', 'fmask_total',
        'ndvi_sur', 'ndvi_toa',
        'ndwi_green_nir_sur', 'ndwi_green_nir_toa',
        'ndwi_green_swir1_sur', 'ndwi_green_swir1_toa',
        'ndwi_nir_swir1_sur', 'ndwi_nir_swir1_toa',
        'ndwi_swir1_green_sur', 'ndwi_swir1_green_toa',
        'pixel_count', 'pixel_total', 'ppt',
        'tc_bright', 'tc_green', 'tc_wet', 'ts'
    ]
    band_name = {
        'albedo_sur': 'Albedo',
        'cloud_score': 'Cloud Score',
        'eto': 'ETo',
        'evi_sur': 'EVI',
        'fmask_count': 'Fmask Count',
        'fmask_total': 'Fmask Total',
        'ndvi_sur': 'NDVI',
        'ndvi_toa': 'NDVI (TOA)',
        'ndwi_green_nir_sur': 'NDWI (Green, NIR)',
        'ndwi_green_nir_toa': 'NDWI (Green, NIR) (TOA)',
        'ndwi_green_swir1_sur': 'NDWI (Green, SWIR1)',
        'ndwi_green_swir1_toa': 'NDWI (Green, SWIR1) (TOA)',
        'ndwi_nir_swir1_sur': 'NDWI (NIR, SWIR1)',
        'ndwi_nir_swir1_toa': 'NDWI (NIR, SWIR1) (TOA)',
        'ndwi_swir1_green_sur': 'NDWI (SWIR1, Green)',
        'ndwi_swir1_green_toa': 'NDWI (SWIR1, Green) (TOA)',
        'pixel_count': 'Pixel Count',
        'pixel_total': 'Pixel Total',
        'ppt': 'PPT',
        'tc_bright': 'Brightness',
        'tc_green': 'Greeness',
        'tc_wet': 'Wetness',
        'ts': 'Ts'
    }
    band_unit = {
        'albedo_sur': 'dimensionless',
        'cloud_score': 'dimensionless',
        'evi_sur': 'dimensionless',
        'eto': 'mm',
        'fmask_count': 'dimensionless',
        'fmask_total': 'dimensionless',
        'ndvi_sur': 'dimensionless',
        'ndvi_toa': 'dimensionless',
        'ndwi_green_nir_sur': 'dimensionless',
        'ndwi_green_nir_toa': 'dimensionless',
        'ndwi_green_swir1_sur': 'dimensionless',
        'ndwi_green_swir1_toa': 'dimensionless',
        'ndwi_nir_swir1_sur': 'dimensionless',
        'ndwi_nir_swir1_toa': 'dimensionless',
        'ndwi_swir1_green_sur': 'dimensionless',
        'ndwi_swir1_green_toa': 'dimensionless',
        'pixel_count': 'dimensionless',
        'pixel_total': 'dimensionless',
        'ppt': 'mm',
        'tc_bright': 'dimensionless',
        'tc_green': 'dimensionless',
        'tc_wet': 'dimensionless',
        'ts': 'K',
    }
    band_color = {
        'albedo_sur': '#CF4457',
        'cloud_score': '0.5',
        'eto': '#348ABD',
        'fmask_count': '0.5',
        'fmask_total': '0.5',
        'evi_sur': '#FFA500',
        'ndvi_sur': '#A60628',
        'ndvi_toa': '#A60628',
        'ndwi_green_nir_sur': '#4eae4b',
        'ndwi_green_nir_toa': '#4eae4b',
        'ndwi_green_swir1_sur': '#4eae4b',
        'ndwi_green_swir1_toa': '#4eae4b',
        'ndwi_nir_swir1_sur': '#4eae4b',
        'ndwi_nir_swir1_toa': '#4eae4b',
        'ndwi_swir1_green_sur': '#4eae4b',
        'ndwi_swir1_green_toa': '#4eae4b',
        'pixel_count': '0.5',
        'pixel_total': '0.5',
        'ppt': '0.5',
        'tc_bright': '#E24A33',
        'tc_green': '#E24A33',
        'tc_wet': '#E24A33',
        'ts': '#188487'
    }

    # Hardcoded plot options
    figures_folder = 'figures'
    fig_type = 'large'

    plot_dict = dict()

    # Center y-labels in figure window (instead of centering on ticks/axes)
    plot_dict['center_ylabel'] = False

    # Axes percentages must be 0-1
    plot_dict['timeseries_band_ax_pct'] = [0.3, 0.92]
    plot_dict['timeseries_ppt_ax_pct'] = [0.0, 0.35]
    plot_dict['complement_band_ax_pct'] = [0.0, 0.5]
    plot_dict['complement_eto_ax_pct'] = [0.4, 1.0]

    if fig_type.lower() == 'large':
        plot_dict['title_fs'] = 12
        plot_dict['xtick_fs'] = 10
        plot_dict['ytick_fs'] = 10
        plot_dict['xlabel_fs'] = 10
        plot_dict['ylabel_fs'] = 10
        plot_dict['legend_fs'] = 10
        plot_dict['ts_ms'] = 3
        plot_dict['comp_ms'] = 4
        plot_dict['timeseries_ax'] = [0.12, 0.13, 0.78, 0.81]
        plot_dict['scatter_ax'] = [0.12, 0.10, 0.82, 0.84]
        plot_dict['complement_ax'] = [0.12, 0.10, 0.78, 0.84]
        plot_dict['fig_size'] = (6.0, 5.0)
    elif fig_type.lower() == 'small':
        plot_dict['title_fs'] = 10
        plot_dict['xtick_fs'] = 8
        plot_dict['ytick_fs'] = 8
        plot_dict['xlabel_fs'] = 8
        plot_dict['ylabel_fs'] = 8
        plot_dict['legend_fs'] = 8
        plot_dict['ts_ms'] = 1.5
        plot_dict['comp_ms'] = 2
        plot_dict['timeseries_ax'] = [0.18, 0.21, 0.67, 0.70]
        plot_dict['scatter_ax'] = [0.18, 0.21, 0.67, 0.70]
        plot_dict['complement_ax'] = [0.18, 0.16, 0.67, 0.75]
        plot_dict['fig_size'] = (3.0, 2.5)
    plot_dict['fig_dpi'] = 300
    plot_dict['show'] = show_flag
    plot_dict['overwrite'] = overwrite_flag

    # CSV parameters
    landsat_annual_fields = [
        'ZONE_FID', 'ZONE_NAME', 'YEAR', 'SCENE_COUNT', 'CLOUD_SCORE',
        'PIXEL_COUNT', 'PIXEL_TOTAL', 'FMASK_COUNT', 'FMASK_TOTAL',
        'TS', 'ALBEDO_SUR', 'NDVI_TOA', 'NDVI_SUR', 'EVI_SUR',
        'NDWI_GREEN_NIR_SUR', 'NDWI_GREEN_SWIR1_SUR', 'NDWI_NIR_SWIR1_SUR',
        'TC_BRIGHT', 'TC_GREEN', 'TC_WET'
    ]

    # Add merged row XXX to keep list
    ini['INPUTS']['row_keep_list'].append('XXX')

    # Check figure bands
    timeseries_bands = ini['FIGURES']['timeseries_bands']
    scatter_bands = ini['FIGURES']['scatter_bands']
    complementary_bands = ini['FIGURES']['complementary_bands']
    if timeseries_bands:
        logging.info('Timeseries Bands:')
        for band in timeseries_bands:
            if band not in band_list:
                logging.error(
                    ' Invalid timeseries band: {}, exiting'.format(band))
                return False
            logging.info(' {}'.format(band))
    if scatter_bands:
        logging.info('Scatter Bands (x:y):')
        for band_x, band_y in scatter_bands:
            if band_x not in band_list:
                logging.error(
                    ' Invalid scatter band: {}, exiting'.format(band_x))
                return False
            elif band_y not in band_list:
                logging.error(' Invalid band: {}, exiting'.format(band_y))
                return False
            logging.info(' {}:{}'.format(band_x, band_y))
    if complementary_bands:
        logging.info('Complementary Bands:')
        for band in complementary_bands:
            if band not in band_list:
                logging.error(
                    ' Invalid complementary band: {}, exiting'.format(band))
                return False
            logging.info(' {}'.format(band))

    # Add input plot options
    plot_dict['ppt_plot_type'] = ini['FIGURES']['ppt_plot_type']
    plot_dict['scatter_best_fit'] = ini['FIGURES']['scatter_best_fit']

    # Start/end year
    year_list = list(
        range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(
        utils.wrapped_range(ini['INPUTS']['start_month'],
                            ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(
        utils.wrapped_range(ini['INPUTS']['start_doy'],
                            ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(
        utils.month_range(gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(', '.join(
        map(str, gridmet_months))))

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'],
        zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)

    # Filter features by FID before merging geometries
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']
        ]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']
        ]

    logging.info('\nProcessing zones')
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone_name = zone_name.replace(' ', '_')
        logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid))

        zone_stats_ws = os.path.join(
            ini['ZONAL_STATS']['output_ws'], zone_name)
        zone_figures_ws = os.path.join(
            ini['SUMMARY']['output_ws'], zone_name, figures_folder)
        if not os.path.isdir(zone_stats_ws):
            logging.debug(
                ' Folder {} does not exist, skipping'.format(zone_stats_ws))
            continue
        elif not os.path.isdir(zone_figures_ws):
            os.makedirs(zone_figures_ws)

        # Input paths
        landsat_daily_path = os.path.join(
            zone_stats_ws, '{}_landsat_daily.csv'.format(zone_name))
        gridmet_daily_path = os.path.join(
            zone_stats_ws, '{}_gridmet_daily.csv'.format(zone_name))
        gridmet_monthly_path = os.path.join(
            zone_stats_ws, '{}_gridmet_monthly.csv'.format(zone_name))
        if not os.path.isfile(landsat_daily_path):
            logging.error(' Landsat daily CSV does not exist, skipping zone')
            continue
        elif (not os.path.isfile(gridmet_daily_path) and
                not os.path.isfile(gridmet_monthly_path)):
            # DEADBEEF - Eventually support generating only Landsat figures
            logging.error(
                ' GRIDMET daily or monthly CSV does not exist, skipping zone')
            continue

        # Output paths
        landsat_summary_path = os.path.join(
            zone_figures_ws, '{}_landsat_figures.csv'.format(zone_name))
        gridmet_summary_path = os.path.join(
            zone_figures_ws, '{}_gridmet_figures.csv'.format(zone_name))
        zone_summary_path = os.path.join(
            zone_figures_ws, '{}_zone_figures.csv'.format(zone_name))

        logging.debug(' Reading Landsat CSV')
        landsat_df = pd.read_csv(landsat_daily_path)

        # QA field should have been written in zonal stats code
        # Eventually this block can be removed
        # The default is set before filtering so the column is not added
        #   to a filtered slice of the dataframe.
        if 'QA' not in landsat_df.columns.values:
            landsat_df['QA'] = 0

        logging.debug(' Filtering Landsat dataframe')
        landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0]

        if year_list:
            landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)]
        if month_list:
            landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)]
        if doy_list:
            landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)]

        # Assume the default is for these to be True and only filter if False
        if not ini['INPUTS']['landsat4_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT04']
        if not ini['INPUTS']['landsat5_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT05']
        if not ini['INPUTS']['landsat7_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LE07']
        if not ini['INPUTS']['landsat8_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LC08']

        if ini['INPUTS']['path_keep_list']:
            landsat_df = landsat_df[landsat_df['PATH'].isin(
                ini['INPUTS']['path_keep_list'])]
        # A keep list holding only the 'XXX' placeholder is no filter at all
        if (ini['INPUTS']['row_keep_list'] and
                ini['INPUTS']['row_keep_list'] != ['XXX']):
            landsat_df = landsat_df[landsat_df['ROW'].isin(
                ini['INPUTS']['row_keep_list'])]

        # Filtering directly on SCENE_ID won't work: the SCENE_ID values
        #   have XXX for the row but the keep/skip list values don't.
        if ini['INPUTS']['scene_id_keep_list']:
            # Replace XXX with primary ROW value for checking keep list
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])
            ])
            landsat_df = landsat_df[scene_id_df.isin(
                ini['INPUTS']['scene_id_keep_list']).values]
        if ini['INPUTS']['scene_id_skip_list']:
            # Replace XXX with primary ROW value for checking skip list
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])
            ])
            landsat_df = landsat_df[np.logical_not(
                scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)]

        # Filter by QA/QC value
        if ini['SUMMARY']['max_qa'] >= 0 and not landsat_df.empty:
            logging.debug(' Maximum QA: {0}'.format(
                ini['SUMMARY']['max_qa']))
            landsat_df = landsat_df[
                landsat_df['QA'] <= ini['SUMMARY']['max_qa']]

        # Filter by average cloud score
        if ini['SUMMARY']['max_cloud_score'] < 100 and not landsat_df.empty:
            logging.debug(' Maximum cloud score: {0}'.format(
                ini['SUMMARY']['max_cloud_score']))
            landsat_df = landsat_df[
                landsat_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

        # Filter by Fmask percentage
        if ini['SUMMARY']['max_fmask_pct'] < 100 and not landsat_df.empty:
            # Kept as a local series (instead of a new column on the
            #   filtered dataframe) to avoid chained assignment warnings.
            fmask_pct = 100 * (
                landsat_df['FMASK_COUNT'] / landsat_df['FMASK_TOTAL'])
            logging.debug(' Max Fmask threshold: {}'.format(
                ini['SUMMARY']['max_fmask_pct']))
            landsat_df = landsat_df[
                fmask_pct <= ini['SUMMARY']['max_fmask_pct']]

        # Filter low count SLC-off images
        if ini['SUMMARY']['min_slc_off_pct'] > 0 and not landsat_df.empty:
            logging.debug(' Mininum SLC-off threshold: {}%'.format(
                ini['SUMMARY']['min_slc_off_pct']))
            # Landsat 7 scenes in 2004 or later, or in 2003 after DOY 151.
            # The platform test uses the same PLATFORM field and 'LE07'
            #   code as the landsat7_flag filter above.
            slc_off_mask = (
                (landsat_df['PLATFORM'] == 'LE07') &
                ((landsat_df['YEAR'] >= 2004) |
                 ((landsat_df['YEAR'] == 2003) & (landsat_df['DOY'] > 151))))
            slc_off_pct = 100 * (
                landsat_df['PIXEL_COUNT'] / landsat_df['PIXEL_TOTAL'])
            # Only the SLC-off scenes are subject to the threshold
            landsat_df = landsat_df[
                ((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) &
                 slc_off_mask) |
                (~slc_off_mask)]

        if landsat_df.empty:
            logging.error(
                ' Empty Landsat dataframe after filtering, skipping zone')
            continue

        logging.debug(' Computing Landsat annual summaries')
        # Annual mean of the count/score fields and of every annual field
        #   present in the CSV; the group keys and SCENE_COUNT are excluded
        #   since they are not averaged.
        group_fields = ['ZONE_NAME', 'ZONE_FID', 'YEAR']
        mean_fields = [
            'PIXEL_COUNT', 'PIXEL_TOTAL', 'FMASK_COUNT', 'FMASK_TOTAL',
            'CLOUD_SCORE']
        for field in landsat_df.columns.values:
            if (field in landsat_annual_fields and
                    field not in group_fields and
                    field not in mean_fields and
                    field != 'SCENE_COUNT'):
                mean_fields.append(field)
        # Plain mean()/count() calls are used instead of nested renaming
        #   dictionaries in agg(), which pandas no longer supports.
        landsat_grouped = landsat_df.groupby(group_fields)
        landsat_annual_df = landsat_grouped[mean_fields].mean()
        landsat_annual_df['SCENE_COUNT'] = \
            landsat_grouped['PIXEL_COUNT'].count()
        landsat_df = landsat_annual_df.reset_index()
        # The builtin int is used since the np.int alias was removed from
        #   NumPy (np.int was only ever an alias for the builtin).
        for int_field in [
                'YEAR', 'SCENE_COUNT', 'PIXEL_COUNT', 'PIXEL_TOTAL',
                'FMASK_COUNT', 'FMASK_TOTAL']:
            landsat_df[int_field] = landsat_df[int_field].astype(int)
        landsat_df.sort_values(by='YEAR', inplace=True)

        # Aggregate GRIDMET (to water year)
        # At least one of the two files exists (checked above)
        if os.path.isfile(gridmet_monthly_path):
            logging.debug(' Reading montly GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_monthly_path)
        elif os.path.isfile(gridmet_daily_path):
            logging.debug(' Reading daily GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_daily_path)

        logging.debug(' Computing GRIDMET summaries')
        # Summarize GRIDMET for target months year
        # Months 10-12 at the start of the range are assigned to the
        #   following year.
        if (gridmet_start_month in [10, 11, 12] and
                gridmet_end_month in [10, 11, 12]):
            month_mask = (
                (gridmet_df['MONTH'] >= gridmet_start_month) &
                (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
        elif (gridmet_start_month in [10, 11, 12] and
                gridmet_end_month not in [10, 11, 12]):
            month_mask = gridmet_df['MONTH'] >= gridmet_start_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
            month_mask = gridmet_df['MONTH'] <= gridmet_end_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        else:
            month_mask = (
                (gridmet_df['MONTH'] >= gridmet_start_month) &
                (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        # GROUP_YEAR for rows not in the GRIDMET month range will be NAN
        gridmet_df = gridmet_df[~pd.isnull(gridmet_df['GROUP_YEAR'])]

        if year_list:
            gridmet_df = gridmet_df[gridmet_df['GROUP_YEAR'].isin(year_list)]

        if gridmet_df.empty:
            logging.error(
                ' Empty GRIDMET dataframe after filtering by year')
            continue

        # Group GRIDMET data by user specified range (default is water year)
        gridmet_group_df = gridmet_df \
            .groupby(['ZONE_FID', 'ZONE_NAME', 'GROUP_YEAR'])[['ETO', 'PPT']] \
            .sum() \
            .reset_index()
        gridmet_group_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True)
        gridmet_group_df.sort_values(by='YEAR', inplace=True)

        # Merge Landsat and GRIDMET collections
        zone_df = landsat_df.merge(
            gridmet_group_df, on=['ZONE_FID', 'ZONE_NAME', 'YEAR'])
        if zone_df is None or zone_df.empty:
            logging.info(' Empty zone dataframe, not generating figures')
            continue

        # Save annual Landsat and GRIDMET tables
        logging.debug(' Saving summary tables')

        logging.debug(' {}'.format(landsat_summary_path))
        landsat_df.sort_values(by=['YEAR'], inplace=True)
        landsat_df.to_csv(landsat_summary_path, index=False)

        logging.debug(' {}'.format(gridmet_summary_path))
        gridmet_group_df.sort_values(by=['YEAR'], inplace=True)
        gridmet_group_df.to_csv(gridmet_summary_path, index=False)

        logging.debug(' {}'.format(zone_summary_path))
        zone_df.sort_values(by=['YEAR'], inplace=True)
        zone_df.to_csv(zone_summary_path, index=False)

        logging.debug(' Generating figures')
        for band in timeseries_bands:
            timeseries_plot(
                band, zone_df, zone_name, zone_figures_ws,
                ini['INPUTS']['start_year'], ini['INPUTS']['end_year'],
                band_name, band_unit, band_color, plot_dict)

        for band_x, band_y in scatter_bands:
            scatter_plot(
                band_x, band_y, zone_df, zone_name, zone_figures_ws,
                band_name, band_unit, band_color, plot_dict)

        for band in complementary_bands:
            complementary_plot(
                band, zone_df, zone_name, zone_figures_ws,
                band_name, band_unit, band_color, plot_dict)

        del landsat_df, gridmet_df, zone_df
def main(ini_path, overwrite_flag=True):
    """Generate Beamer ETg summary tables

    Reads the zonal stats CSV named in the INI file, applies the INPUTS and
    SUMMARY filters, and writes a daily and an annual Excel workbook
    (one sheet per zone name) into the SUMMARY output workspace.

    Args:
        ini_path (str): INI file path, passed to inputs.read()
        overwrite_flag (bool): if True, overwrite existing tables
            Default is True (for now)

    Returns:
        True if both workbooks already exist and overwrite_flag is False,
        False if no rows remain after filtering, otherwise None.

    Raises:
        SystemExit: if the ETo or PPT input/output unit pair is unsupported
            (exit status 1).

    """
    logging.info('\nGenerate Beamer ETg summary tables')

    # Fields that are rescaled by the unit conversion of the annual table
    eto_fields = [
        'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI',
        'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI',
        'WY_ETO']
    ppt_fields = ['WY_PPT']

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='BEAMER')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='TABLES')

    # Output paths
    output_daily_path = os.path.join(
        ini['SUMMARY']['output_ws'],
        ini['BEAMER']['output_name'].replace('.csv', '_daily.xlsx'))
    output_annual_path = os.path.join(
        ini['SUMMARY']['output_ws'],
        ini['BEAMER']['output_name'].replace('.csv', '_annual.xlsx'))

    # Check if files already exist
    if overwrite_flag:
        if os.path.isfile(output_daily_path):
            os.remove(output_daily_path)
        if os.path.isfile(output_annual_path):
            os.remove(output_annual_path)
    elif (os.path.isfile(output_daily_path) and
            os.path.isfile(output_annual_path)):
        logging.info('\nOutput files already exist and '
                     'overwrite is False, exiting')
        return True

    # Start/end year
    year_list = list(range(
        ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(utils.wrapped_range(
        ini['INPUTS']['start_month'], ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(utils.wrapped_range(
        ini['INPUTS']['start_doy'], ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(utils.month_range(
        gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(
        ', '.join(map(str, gridmet_months))))

    # Read in the zonal stats CSV
    logging.debug(' Reading zonal stats CSV file')
    input_df = pd.read_csv(os.path.join(
        ini['ZONAL_STATS']['output_ws'], ini['BEAMER']['output_name']))

    logging.debug(' Filtering Landsat dataframe')
    input_df = input_df[input_df['PIXEL_COUNT'] > 0]

    if ini['INPUTS']['fid_keep_list']:
        input_df = input_df[input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_keep_list'])]
    if ini['INPUTS']['fid_skip_list']:
        input_df = input_df[~input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_skip_list'])]

    # The zone list is captured after the FID filters but before the
    # date/scene/quality filters, so a zone whose rows are all filtered out
    # below still gets an (empty) sheet in the workbooks
    zone_name_list = sorted(list(set(input_df['ZONE_NAME'].values)))

    if year_list:
        input_df = input_df[input_df['YEAR'].isin(year_list)]
    if month_list:
        input_df = input_df[input_df['MONTH'].isin(month_list)]
    if doy_list:
        input_df = input_df[input_df['DOY'].isin(doy_list)]

    if ini['INPUTS']['path_keep_list']:
        input_df = input_df[input_df['PATH'].isin(
            ini['INPUTS']['path_keep_list'])]
    if (ini['INPUTS']['row_keep_list'] and
            ini['INPUTS']['row_keep_list'] != ['XXX']):
        input_df = input_df[input_df['ROW'].isin(
            ini['INPUTS']['row_keep_list'])]

    # Assume the default is for these to be True and only filter if False
    if not ini['INPUTS']['landsat4_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LT04']
    if not ini['INPUTS']['landsat5_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LT05']
    if not ini['INPUTS']['landsat7_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LE07']
    if not ini['INPUTS']['landsat8_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LC08']

    # SCENE_ID values can contain an XXX placeholder that the keep/skip
    # lists do not, so the primary ROW value is substituted before the
    # comparison.  A direct isin() on the SCENE_ID column would not match.
    if ini['INPUTS']['scene_id_keep_list']:
        scene_id_df = pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])])
        input_df = input_df[scene_id_df.isin(
            ini['INPUTS']['scene_id_keep_list']).values]
    if ini['INPUTS']['scene_id_skip_list']:
        scene_id_df = pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])])
        input_df = input_df[np.logical_not(scene_id_df.isin(
            ini['INPUTS']['scene_id_skip_list']).values)]

    # Filter by QA/QC value
    if ini['SUMMARY']['max_qa'] >= 0 and not input_df.empty:
        logging.debug(' Maximum QA: {0}'.format(ini['SUMMARY']['max_qa']))
        input_df = input_df[input_df['QA'] <= ini['SUMMARY']['max_qa']]

    # First filter by average cloud score
    if ini['SUMMARY']['max_cloud_score'] < 100 and not input_df.empty:
        logging.debug(' Maximum cloud score: {0}'.format(
            ini['SUMMARY']['max_cloud_score']))
        input_df = input_df[
            input_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

    # Filter by Fmask percentage
    if ini['SUMMARY']['max_fmask_pct'] < 100 and not input_df.empty:
        # assign() returns a new frame, which avoids writing a column into
        # a filtered slice of the CSV dataframe (chained assignment)
        input_df = input_df.assign(FMASK_PCT=100 * (
            input_df['FMASK_COUNT'] / input_df['FMASK_TOTAL']))
        logging.debug(' Max Fmask threshold: {}'.format(
            ini['SUMMARY']['max_fmask_pct']))
        input_df = input_df[
            input_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

    # Filter low count SLC-off images
    if ini['SUMMARY']['min_slc_off_pct'] > 0 and not input_df.empty:
        logging.debug(' Mininum SLC-off threshold: {}%'.format(
            ini['SUMMARY']['min_slc_off_pct']))
        # Landsat 7 scenes from 2003 DOY 152 onward are treated as SLC-off
        slc_off_mask = (
            (input_df['PLATFORM'] == 'LE07') &
            ((input_df['YEAR'] >= 2004) |
             ((input_df['YEAR'] == 2003) & (input_df['DOY'] > 151))))
        slc_off_pct = 100 * (
            input_df['PIXEL_COUNT'] / input_df['PIXEL_TOTAL'])
        # Keep every non-SLC-off row, and SLC-off rows above the threshold
        input_df = input_df[
            ((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) &
             slc_off_mask) | (~slc_off_mask)]

    if input_df.empty:
        logging.error(' Empty dataframe after filtering, exiting')
        return False

    if not os.path.isfile(output_daily_path):
        logging.info('\nWriting daily values to Excel')
        # The context manager saves and closes the workbook on exit
        # (ExcelWriter.save() is not available in current pandas)
        with ExcelWriter(output_daily_path) as excel_f:
            for zone_name in sorted(zone_name_list):
                logging.info(' {}'.format(zone_name))
                zone_df = input_df[input_df['ZONE_NAME'] == zone_name]
                zone_df.to_excel(
                    excel_f, sheet_name=zone_name, index=False,
                    float_format='%.4f')
                del zone_df

    if not os.path.isfile(output_annual_path):
        logging.info('\nComputing annual summaries')
        annual_df = input_df \
            .groupby(['ZONE_NAME', 'YEAR']) \
            .agg({
                'PIXEL_COUNT': ['count', 'mean'],
                'PIXEL_TOTAL': ['mean'],
                'FMASK_COUNT': 'mean',
                'FMASK_TOTAL': 'mean',
                'CLOUD_SCORE': 'mean',
                'ETSTAR_COUNT': 'mean',
                'NDVI_TOA': 'mean',
                'NDWI_TOA': 'mean',
                'ALBEDO_SUR': 'mean',
                'TS': 'mean',
                'EVI_SUR': ['mean', 'median', 'min', 'max'],
                'ETSTAR_MEAN': 'mean',
                'ETG_MEAN': 'mean',
                'ETG_LPI': 'mean',
                'ETG_UPI': 'mean',
                'ETG_LCI': 'mean',
                'ETG_UCI': 'mean',
                'ET_MEAN': 'mean',
                'ET_LPI': 'mean',
                'ET_UPI': 'mean',
                'ET_LCI': 'mean',
                'ET_UCI': 'mean',
                'WY_ETO': 'mean',
                'WY_PPT': 'mean',
            })

        # Flatten the (field, statistic) column pairs to FIELD_statistic
        annual_df.columns = annual_df.columns.map('_'.join)
        # The explicit renames must happen before the generic '_mean' strip
        # so that EVI_SUR keeps its own _MEAN suffix
        annual_df = annual_df.rename(columns={
            'PIXEL_COUNT_count': 'SCENE_COUNT',
            'PIXEL_COUNT_mean': 'PIXEL_COUNT',
            'EVI_SUR_mean': 'EVI_SUR_MEAN',
            'EVI_SUR_median': 'EVI_SUR_MEDIAN',
            'EVI_SUR_min': 'EVI_SUR_MIN',
            'EVI_SUR_max': 'EVI_SUR_MAX',
        })
        annual_df.rename(
            columns=lambda x: str(x).replace('_mean', ''), inplace=True)

        # Count fields are averaged above, cast them back to integers
        # (builtin int replaces the removed np.int alias)
        count_fields = [
            'SCENE_COUNT', 'PIXEL_COUNT', 'PIXEL_TOTAL',
            'FMASK_COUNT', 'FMASK_TOTAL', 'ETSTAR_COUNT']
        for count_field in count_fields:
            annual_df[count_field] = annual_df[count_field].astype(int)
        annual_df = annual_df.reset_index()

        # Convert ETo units, then PPT units
        # Only millimeter inputs are supported; None means "no scaling"
        mm_per_unit = {'mm': None, 'in': 25.4, 'ft': 12 * 25.4}
        unit_fields = [('eto_units', eto_fields), ('ppt_units', ppt_fields)]
        for units_key, fields in unit_fields:
            input_units = ini['BEAMER'][units_key]
            output_units = ini['TABLES'][units_key]
            if input_units != 'mm' or output_units not in mm_per_unit:
                logging.error(
                    ('\nERROR: Input units {} and output units {} are not ' +
                     'currently supported, exiting').format(
                        input_units, output_units))
                # Non-zero status so calling scripts can detect the failure
                sys.exit(1)
            divisor = mm_per_unit[output_units]
            if divisor is not None:
                annual_df[fields] /= divisor

        logging.info('\nWriting annual values to Excel')
        with ExcelWriter(output_annual_path) as excel_f:
            for zone_name in sorted(zone_name_list):
                logging.info(' {}'.format(zone_name))
                zone_df = annual_df[annual_df['ZONE_NAME'] == zone_name]
                zone_df.to_excel(
                    excel_f, sheet_name=zone_name, index=False,
                    float_format='%.4f')
                del zone_df
def main(ini_path, show_flag=False, overwrite_flag=True):
    """Generate Bokeh figures

    For each zone in the zone shapefile, the Landsat daily CSV is filtered,
    merged with annual GRIDMET ETo/PPT sums, and written out as two HTML
    grids of timeseries plots (one against DOY, one against DATE).

    Bokeh issues:
    Adjust y range based on non-muted data
        https://stackoverflow.com/questions/43620837/how-to-get-bokeh-to-dynamically-adjust-y-range-when-panning
    Linked interactive legends so that there is only one legend for the gridplot
    Maybe hide or mute QA values above max (instead of filtering them in advance)

    Args:
        ini_path (str): INI file path, passed to inputs.read()
        show_flag (bool): if True, show the figures in the browser.
            Default is False.
        overwrite_flag (bool): if True, overwrite existing tables.
            Default is True (for now)
            NOTE: this value is not read anywhere in the function body;
            existing HTML files are always removed and regenerated.

    Raises:
        SystemExit: if a plot variable is missing from the Landsat dataframe
            (exit status 1).

    """
    logging.info('\nGenerate interactive timeseries figures')

    # Eventually read from INI
    plot_var_list = ['NDVI_TOA', 'ALBEDO_SUR', 'TS', 'NDWI_TOA', 'EVI_SUR']
    output_folder = 'figures'

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='FIGURES')
    inputs.parse_section(ini, section='BEAMER')

    # Hardcode GRIDMET month range to the water year
    ini['SUMMARY']['gridmet_start_month'] = 10
    ini['SUMMARY']['gridmet_end_month'] = 9

    # Start/end year
    year_list = list(range(
        ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(utils.wrapped_range(
        ini['INPUTS']['start_month'], ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(utils.wrapped_range(
        ini['INPUTS']['start_doy'], ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(utils.month_range(
        gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(
        ', '.join(map(str, gridmet_months))))

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'], zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)

    # Filter features by FID before merging geometries
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']]

    # Months that belong to the following GROUP_YEAR when the range starts
    # late in the calendar year
    late_months = [10, 11, 12]

    logging.info('\nProcessing zones')
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone_name = zone_name.replace(' ', '_')
        logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid))

        zone_stats_ws = os.path.join(
            ini['ZONAL_STATS']['output_ws'], zone_name)
        zone_figures_ws = os.path.join(
            ini['SUMMARY']['output_ws'], zone_name, output_folder)
        if not os.path.isdir(zone_stats_ws):
            logging.debug(' Folder {} does not exist, skipping'.format(
                zone_stats_ws))
            continue
        elif not os.path.isdir(zone_figures_ws):
            os.makedirs(zone_figures_ws)

        # Input paths
        landsat_daily_path = os.path.join(
            zone_stats_ws, '{}_landsat_daily.csv'.format(zone_name))
        gridmet_daily_path = os.path.join(
            zone_stats_ws, '{}_gridmet_daily.csv'.format(zone_name))
        gridmet_monthly_path = os.path.join(
            zone_stats_ws, '{}_gridmet_monthly.csv'.format(zone_name))
        if not os.path.isfile(landsat_daily_path):
            logging.error(' Landsat daily CSV does not exist, skipping zone')
            continue
        elif (not os.path.isfile(gridmet_daily_path) and
                not os.path.isfile(gridmet_monthly_path)):
            # DEADBEEF - Eventually support generating only Landsat figures
            logging.error(
                ' GRIDMET daily or monthly CSV does not exist, skipping zone')
            continue

        # Output paths
        output_doy_path = os.path.join(
            zone_figures_ws, '{}_timeseries_doy.html'.format(zone_name))
        output_date_path = os.path.join(
            zone_figures_ws, '{}_timeseries_date.html'.format(zone_name))

        logging.debug(' Reading Landsat CSV')
        landsat_df = pd.read_csv(landsat_daily_path)

        logging.debug(' Filtering Landsat dataframe')
        landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0]

        # QA field should have been written in zonal stats code
        # Eventually this block can be removed
        if 'QA' not in landsat_df.columns.values:
            landsat_df = landsat_df.assign(QA=0)

        if year_list:
            landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)]
        if month_list:
            landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)]
        if doy_list:
            landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)]

        # Assume the default is for these to be True and only filter if False
        if not ini['INPUTS']['landsat4_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT04']
        if not ini['INPUTS']['landsat5_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT05']
        if not ini['INPUTS']['landsat7_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LE07']
        if not ini['INPUTS']['landsat8_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LC08']

        if ini['INPUTS']['path_keep_list']:
            landsat_df = landsat_df[
                landsat_df['PATH'].isin(ini['INPUTS']['path_keep_list'])]
        if (ini['INPUTS']['row_keep_list'] and
                ini['INPUTS']['row_keep_list'] != ['XXX']):
            landsat_df = landsat_df[
                landsat_df['ROW'].isin(ini['INPUTS']['row_keep_list'])]

        # SCENE_ID values can contain an XXX placeholder that the keep/skip
        # lists do not, so the primary ROW value is substituted before the
        # comparison.  A direct isin() on the SCENE_ID column would not match.
        if ini['INPUTS']['scene_id_keep_list']:
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])])
            landsat_df = landsat_df[scene_id_df.isin(
                ini['INPUTS']['scene_id_keep_list']).values]
        if ini['INPUTS']['scene_id_skip_list']:
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])])
            landsat_df = landsat_df[np.logical_not(scene_id_df.isin(
                ini['INPUTS']['scene_id_skip_list']).values)]

        # Filter by QA/QC value
        if ini['SUMMARY']['max_qa'] >= 0 and not landsat_df.empty:
            logging.debug(' Maximum QA: {0}'.format(
                ini['SUMMARY']['max_qa']))
            landsat_df = landsat_df[
                landsat_df['QA'] <= ini['SUMMARY']['max_qa']]

        # Filter by average cloud score
        if ini['SUMMARY']['max_cloud_score'] < 100 and not landsat_df.empty:
            logging.debug(' Maximum cloud score: {0}'.format(
                ini['SUMMARY']['max_cloud_score']))
            landsat_df = landsat_df[
                landsat_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

        # Filter by Fmask percentage
        if ini['SUMMARY']['max_fmask_pct'] < 100 and not landsat_df.empty:
            # assign() returns a new frame, which avoids writing a column
            # into a filtered slice of the CSV dataframe (chained assignment)
            landsat_df = landsat_df.assign(FMASK_PCT=100 * (
                landsat_df['FMASK_COUNT'] / landsat_df['FMASK_TOTAL']))
            logging.debug(' Max Fmask threshold: {}'.format(
                ini['SUMMARY']['max_fmask_pct']))
            landsat_df = landsat_df[
                landsat_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

        # Filter low count SLC-off images
        if ini['SUMMARY']['min_slc_off_pct'] > 0 and not landsat_df.empty:
            logging.debug(' Mininum SLC-off threshold: {}%'.format(
                ini['SUMMARY']['min_slc_off_pct']))
            # Landsat 7 scenes from 2003 DOY 152 onward are treated as SLC-off
            slc_off_mask = (
                (landsat_df['PLATFORM'] == 'LE07') &
                ((landsat_df['YEAR'] >= 2004) |
                 ((landsat_df['YEAR'] == 2003) & (landsat_df['DOY'] > 151))))
            slc_off_pct = 100 * (
                landsat_df['PIXEL_COUNT'] / landsat_df['PIXEL_TOTAL'])
            # Keep every non-SLC-off row, and SLC-off rows above the threshold
            landsat_df = landsat_df[
                ((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) &
                 slc_off_mask) | (~slc_off_mask)]

        if landsat_df.empty:
            logging.error(
                ' Empty Landsat dataframe after filtering, skipping zone')
            continue

        # Aggregate GRIDMET (to water year)
        # One of the two files is known to exist from the check above;
        # the monthly file is preferred
        if os.path.isfile(gridmet_monthly_path):
            logging.debug(' Reading montly GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_monthly_path)
        elif os.path.isfile(gridmet_daily_path):
            logging.debug(' Reading daily GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_daily_path)

        logging.debug(' Computing GRIDMET summaries')
        # Summarize GRIDMET for target months year
        if (gridmet_start_month in late_months and
                gridmet_end_month in late_months):
            month_mask = (
                (gridmet_df['MONTH'] >= gridmet_start_month) &
                (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
        elif (gridmet_start_month in late_months and
                gridmet_end_month not in late_months):
            # Range wraps over the new year: the late months are assigned
            # to the next year, the early months to their own year
            month_mask = gridmet_df['MONTH'] >= gridmet_start_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
            month_mask = gridmet_df['MONTH'] <= gridmet_end_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        else:
            month_mask = (
                (gridmet_df['MONTH'] >= gridmet_start_month) &
                (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        # GROUP_YEAR for rows not in the GRIDMET month range will be NAN
        gridmet_df = gridmet_df[~pd.isnull(gridmet_df['GROUP_YEAR'])]

        if year_list:
            gridmet_df = gridmet_df[gridmet_df['GROUP_YEAR'].isin(year_list)]

        if gridmet_df.empty:
            logging.error(
                ' Empty GRIDMET dataframe after filtering by year')
            continue

        # Group GRIDMET data by user specified range (default is water year)
        gridmet_group_df = gridmet_df \
            .groupby(['ZONE_NAME', 'ZONE_FID', 'GROUP_YEAR']) \
            .agg({'ETO': 'sum', 'PPT': 'sum'}) \
            .reset_index() \
            .sort_values(by='GROUP_YEAR')
        # Rename wasn't working when chained...
        gridmet_group_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True)
        gridmet_group_df['YEAR'] = gridmet_group_df['YEAR'].astype(int)

        # Merge Landsat and GRIDMET collections
        zone_df = landsat_df.merge(
            gridmet_group_df, on=['ZONE_NAME', 'ZONE_FID', 'YEAR'])
        if zone_df is None or zone_df.empty:
            logging.info(' Empty zone dataframe, not generating figures')
            continue

        # Compute ETg = ET* x (ETo - PPT) and ET = ETg + PPT for the mean
        # and each interval bound
        eto_minus_ppt = zone_df['ETO'] - zone_df['PPT']
        stat_suffixes = ['MEAN', 'LPI', 'UPI', 'LCI', 'UCI']
        for suffix in stat_suffixes:
            zone_df['ETG_' + suffix] = (
                zone_df['ETSTAR_' + suffix] * eto_minus_ppt)
        for suffix in stat_suffixes:
            zone_df['ET_' + suffix] = zone_df['ETG_' + suffix] + zone_df['PPT']

        # ORIGINAL PLOTTING CODE

        # Check that plot variables are present
        for plot_var in plot_var_list:
            if plot_var not in landsat_df.columns.values:
                logging.error(
                    ' The plotting variable {} does not exist in the '
                    'dataframe'.format(plot_var))
                # Non-zero status so calling scripts can detect the failure
                sys.exit(1)

        # Compute colors for each QA value
        logging.debug(' Building column data source')
        qa_values = sorted(list(set(zone_df['QA'].values)))
        colors = {
            qa: "#%02x%02x%02x" % (int(r), int(g), int(b))
            for qa, (r, g, b, _) in zip(
                qa_values,
                255 * cm.viridis(mpl.colors.Normalize()(qa_values)))
        }
        logging.debug(' QA values: {}'.format(
            ', '.join(map(str, qa_values))))

        # Unpack the data by QA type to support interactive legends
        sources = dict()
        for qa_value in qa_values:
            qa_df = zone_df[zone_df['QA'] == qa_value]
            qa_dates = pd.to_datetime(qa_df['DATE'])
            qa_data = {
                'INDEX': list(range(len(qa_df.index))),
                'PLATFORM': qa_df['PLATFORM'],
                'DATE': qa_dates,
                'TIME': qa_dates.map(lambda x: x.strftime('%Y-%m-%d')),
                'DOY': qa_df['DOY'].values,
                'QA': qa_df['QA'].values,
                'COLOR': [colors[qa] for qa in qa_df['QA'].values]
            }
            for plot_var in plot_var_list:
                if plot_var in qa_df.columns.values:
                    qa_data.update({plot_var: qa_df[plot_var].values})
            sources[qa_value] = bokeh.models.ColumnDataSource(qa_data)

        tooltips = [
            ("LANDSAT", "@PLATFORM"),
            ("DATE", "@TIME"),
            ("DOY", "@DOY")]

        # Selection
        hover_circle = Circle(
            fill_color='#ff0000', line_color='#ff0000')
        selected_circle = Circle(
            fill_color='COLOR', line_color='COLOR')
        nonselected_circle = Circle(
            fill_color='#aaaaaa', line_color='#aaaaaa')

        # The DOY and DATE grids are built by the same code path; they
        # differ only in the x field, the output file, the datetime axis,
        # and the lower y bound that is applied to TS on the DATE grid.
        # Each entry: (log label, x field, output path, extra figure args)
        timeseries_outputs = [
            ('DOY', 'DOY', output_doy_path, {}),
            ('date', 'DATE', output_date_path, {'x_axis_type': 'datetime'}),
        ]
        for label, x_field, output_path, axis_args in timeseries_outputs:
            logging.debug(' Building {} timeseries figure'.format(label))
            if os.path.isfile(output_path):
                os.remove(output_path)
            output_file(output_path, title=zone_name)

            figure_args = dict(
                plot_width=750, plot_height=250, title=None,
                tools="xwheel_zoom,xpan,xbox_zoom,reset,box_select",
                active_scroll="xwheel_zoom")
            figure_args.update(axis_args)
            plot_args = dict(size=4, alpha=0.9, color='COLOR')
            if ini['SUMMARY']['max_qa'] > 0:
                plot_args['legend'] = 'QA'

            figures = []
            for plot_var in plot_var_list:
                if figures:
                    # Link panning/zooming by sharing the first figure's
                    # x range with all later figures
                    f = figure(
                        x_range=figures[0].x_range,
                        y_axis_label=plot_var, **figure_args)
                else:
                    f = figure(y_axis_label=plot_var, **figure_args)

                if x_field == 'DATE' and plot_var == 'TS':
                    f.y_range.bounds = (270, None)

                # Add each QA level as a separate object
                for qa, source in sorted(sources.items()):
                    r = f.circle(x_field, plot_var, source=source, **plot_args)
                    r.hover_glyph = hover_circle
                    r.selection_glyph = selected_circle
                    r.nonselection_glyph = nonselected_circle
                    r.muted_glyph = nonselected_circle
                    # DEADBEEF - Renderers with qa > max_qa could be muted
                    # or hidden here instead of being filtered in advance

                f.add_tools(bokeh.models.HoverTool(tooltips=tooltips))

                f.legend.location = "top_left"
                f.legend.click_policy = "hide"
                f.legend.orientation = "horizontal"

                figures.append(f)

            # Try to not allow more than 4 plots in a column
            # At least one column is required, so clamp the integer division
            p = gridplot(
                figures, ncols=max(1, len(plot_var_list) // 3),
                sizing_mode='stretch_both')

            if show_flag:
                show(p)
            save(p)

        # Pause after each iteration if show is True
        if show_flag:
            input('Press ENTER to continue')
def test_month_range(start, end, expected):
    """Check that utils.month_range(start, end) yields the expected values."""
    months = list(utils.month_range(start, end))
    assert months == expected