def main(ini_path, overwrite_flag=True):
    """Earth Engine Beamer ET Zonal Stats

    Args:
        ini_path (str): Input INI file path
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nEarth Engine Beamer ET Zonal Stats')

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='SPATIAL')
    inputs.parse_section(ini, section='BEAMER')
    inputs.parse_section(ini, section='EXPORT')
    inputs.parse_section(ini, section='ZONAL_STATS')

    # Overwrite landsat products with Beamer specific values
    ini['EXPORT']['landsat_products'] = [
        'ndvi_toa', 'ndwi_toa', 'albedo_sur', 'ts', 'evi_sur',
        'etstar_mean', 'etg_mean', 'etg_lpi', 'etg_upi', 'etg_lci', 'etg_uci',
        'et_mean', 'et_lpi', 'et_upi', 'et_lci', 'et_uci']

    # First row of csv is header
    header_list = [
        'ZONE_NAME', 'ZONE_FID', 'DATE', 'SCENE_ID', 'PLATFORM', 'PATH', 'ROW',
        'YEAR', 'MONTH', 'DAY', 'DOY',
        'PIXEL_COUNT', 'PIXEL_TOTAL', 'FMASK_COUNT', 'FMASK_TOTAL',
        'FMASK_PCT', 'ETSTAR_COUNT', 'CLOUD_SCORE', 'QA',
        'NDVI_TOA', 'NDWI_TOA', 'ALBEDO_SUR', 'TS', 'EVI_SUR',
        'ETSTAR_MEAN', 'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI',
        'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI',
        'WY_ETO', 'WY_PPT']
    int_fields = [
        'ZONE_FID', 'PATH', 'ROW', 'YEAR', 'MONTH', 'DAY', 'DOY',
        'PIXEL_COUNT', 'PIXEL_TOTAL', 'FMASK_COUNT', 'FMASK_TOTAL',
        'ETSTAR_COUNT']
    float_fields = list(
        set(header_list) - set(int_fields) -
        set(['ZONE_NAME', 'DATE', 'SCENE_ID', 'PLATFORM']))

    # Regular expression to pull out Landsat scene_id
    # If RE has capturing groups, findall call below will fail to extract ID
    landsat_re = re.compile(r'L[ETC]0[4578]_\d{3}XXX_\d{4}\d{2}\d{2}')
    # landsat_re = re.compile(r'L[ETC][4578]\d{3}XXX\d{4}\d{3}')
    # landsat_re = re.compile(r'L[ETC][4578]\d{3}\d{3}\d{4}\d{3}\D{3}\d{2}')

    # Remove the existing CSV
    output_path = os.path.join(
        ini['ZONAL_STATS']['output_ws'], ini['BEAMER']['output_name'])
    if overwrite_flag and os.path.isfile(output_path):
        os.remove(output_path)
    # Create an empty CSV
    if not os.path.isfile(output_path):
        data_df = pd.DataFrame(columns=header_list)
        data_df[int_fields] = data_df[int_fields].astype(np.int64)
        data_df[float_fields] = data_df[float_fields].astype(np.float32)
        data_df.to_csv(output_path, index=False)

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'],
        zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)
    # zone_count = len(zone_geom_list)
    # output_fmt = '_{0:0%sd}.csv' % str(int(math.log10(zone_count)) + 1)

    # Check if the zone_names are unique
    # Eventually support merging common zone_names
    if len(set([z[1] for z in zone_geom_list])) != len(zone_geom_list):
        logging.error(
            '\nERROR: There appear to be duplicate zone ID/name values.'
            '\n  Currently, the values in "{}" must be unique.'
'\n Exiting.'.format(ini['INPUTS']['zone_field'])) return False # Filter features by FID if ini['INPUTS']['fid_keep_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] in ini['INPUTS']['fid_keep_list'] ] if ini['INPUTS']['fid_skip_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] not in ini['INPUTS']['fid_skip_list'] ] # Merge geometries if ini['INPUTS']['merge_geom_flag']: merge_geom = ogr.Geometry(ogr.wkbMultiPolygon) for zone in zone_geom_list: zone_multipolygon = ogr.ForceToMultiPolygon( ogr.CreateGeometryFromJson(json.dumps(zone[2]))) for zone_polygon in zone_multipolygon: merge_geom.AddGeometry(zone_polygon) # merge_json = json.loads(merge_mp.ExportToJson()) zone_geom_list = [[ 0, ini['INPUTS']['zone_filename'], json.loads(merge_geom.ExportToJson()) ]] ini['INPUTS']['zone_field'] = '' # Set all zone specific parameters into a dictionary zone = {} # Need zone_shp_path projection to build EE geometries zone['osr'] = gdc.feature_path_osr(ini['INPUTS']['zone_shp_path']) zone['proj'] = gdc.osr_wkt(zone['osr']) # zone['proj'] = ee.Projection(zone['proj']).wkt().getInfo() # zone['proj'] = zone['proj'].replace('\n', '').replace(' ', '') # logging.debug(' Zone Projection: {}'.format(zone['proj'])) # Check that shapefile has matching spatial reference if not gdc.matching_spatref(zone['osr'], ini['SPATIAL']['osr']): logging.warning(' Zone OSR:\n{}\n'.format(zone['osr'])) logging.warning(' Output OSR:\n{}\n'.format( ini['SPATIAL']['osr'].ExportToWkt())) logging.warning(' Zone Proj4: {}'.format( zone['osr'].ExportToProj4())) logging.warning(' Output Proj4: {}'.format( ini['SPATIAL']['osr'].ExportToProj4())) logging.warning( '\nWARNING: \n' 'The output and zone spatial references do not appear to match\n' 'This will likely cause problems!') input('Press ENTER to continue') else: logging.debug(' Zone Projection:\n{}\n'.format( zone['osr'].ExportToWkt())) logging.debug(' Output Projection:\n{}\n'.format( ini['SPATIAL']['osr'].ExportToWkt())) logging.debug(' Output Cellsize: {}'.format( ini['SPATIAL']['cellsize'])) # Initialize Earth Engine API key logging.info('\nInitializing Earth Engine') ee.Initialize() utils.ee_request(ee.Number(1).getInfo()) # Read in ETo and PPT data from file if (ini['BEAMER']['eto_source'] == 'file' or ini['BEAMER']['ppt_source'] == 'file'): data_array = np.atleast_1d( np.genfromtxt(ini['BEAMER']['data_path'], delimiter=',', names=True, encoding=None, dtype=None)) data_fields = data_array.dtype.names logging.debug(' CSV fields: {}'.format(', '.join(data_fields))) # DEADBEEF - Compare fields names assuming all upper case data_fields = [f.upper() for f in data_fields] eto_dict = defaultdict(dict) ppt_dict = defaultdict(dict) for row in data_array: z = str(row[data_fields.index(ini['BEAMER']['data_zone_field'])]) y = row[data_fields.index(ini['BEAMER']['data_year_field'])] if ini['BEAMER']['eto_source'] == 'file': # DEADBEEF - Compare fields names assuming all upper case eto_dict[z][y] = row[data_fields.index( ini['BEAMER']['data_eto_field'].upper())] if ini['BEAMER']['ppt_source'] == 'file': # DEADBEEF - Compare fields names assuming all upper case ppt_dict[z][y] = row[data_fields.index( ini['BEAMER']['data_ppt_field'].upper())] # Get filtered/merged/prepped Landsat collection landsat_args = { k: v for section in ['INPUTS'] for k, v in ini[section].items() if k in [ 'landsat4_flag', 'landsat5_flag', 'landsat7_flag', 'landsat8_flag', 'fmask_flag', 'acca_flag', 'start_year', 'end_year', 'start_month', 'end_month', 
'start_doy', 'end_doy', 'scene_id_keep_list', 'scene_id_skip_list', 'path_keep_list', 'row_keep_list', 'tile_geom', 'adjust_method', 'mosaic_method', 'refl_sur_method' ] } landsat_args['products'] = ini['EXPORT']['landsat_products'] landsat = ee_common.Landsat(landsat_args) # Calculate zonal stats for each feature separately for zone_fid, zone_name, zone_json in zone_geom_list: zone['fid'] = zone_fid zone['name'] = zone_name.replace(' ', '_') zone['json'] = zone_json logging.info('ZONE: {} (FID: {})'.format(zone['name'], zone['fid'])) # zone_key used for wy_ppt and wy_eto inputs from csv file if ini['INPUTS']['zone_field'] == 'FID': zone_key = str(zone['fid']) print('Using FID as zone_field') else: zone_key = zone['name'] print('Using Name as zone_field') # Build EE geometry object for zonal stats zone['geom'] = ee.Geometry(geo_json=zone['json'], opt_proj=zone['proj'], opt_geodesic=False) # logging.debug(' Centroid: {}'.format( # zone['geom'].centroid(100).getInfo()['coordinates'])) # Use feature geometry to build extent, transform, and shape zone['extent'] = gdc.Extent( ogr.CreateGeometryFromJson(json.dumps(zone['json'])).GetEnvelope()) # zone['extent'] = gdc.Extent(zone['geom'].GetEnvelope()) zone['extent'] = zone['extent'].ogrenv_swap() zone['extent'] = zone['extent'].adjust_to_snap( 'EXPAND', ini['SPATIAL']['snap_x'], ini['SPATIAL']['snap_y'], ini['SPATIAL']['cellsize']) zone['geo'] = zone['extent'].geo(ini['SPATIAL']['cellsize']) zone['transform'] = gdc.geo_2_ee_transform(zone['geo']) # zone['transform'] = '[' + ','.join(map(str, zone['transform'])) + ']' zone['shape'] = zone['extent'].shape(ini['SPATIAL']['cellsize']) logging.debug(' Zone Shape: {}'.format(zone['shape'])) logging.debug(' Zone Transform: {}'.format(zone['transform'])) logging.debug(' Zone Extent: {}'.format(zone['extent'])) # logging.debug(' Zone Geom: {}'.format(zone['geom'].getInfo())) # Assume all pixels in all 14+2 images could be reduced zone['max_pixels'] = zone['shape'][0] * zone['shape'][1] logging.debug(' Max Pixels: {}'.format(zone['max_pixels'])) # Set output spatial reference # Eventually allow user to manually set these # output_crs = zone['proj'] # ini['INPUTS']['transform'] = zone['transform'] logging.debug(' Output Projection: {}'.format(ini['SPATIAL']['crs'])) logging.debug(' Output Transform: {}'.format(zone['transform'])) # Process date range by year start_dt = datetime.datetime(ini['INPUTS']['start_year'], 1, 1) end_dt = datetime.datetime(ini['INPUTS']['end_year'] + 1, 1, 1) - datetime.timedelta(0, 1) iter_months = ini['BEAMER']['month_step'] for i, iter_start_dt in enumerate( rrule.rrule( # rrule.YEARLY, interval=interval_cnt, rrule.MONTHLY, interval=iter_months, dtstart=start_dt, until=end_dt)): iter_end_dt = ( iter_start_dt + # relativedelta.relativedelta(years=interval_cnt) - relativedelta.relativedelta(months=iter_months) - datetime.timedelta(0, 1)) if ((ini['INPUTS']['start_month'] and iter_end_dt.month < ini['INPUTS']['start_month']) or (ini['INPUTS']['end_month'] and iter_start_dt.month > ini['INPUTS']['end_month'])): logging.debug(' {} {} skipping'.format( iter_start_dt.date(), iter_end_dt.date())) continue elif ( (ini['INPUTS']['start_doy'] and int(iter_end_dt.strftime('%j')) < ini['INPUTS']['start_doy']) or (ini['INPUTS']['end_doy'] and int(iter_start_dt.strftime('%j')) > ini['INPUTS']['end_doy'])): logging.debug(' {} {} skipping'.format( iter_start_dt.date(), iter_end_dt.date())) continue else: logging.info(' {} {}'.format(iter_start_dt.date(), iter_end_dt.date())) year = 
iter_start_dt.year

            # Filter the GRIDMET collection to the water year
            wy_start_date = '{}-10-01'.format(year - 1)
            wy_end_date = '{}-10-01'.format(year)
            logging.debug('  WY: {} {}'.format(wy_start_date, wy_end_date))
            gridmet_coll = ee.ImageCollection('IDAHO_EPSCOR/GRIDMET') \
                .filterDate(wy_start_date, wy_end_date)

            # # PRISM collection was uploaded as an asset
            # if ini['BEAMER']['ppt_source'] == 'prism':
            #     def prism_time_start(input_image):
            #         """Set time_start property on PRISM water year PPT collection"""
            #         # Assume year is the 4th item separated by "_"
            #         wy = ee.String(
            #             input_image.get('system:index')).split('_').get(3)
            #         date_start = ee.Date(ee.String(wy).cat('-10-01'))
            #         return input_image.select([0], ['ppt']).setMulti({
            #             'system:time_start': date_start.advance(-1, 'year').millis()
            #         })
            #     prism_coll = ee.ImageCollection('users/cgmorton/prism_800m_ppt_wy')
            #     prism_coll = ee.ImageCollection(prism_coll.map(prism_time_start)) \
            #         .filterDate(wy_start_dt, wy_end_dt)
            #     # prism_coll = ee.ImageCollection(
            #     #     ee_common.MapsEngineAssets.prism_ppt_wy).filterDate(
            #     #         wy_start_dt, wy_end_dt)

            # Get water year PPT for centroid of zone or read from file
            # Convert all input data to mm to match GRIDMET data
            if ini['BEAMER']['ppt_source'] == 'file':
                wy_ppt_input = ppt_dict[zone_key][year]
                if ini['BEAMER']['data_ppt_units'] == 'mm':
                    pass
                elif ini['BEAMER']['data_ppt_units'] == 'in':
                    wy_ppt_input *= 25.4
                elif ini['BEAMER']['data_ppt_units'] == 'ft':
                    wy_ppt_input *= (25.4 * 12)
            elif ini['BEAMER']['ppt_source'] == 'gridmet':
                wy_ppt_input = float(utils.ee_getinfo(ee.ImageCollection(
                    gridmet_coll.select(['pr'], ['ppt']).sum()).getRegion(
                        zone['geom'].centroid(1), 500))[1][4])
                # Calculate GRIDMET zonal mean of geometry
                # wy_ppt_input = float(ee.ImageCollection(
                #     gridmet_coll.select(['pr'], ['ppt'])).reduceRegion(
                #         reducer=ee.Reducer.sum(),
                #         geometry=zone['geom'],
                #         crs=ini['SPATIAL']['crs'],
                #         crsTransform=zone['transform'],
                #         bestEffort=False,
                #         tileScale=1).getInfo()['ppt']
            # elif ini['BEAMER']['ppt_source'] == 'prism':
            #     # Calculate PRISM zonal mean of geometry
            #     wy_ppt_input = float(utils.ee_getinfo(ee.ImageCollection(
            #         prism_coll.map(ee_common.prism_ppt_func)).sum().reduceRegion(
            #             reducer=ee.Reducer.mean(),
            #             geometry=zone['geom'],
            #             crs=ini['SPATIAL']['crs'],
            #             crsTransform=zone['transform'],
            #             bestEffort=False,
            #             tileScale=1))['ppt'])

            # Get water year ETo for centroid of zone or read from file
            # Convert all input data to mm for Beamer Method
            if ini['BEAMER']['eto_source'] == 'file':
                wy_eto_input = eto_dict[zone_key][year]
                if ini['BEAMER']['data_eto_units'] == 'mm':
                    pass
                elif ini['BEAMER']['data_eto_units'] == 'in':
                    wy_eto_input *= 25.4
                elif ini['BEAMER']['data_eto_units'] == 'ft':
                    wy_eto_input *= (25.4 * 12)
            # This assumes GRIDMET data is in millimeters
            elif ini['BEAMER']['eto_source'] == 'gridmet':
                wy_eto_input = float(utils.ee_getinfo(ee.ImageCollection(
                    gridmet_coll.select(['eto']).sum()).getRegion(
                        zone['geom'].centroid(1), 500))[1][4])
                # wy_eto_input = float(ee.ImageCollection(
                #     gridmet_coll.select(['eto'])).reduceRegion(
                #         reducer=ee.Reducer.sum(),
                #         geometry=zone['geom'],
                #         crs=ini['SPATIAL']['crs'],
                #         crsTransform=zone['transform'],
                #         bestEffort=False,
                #         tileScale=1).getInfo()
            logging.debug('  Input  ETO: {} mm  PPT: {} mm'.format(
                wy_eto_input, wy_ppt_input))

            # Scale ETo & PPT
            wy_eto_input *= ini['BEAMER']['eto_factor']
            wy_ppt_input *= ini['BEAMER']['ppt_factor']

            # Convert output units from mm
            wy_eto_output = wy_eto_input
            wy_ppt_output = wy_ppt_input
            if ini['BEAMER']['ppt_units'] == 'mm':
                pass
            elif
ini['BEAMER']['ppt_units'] == 'in': wy_ppt_output /= 25.4 elif ini['BEAMER']['ppt_units'] == 'ft': wy_ppt_output /= (25.4 * 12) if ini['BEAMER']['eto_units'] == 'mm': pass elif ini['BEAMER']['eto_units'] == 'in': wy_eto_output /= 25.4 elif ini['BEAMER']['eto_units'] == 'ft': wy_eto_output /= (25.4 * 12) logging.debug(' Output ETO: {} {} PPT: {} {}'.format( wy_eto_output, ini['BEAMER']['eto_units'], wy_ppt_output, ini['BEAMER']['ppt_units'])) # Initialize the Landsat object landsat.zone_geom = zone['geom'] landsat.start_date = iter_start_dt.strftime('%Y-%m-%d') landsat.end_date = iter_end_dt.strftime('%Y-%m-%d') landsat_coll = landsat.get_collection() if ee.Image(landsat_coll.first()).getInfo() is None: logging.info(' No images, skipping') continue # # Print the collection SCENE_ID list # logging.debug('{}'.format(', '.join([ # f['properties']['SCENE_ID'] # for f in landsat_coll.getInfo()['features']]))) # input('ENTER') # Add water year ETo and PPT values to each image def eto_ppt_func(img): """""" return ee.Image(img).setMulti({ 'wy_eto': wy_eto_input, 'wy_ppt': wy_ppt_input }) landsat_coll = ee.ImageCollection(landsat_coll.map(eto_ppt_func)) # Compute ETg image_coll = ee.ImageCollection(landsat_coll.map(landsat_etg_func)) # # Get the output image URL # output_url = ee.Image(landsat_coll.first()) \ # .select(['red', 'green', 'blue']) \ # .visualize(min=[0, 0, 0], max=[0.4, 0.4, 0.4]) \ # .getThumbUrl({'format': 'png', 'size': '600'}) # # This would load the image in your browser # import webbrowser # webbrowser.open(output_url) # # webbrowser.read(output_url) # # Show the output image # window = tk.Tk() # output_file = Image.open(io.BytesIO(urllib.urlopen(output_url).read())) # output_photo = ImageTk.PhotoImage(output_file) # label = tk.Label(window, image=output_photo) # label.pack() # window.mainloop() # Compute zonal stats of polygon def beamer_zonal_stats_func(input_image): """""" # Beamer function adds 5 ETg and 1 ET* band # Landsat collection adds 3 ancillary bands bands = len(landsat_args['products']) + 3 + 6 # .clip(zone['geom']) \ input_mean = input_image \ .reduceRegion( reducer=ee.Reducer.mean(), geometry=zone['geom'], crs=ini['SPATIAL']['crs'], crsTransform=zone['transform'], bestEffort=False, tileScale=1, maxPixels=zone['max_pixels'] * bands) fmask_img = input_image.select(['fmask']) input_count = fmask_img.gt(1) \ .addBands(fmask_img.gte(0).unmask()) \ .rename(['fmask', 'pixel']) \ .reduceRegion( reducer=ee.Reducer.sum().combine( ee.Reducer.count(), '', True), geometry=zone['geom'], crs=ini['SPATIAL']['crs'], crsTransform=zone['transform'], bestEffort=False, tileScale=1, maxPixels=zone['max_pixels'] * 3) etstar_count = input_image \ .select(['etstar_mean'], ['etstar_count']) \ .lte(ini['BEAMER']['etstar_threshold']) \ .reduceRegion( reducer=ee.Reducer.sum(), geometry=zone['geom'], crs=ini['SPATIAL']['crs'], crsTransform=zone['transform'], bestEffort=False, tileScale=1, maxPixels=zone['max_pixels'] * 2) # Save as image properties return ee.Feature( None, { 'scene_id': ee.String(input_image.get('SCENE_ID')), 'time': input_image.get('system:time_start'), 'row': input_mean.get('row'), 'pixel_count': input_count.get('pixel_sum'), 'pixel_total': input_count.get('pixel_count'), 'fmask_count': input_count.get('fmask_sum'), 'fmask_total': input_count.get('fmask_count'), 'cloud_score': input_mean.get('cloud_score'), 'etstar_count': etstar_count.get('etstar_count'), 'ndvi_toa': input_mean.get('ndvi_toa'), 'ndwi_toa': input_mean.get('ndwi_toa'), 'albedo_sur': 
input_mean.get('albedo_sur'), 'ts': input_mean.get('ts'), 'evi_sur': input_mean.get('evi_sur'), 'etstar_mean': input_mean.get('etstar_mean'), 'etg_mean': input_mean.get('etg_mean'), 'etg_lpi': input_mean.get('etg_lpi'), 'etg_upi': input_mean.get('etg_upi'), 'etg_lci': input_mean.get('etg_lci'), 'etg_uci': input_mean.get('etg_uci') }) # Calculate values and statistics stats_coll = ee.ImageCollection( image_coll.map(beamer_zonal_stats_func)) # # DEADBEEF - Test the function for a single image # stats_info = beamer_zonal_stats_func( # ee.Image(image_coll.first())).getInfo() # print(stats_info) # for k, v in sorted(stats_info['properties'].items()): # logging.info('{:24s}: {}'.format(k, v)) # input('ENTER') # return False # # DEADBEEF - Print the stats info to the screen # stats_info = stats_coll.getInfo() # import pprint # pp = pprint.PrettyPrinter(indent=4) # for ftr in stats_info['features']: # pp.pprint(ftr) # input('ENTER') # # return False # Get the values from EE stats_desc = utils.ee_getinfo(stats_coll) if stats_desc is None: logging.error(' Timeout error, skipping') continue # Save data for writing row_list = [] for ftr in stats_desc['features']: try: count = int(ftr['properties']['pixel_count']) except (KeyError, TypeError) as e: # logging.debug(' Exception: {}'.format(e)) continue if count == 0: logging.info(' COUNT: 0, skipping') continue # First get scene ID and time try: scene_id = landsat_re.findall( ftr['properties']['scene_id'])[0] scene_time = datetime.datetime.utcfromtimestamp( float(ftr['properties']['time']) / 1000) except: pp = pprint.PrettyPrinter(indent=4) pp.pprint(ftr) input('ENTER') # Extract and save other properties try: row_list.append({ 'ZONE_FID': zone_fid, 'ZONE_NAME': zone_name, 'SCENE_ID': scene_id, 'PLATFORM': scene_id[0:4], 'PATH': int(scene_id[5:8]), 'ROW': int(ftr['properties']['row']), # 'ROW': int(scene_id[8:11]), 'DATE': scene_time.date().isoformat(), 'YEAR': int(scene_time.year), 'MONTH': int(scene_time.month), 'DAY': int(scene_time.day), 'DOY': int(scene_time.strftime('%j')), 'PIXEL_COUNT': int(ftr['properties']['pixel_count']), 'PIXEL_TOTAL': int(ftr['properties']['pixel_total']), 'FMASK_COUNT': int(ftr['properties']['fmask_count']), 'FMASK_TOTAL': int(ftr['properties']['fmask_total']), 'CLOUD_SCORE': float(ftr['properties']['cloud_score']), 'ETSTAR_COUNT': int(ftr['properties']['etstar_count']), 'NDVI_TOA': float(ftr['properties']['ndvi_toa']), 'NDWI_TOA': float(ftr['properties']['ndwi_toa']), 'ALBEDO_SUR': float(ftr['properties']['albedo_sur']), 'TS': float(ftr['properties']['ts']), 'EVI_SUR': float(ftr['properties']['evi_sur']), 'ETSTAR_MEAN': float(ftr['properties']['etstar_mean']), 'ETG_MEAN': float(ftr['properties']['etg_mean']), 'ETG_LPI': float(ftr['properties']['etg_lpi']), 'ETG_UPI': float(ftr['properties']['etg_upi']), 'ETG_LCI': float(ftr['properties']['etg_lci']), 'ETG_UCI': float(ftr['properties']['etg_uci']), 'WY_ETO': wy_eto_output, 'WY_PPT': wy_ppt_output }) except (KeyError, TypeError) as e: logging.info(' ERROR: {}\n SCENE_ID: {}\n ' ' There may not be an SR image to join to\n' ' {}'.format(e, scene_id, ftr['properties'])) # input('ENTER') # Save all values to the dataframe (and export) if row_list: logging.debug(' Appending') data_df = data_df.append(row_list, ignore_index=True) # DEADBEEF if data_df['QA'].isnull().any(): data_df.loc[data_df['QA'].isnull(), 'QA'] = 0 fmask_mask = data_df['FMASK_TOTAL'] > 0 if fmask_mask.any(): data_df.loc[fmask_mask, 'FMASK_PCT'] = 100.0 * ( data_df.loc[fmask_mask, 'FMASK_COUNT'] / 
data_df.loc[fmask_mask, 'FMASK_TOTAL']) logging.debug(' Saving') data_df[int_fields] = data_df[int_fields].astype(np.int64) data_df[float_fields] = data_df[float_fields].astype( np.float32) # Compute ET from ETg and PPT offline # (must be after float conversion above) data_df['ET_MEAN'] = data_df['ETG_MEAN'] + data_df['WY_PPT'] data_df['ET_LPI'] = data_df['ETG_LPI'] + data_df['WY_PPT'] data_df['ET_UPI'] = data_df['ETG_UPI'] + data_df['WY_PPT'] data_df['ET_LCI'] = data_df['ETG_LCI'] + data_df['WY_PPT'] data_df['ET_UCI'] = data_df['ETG_UCI'] + data_df['WY_PPT'] # Convert float fields to objects, set NaN to None for field in data_df.columns.values: if field.upper() not in float_fields: continue data_df[field] = data_df[field].astype(object) null_mask = data_df[field].isnull() data_df.loc[null_mask, field] = None data_df.loc[~null_mask, field] = data_df.loc[~null_mask, field].map( lambda x: '{0:10.6f}'.format(x).strip()) # data_df.loc[~null_mask, [field]] = data_df.loc[~null_mask, [field]].apply( # lambda x: '{0:10.6f}'.format(x[0]).strip(), axis=1) # data_df = data_df.reindex_axis(header_list, axis=1) data_df = data_df.reindex(header_list, axis=1) # data_df.reset_index(drop=False, inplace=True) data_df.sort_values(['ZONE_FID', 'DATE', 'ROW'], ascending=True, inplace=True) # data_df.sort( # ['ZONE_NAME', 'DATE'], ascending=[True, True], inplace=True) data_df.to_csv(output_path, index=False) del row_list
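
# Note: ET is reconstructed offline above as ET = ETg + PPT; the ETg values
# themselves come from the Beamer relationship ETg = ET* * (ETo - PPT)
# (see the explicit computation in the summary tables script below).
# Worked example with made-up numbers, for illustration only: if
# ETSTAR_MEAN = 0.6, WY_ETO = 1200 mm, and WY_PPT = 300 mm, then
# ETG_MEAN = 0.6 * (1200 - 300) = 540 mm and ET_MEAN = 540 + 300 = 840 mm.
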
def main(ini_path, overwrite_flag=True): """Generate Beamer ETg summary tables Args: ini_path (str): overwrite_flag (bool): if True, overwrite existing tables Default is True (for now) """ logging.info('\nGenerate Beamer ETg summary tables') # # Eventually get from INI (like ini['BEAMER']['landsat_products']) # daily_fields = [ # 'ZONE_NAME', 'ZONE_FID', 'DATE', 'SCENE_ID', 'PLATFORM', 'PATH', 'ROW', # 'YEAR', 'MONTH', 'DAY', 'DOY', 'WATER_YEAR', # 'PIXEL_COUNT', 'ETSTAR_COUNT', # 'NDVI_TOA', 'NDWI_TOA', 'ALBEDO_SUR', 'TS', 'EVI_SUR', 'ETSTAR_MEAN', # 'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI', # 'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI', # 'ETO', 'PPT'] # annual_fields = [ # 'SCENE_COUNT', 'PIXEL_COUNT', 'ETSTAR_COUNT', # 'NDVI_TOA', 'NDWI_TOA', 'ALBEDO_SUR', 'TS', # 'EVI_SUR_MEAN', 'EVI_SUR_MEDIAN', 'EVI_SUR_MIN', 'EVI_SUR_MAX', # 'ETSTAR_MEAN', # 'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI', # 'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI', # 'ETO', 'PPT'] # For unit conversion eto_fields = [ 'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI', 'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI', 'ETO' ] ppt_fields = ['PPT'] # Read config file ini = inputs.read(ini_path) inputs.parse_section(ini, section='INPUTS') inputs.parse_section(ini, section='ZONAL_STATS') inputs.parse_section(ini, section='BEAMER') inputs.parse_section(ini, section='SUMMARY') inputs.parse_section(ini, section='TABLES') # Hardcode GRIDMET month range to the water year ini['SUMMARY']['gridmet_start_month'] = 10 ini['SUMMARY']['gridmet_end_month'] = 9 # Output paths output_daily_path = os.path.join( ini['SUMMARY']['output_ws'], ini['BEAMER']['output_name'].replace('.csv', '_daily.xlsx')) output_annual_path = os.path.join( ini['SUMMARY']['output_ws'], ini['BEAMER']['output_name'].replace('.csv', '_annual.xlsx')) # Check if files already exist if overwrite_flag: if os.path.isfile(output_daily_path): os.remove(output_daily_path) if os.path.isfile(output_annual_path): os.remove(output_annual_path) else: if (os.path.isfile(output_daily_path) and os.path.isfile(output_annual_path)): logging.info('\nOutput files already exist and ' 'overwrite is False, exiting') return True # Start/end year year_list = list( range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1)) month_list = list( utils.wrapped_range(ini['INPUTS']['start_month'], ini['INPUTS']['end_month'], 1, 12)) doy_list = list( utils.wrapped_range(ini['INPUTS']['start_doy'], ini['INPUTS']['end_doy'], 1, 366)) # GRIDMET month range (default to water year) gridmet_start_month = ini['SUMMARY']['gridmet_start_month'] gridmet_end_month = ini['SUMMARY']['gridmet_end_month'] gridmet_months = list( utils.month_range(gridmet_start_month, gridmet_end_month)) logging.info('\nGridmet months: {}'.format(', '.join( map(str, gridmet_months)))) # Get ee features from shapefile zone_geom_list = gdc.shapefile_2_geom_list_func( ini['INPUTS']['zone_shp_path'], zone_field=ini['INPUTS']['zone_field'], reverse_flag=False) # Filter features by FID before merging geometries if ini['INPUTS']['fid_keep_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] in ini['INPUTS']['fid_keep_list'] ] if ini['INPUTS']['fid_skip_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] not in ini['INPUTS']['fid_skip_list'] ] # # Filter features by FID before merging geometries # if ini['INPUTS']['fid_keep_list']: # landsat_df = landsat_df[landsat_df['ZONE_FID'].isin( # ini['INPUTS']['fid_keep_list'])] # if 
ini['INPUTS']['fid_skip_list']: # landsat_df = landsat_df[~landsat_df['ZONE_FID'].isin( # ini['INPUTS']['fid_skip_list'])] logging.info('\nProcessing zones') zone_df_dict = {} for zone_fid, zone_name, zone_json in zone_geom_list: zone_name = zone_name.replace(' ', '_') logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid)) zone_stats_ws = os.path.join(ini['ZONAL_STATS']['output_ws'], zone_name) if not os.path.isdir(zone_stats_ws): logging.debug( ' Folder {} does not exist, skipping'.format(zone_stats_ws)) continue # Input paths landsat_daily_path = os.path.join( zone_stats_ws, '{}_landsat_daily.csv'.format(zone_name)) gridmet_daily_path = os.path.join( zone_stats_ws, '{}_gridmet_daily.csv'.format(zone_name)) gridmet_monthly_path = os.path.join( zone_stats_ws, '{}_gridmet_monthly.csv'.format(zone_name)) if not os.path.isfile(landsat_daily_path): logging.error(' Landsat daily CSV does not exist, skipping zone') continue elif (not os.path.isfile(gridmet_daily_path) and not os.path.isfile(gridmet_monthly_path)): logging.error( ' GRIDMET daily or monthly CSV does not exist, skipping zone') continue # DEADBEEF - Eventually support generating only Landsat figures # logging.error( # ' GRIDMET daily and/or monthly CSV files do not exist.\n' # ' ETo and PPT will not be processed.') logging.debug(' Reading Landsat CSV') landsat_df = pd.read_csv(landsat_daily_path) logging.debug(' Filtering Landsat dataframe') landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0] # QA field should have been written in zonal stats code # Eventually this block can be removed if 'QA' not in landsat_df.columns.values: landsat_df['QA'] = 0 # # This assumes that there are L5/L8 images in the dataframe # if not landsat_df.empty: # max_pixel_count = max(landsat_df['PIXEL_COUNT']) # else: # max_pixel_count = 0 if year_list: landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)] if month_list: landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)] if doy_list: landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)] # Assume the default is for these to be True and only filter if False if not ini['INPUTS']['landsat4_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT04'] if not ini['INPUTS']['landsat5_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT05'] if not ini['INPUTS']['landsat7_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LE07'] if not ini['INPUTS']['landsat8_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LC08'] if ini['INPUTS']['path_keep_list']: landsat_df = landsat_df[landsat_df['PATH'].isin( ini['INPUTS']['path_keep_list'])] if (ini['INPUTS']['row_keep_list'] and ini['INPUTS']['row_keep_list'] != ['XXX']): landsat_df = landsat_df[landsat_df['ROW'].isin( ini['INPUTS']['row_keep_list'])] if ini['INPUTS']['scene_id_keep_list']: # Replace XXX with primary ROW value for checking skip list SCENE_ID scene_id_df = pd.Series([ s.replace('XXX', '{:03d}'.format(int(r))) for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW']) ]) landsat_df = landsat_df[scene_id_df.isin( ini['INPUTS']['scene_id_keep_list']).values] # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # landsat_df = landsat_df[landsat_df['SCENE_ID'].isin( # ini['INPUTS']['scene_id_keep_list'])] if ini['INPUTS']['scene_id_skip_list']: # Replace XXX with primary ROW value for checking skip list SCENE_ID scene_id_df = pd.Series([ s.replace('XXX', '{:03d}'.format(int(r))) for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW']) ]) landsat_df = landsat_df[np.logical_not( 
scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)] # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # landsat_df = landsat_df[np.logical_not(landsat_df['SCENE_ID'].isin( # ini['INPUTS']['scene_id_skip_list']))] # Filter by QA/QC value if ini['SUMMARY']['max_qa'] >= 0 and not landsat_df.empty: logging.debug(' Maximum QA: {0}'.format( ini['SUMMARY']['max_qa'])) landsat_df = landsat_df[ landsat_df['QA'] <= ini['SUMMARY']['max_qa']] # Filter by average cloud score if ini['SUMMARY']['max_cloud_score'] < 100 and not landsat_df.empty: logging.debug(' Maximum cloud score: {0}'.format( ini['SUMMARY']['max_cloud_score'])) landsat_df = landsat_df[ landsat_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']] # Filter by Fmask percentage if ini['SUMMARY']['max_fmask_pct'] < 100 and not landsat_df.empty: landsat_df['FMASK_PCT'] = 100 * (landsat_df['FMASK_COUNT'] / landsat_df['FMASK_TOTAL']) logging.debug(' Max Fmask threshold: {}'.format( ini['SUMMARY']['max_fmask_pct'])) landsat_df = landsat_df[ landsat_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']] # Filter low count SLC-off images if ini['SUMMARY']['min_slc_off_pct'] > 0 and not landsat_df.empty: logging.debug(' Mininum SLC-off threshold: {}%'.format( ini['SUMMARY']['min_slc_off_pct'])) # logging.debug(' Maximum pixel count: {}'.format( # max_pixel_count)) slc_off_mask = ((landsat_df['PLATFORM'] == 'LE07') & ((landsat_df['YEAR'] >= 2004) | ((landsat_df['YEAR'] == 2003) & (landsat_df['DOY'] > 151)))) slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / landsat_df['PIXEL_TOTAL']) # slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / max_pixel_count) landsat_df = landsat_df[( (slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) & slc_off_mask) | (~slc_off_mask)] if landsat_df.empty: logging.error( ' Empty Landsat dataframe after filtering, skipping zone') continue # Aggregate GRIDMET (to water year) if os.path.isfile(gridmet_monthly_path): logging.debug(' Reading montly GRIDMET CSV') gridmet_df = pd.read_csv(gridmet_monthly_path) elif os.path.isfile(gridmet_daily_path): logging.debug(' Reading daily GRIDMET CSV') gridmet_df = pd.read_csv(gridmet_daily_path) logging.debug(' Computing GRIDMET summaries') # Summarize GRIDMET for target months year if (gridmet_start_month in [10, 11, 12] and gridmet_end_month in [10, 11, 12]): month_mask = ((gridmet_df['MONTH'] >= gridmet_start_month) & (gridmet_df['MONTH'] <= gridmet_end_month)) gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1 elif (gridmet_start_month in [10, 11, 12] and gridmet_end_month not in [10, 11, 12]): month_mask = gridmet_df['MONTH'] >= gridmet_start_month gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1 month_mask = gridmet_df['MONTH'] <= gridmet_end_month gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] else: month_mask = ((gridmet_df['MONTH'] >= gridmet_start_month) & (gridmet_df['MONTH'] <= gridmet_end_month)) gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] # GROUP_YEAR for rows not in the GRIDMET month range will be NAN gridmet_df = gridmet_df[~pd.isnull(gridmet_df['GROUP_YEAR'])] if year_list: gridmet_df = gridmet_df[gridmet_df['GROUP_YEAR'].isin(year_list)] if gridmet_df.empty: logging.error( ' Empty GRIDMET dataframe after filtering by year') continue # Group GRIDMET data by user specified range (default is water year) gridmet_group_df = gridmet_df \ .groupby(['ZONE_NAME', 'ZONE_FID', 'GROUP_YEAR']) \ .agg({'ETO': np.sum, 'PPT': np.sum}) \ .reset_index() \ .sort_values(by='GROUP_YEAR') # 
.rename(columns={'ETO': 'ETO', 'PPT': 'PPT'}) \ # Rename wasn't working when chained... gridmet_group_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True) gridmet_group_df['YEAR'] = gridmet_group_df['YEAR'].astype(int) # # Group GRIDMET data by month # gridmet_month_df = gridmet_df\ # .groupby(['ZONE_NAME', 'ZONE_FID', 'GROUP_YEAR', 'MONTH']) \ # .agg({'ETO': np.sum, 'PPT': np.sum}) \ # .reset_index() \ # .sort_values(by=['GROUP_YEAR', 'MONTH']) # gridmet_month_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True) # # Rename monthly PPT columns # gridmet_month_df['MONTH'] = 'PPT_M' + gridmet_month_df['MONTH'].astype(str) # # Pivot rows up to separate columns # gridmet_month_df = gridmet_month_df.pivot_table( # 'PPT', ['ZONE_NAME', 'YEAR'], 'MONTH') # gridmet_month_df.reset_index(inplace=True) # columns = ['ZONE_NAME', 'YEAR'] + ['PPT_M{}'.format(m) for m in gridmet_months] # gridmet_month_df = gridmet_month_df[columns] # del gridmet_month_df.index.name # Merge Landsat and GRIDMET collections zone_df = landsat_df.merge(gridmet_group_df, on=['ZONE_NAME', 'ZONE_FID', 'YEAR']) if zone_df is None or zone_df.empty: logging.info(' Empty zone dataframe, not generating figures') continue # Compute ETg zone_df['ETG_MEAN'] = zone_df['ETSTAR_MEAN'] * (zone_df['ETO'] - zone_df['PPT']) zone_df['ETG_LPI'] = zone_df['ETSTAR_LPI'] * (zone_df['ETO'] - zone_df['PPT']) zone_df['ETG_UPI'] = zone_df['ETSTAR_UPI'] * (zone_df['ETO'] - zone_df['PPT']) zone_df['ETG_LCI'] = zone_df['ETSTAR_LCI'] * (zone_df['ETO'] - zone_df['PPT']) zone_df['ETG_UCI'] = zone_df['ETSTAR_UCI'] * (zone_df['ETO'] - zone_df['PPT']) # Compute ET zone_df['ET_MEAN'] = zone_df['ETG_MEAN'] + zone_df['PPT'] zone_df['ET_LPI'] = zone_df['ETG_LPI'] + zone_df['PPT'] zone_df['ET_UPI'] = zone_df['ETG_UPI'] + zone_df['PPT'] zone_df['ET_LCI'] = zone_df['ETG_LCI'] + zone_df['PPT'] zone_df['ET_UCI'] = zone_df['ETG_UCI'] + zone_df['PPT'] # Append zone dataframes zone_df_dict[zone_name] = zone_df # Export each zone to a separate tab if not os.path.isfile(output_daily_path): logging.info('\nWriting daily values to Excel') excel_f = ExcelWriter(output_daily_path) for zone_name, zone_df in sorted(zone_df_dict.items()): logging.info(' {}'.format(zone_name)) zone_df.to_excel(excel_f, zone_name, index=False, float_format='%.4f') # zone_df.to_excel(excel_f, zone_name, index=False) del zone_df excel_f.save() if not os.path.isfile(output_annual_path): logging.info('\nComputing annual summaries') annual_df = pd.concat(list(zone_df_dict.values())) \ .groupby(['ZONE_NAME', 'YEAR']) \ .agg({ 'PIXEL_COUNT': ['count', 'mean'], 'PIXEL_TOTAL': ['mean'], 'FMASK_COUNT': 'mean', 'FMASK_TOTAL': 'mean', 'CLOUD_SCORE': 'mean', 'ETSTAR_COUNT': 'mean', 'NDVI_TOA': 'mean', 'NDWI_TOA': 'mean', 'ALBEDO_SUR': 'mean', 'TS': 'mean', # 'EVI_SUR': 'mean', 'EVI_SUR': ['mean', 'median', 'min', 'max'], 'ETSTAR_MEAN': 'mean', 'ETG_MEAN': 'mean', 'ETG_LPI': 'mean', 'ETG_UPI': 'mean', 'ETG_LCI': 'mean', 'ETG_UCI': 'mean', 'ET_MEAN': 'mean', 'ET_LPI': 'mean', 'ET_UPI': 'mean', 'ET_LCI': 'mean', 'ET_UCI': 'mean', 'ETO': 'mean', 'PPT': 'mean' }) annual_df.columns = annual_df.columns.map('_'.join) annual_df = annual_df.rename(columns={ 'PIXEL_COUNT_count': 'SCENE_COUNT', 'PIXEL_COUNT_mean': 'PIXEL_COUNT' }) annual_df = annual_df.rename( columns={ 'EVI_SUR_mean': 'EVI_SUR_MEAN', 'EVI_SUR_median': 'EVI_SUR_MEDIAN', 'EVI_SUR_min': 'EVI_SUR_MIN', 'EVI_SUR_max': 'EVI_SUR_MAX' }) annual_df.rename(columns=lambda x: str(x).replace('_mean', ''), inplace=True) annual_df['SCENE_COUNT'] = 
annual_df['SCENE_COUNT'].astype(np.int) annual_df['PIXEL_COUNT'] = annual_df['PIXEL_COUNT'].astype(np.int) annual_df['PIXEL_TOTAL'] = annual_df['PIXEL_TOTAL'].astype(np.int) annual_df['FMASK_COUNT'] = annual_df['FMASK_COUNT'].astype(np.int) annual_df['FMASK_TOTAL'] = annual_df['FMASK_TOTAL'].astype(np.int) annual_df['ETSTAR_COUNT'] = annual_df['ETSTAR_COUNT'].astype(np.int) annual_df = annual_df.reset_index() # Convert ETo units if (ini['BEAMER']['eto_units'] == 'mm' and ini['TABLES']['eto_units'] == 'mm'): pass elif (ini['BEAMER']['eto_units'] == 'mm' and ini['TABLES']['eto_units'] == 'in'): annual_df[eto_fields] /= (25.4) elif (ini['BEAMER']['eto_units'] == 'mm' and ini['TABLES']['eto_units'] == 'ft'): annual_df[eto_fields] /= (12 * 25.4) else: logging.error( ('\nERROR: Input units {} and output units {} are not ' + 'currently supported, exiting').format( ini['BEAMER']['eto_units'], ini['TABLES']['eto_units'])) sys.exit() # Convert PPT units if (ini['BEAMER']['ppt_units'] == 'mm' and ini['TABLES']['ppt_units'] == 'mm'): pass elif (ini['BEAMER']['ppt_units'] == 'mm' and ini['TABLES']['ppt_units'] == 'in'): annual_df[ppt_fields] /= (25.4) elif (ini['BEAMER']['ppt_units'] == 'mm' and ini['TABLES']['ppt_units'] == 'ft'): annual_df[ppt_fields] /= (12 * 25.4) else: logging.error( ('\nERROR: Input units {} and output units {} are not ' + 'currently supported, exiting').format( ini['BEAMER']['ppt_units'], ini['TABLES']['ppt_units'])) sys.exit() logging.info('\nWriting annual values to Excel') excel_f = ExcelWriter(output_annual_path) for zone_name in sorted(zone_df_dict.keys()): logging.info(' {}'.format(zone_name)) zone_df = annual_df[annual_df['ZONE_NAME'] == zone_name] zone_df.to_excel(excel_f, zone_name, index=False, float_format='%.4f') del zone_df excel_f.save()
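
# Note on the GRIDMET aggregation above: months are grouped into water years
# (October through September by default), so October-December values are
# assigned to the following calendar year before ETo and PPT are summed.
# The unit conversions assume the zonal stats values are in millimeters:
# divide by 25.4 for inches and by 12 * 25.4 (i.e. 304.8) for feet.
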
def main(ini_path, show_flag=False, overwrite_flag=True): """Generate Bokeh figures Bokeh issues: Adjust y range based on non-muted data https://stackoverflow.com/questions/43620837/how-to-get-bokeh-to-dynamically-adjust-y-range-when-panning Linked interactive legends so that there is only one legend for the gridplot Maybe hide or mute QA values above max (instead of filtering them in advance) Args: ini_path (str): show_flag (bool): if True, show the figures in the browser. Default is False. overwrite_flag (bool): if True, overwrite existing tables. Default is True (for now) """ logging.info('\nGenerate interactive timeseries figures') # Eventually read from INI plot_var_list = ['NDVI_TOA', 'ALBEDO_SUR', 'TS', 'NDWI_TOA', 'EVI_SUR'] # plot_var_list = [ # 'NDVI_TOA', 'ALBEDO_SUR', 'TS', 'NDWI_TOA', # 'CLOUD_SCORE', 'FMASK_PCT'] output_folder = 'figures' # Read config file ini = inputs.read(ini_path) inputs.parse_section(ini, section='INPUTS') inputs.parse_section(ini, section='ZONAL_STATS') inputs.parse_section(ini, section='SUMMARY') inputs.parse_section(ini, section='FIGURES') inputs.parse_section(ini, section='BEAMER') # Output paths output_ws = os.path.join(ini['SUMMARY']['output_ws'], output_folder) if not os.path.isdir(output_ws): os.makedirs(output_ws) # Start/end year year_list = list( range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1)) month_list = list( utils.wrapped_range(ini['INPUTS']['start_month'], ini['INPUTS']['end_month'], 1, 12)) doy_list = list( utils.wrapped_range(ini['INPUTS']['start_doy'], ini['INPUTS']['end_doy'], 1, 366)) # GRIDMET month range (default to water year) gridmet_start_month = ini['SUMMARY']['gridmet_start_month'] gridmet_end_month = ini['SUMMARY']['gridmet_end_month'] gridmet_months = list( utils.month_range(gridmet_start_month, gridmet_end_month)) logging.info('\nGridmet months: {}'.format(', '.join( map(str, gridmet_months)))) # Read in the zonal stats CSV logging.debug(' Reading zonal stats CSV file') input_df = pd.read_csv( os.path.join(ini['ZONAL_STATS']['output_ws'], ini['BEAMER']['output_name'])) logging.debug(input_df.head()) logging.debug(' Filtering Landsat dataframe') input_df = input_df[input_df['PIXEL_COUNT'] > 0] # # This assumes that there are L5/L8 images in the dataframe # if not input_df.empty: # max_pixel_count = max(input_df['PIXEL_COUNT']) # else: # max_pixel_count = 0 if ini['INPUTS']['fid_keep_list']: input_df = input_df[input_df['ZONE_FID'].isin( ini['INPUTS']['fid_keep_list'])] if ini['INPUTS']['fid_skip_list']: input_df = input_df[~input_df['ZONE_FID']. 
isin(ini['INPUTS']['fid_skip_list'])] if year_list: input_df = input_df[input_df['YEAR'].isin(year_list)] if month_list: input_df = input_df[input_df['MONTH'].isin(month_list)] if doy_list: input_df = input_df[input_df['DOY'].isin(doy_list)] if ini['INPUTS']['path_keep_list']: input_df = input_df[input_df['PATH'].isin( ini['INPUTS']['path_keep_list'])] if (ini['INPUTS']['row_keep_list'] and ini['INPUTS']['row_keep_list'] != ['XXX']): input_df = input_df[input_df['ROW'].isin( ini['INPUTS']['row_keep_list'])] # Assume the default is for these to be True and only filter if False if not ini['INPUTS']['landsat4_flag']: input_df = input_df[input_df['PLATFORM'] != 'LT04'] if not ini['INPUTS']['landsat5_flag']: input_df = input_df[input_df['PLATFORM'] != 'LT05'] if not ini['INPUTS']['landsat7_flag']: input_df = input_df[input_df['PLATFORM'] != 'LE07'] if not ini['INPUTS']['landsat8_flag']: input_df = input_df[input_df['PLATFORM'] != 'LC08'] if ini['INPUTS']['scene_id_keep_list']: # Replace XXX with primary ROW value for checking skip list SCENE_ID scene_id_df = pd.Series([ s.replace('XXX', '{:03d}'.format(int(r))) for s, r in zip(input_df['SCENE_ID'], input_df['ROW']) ]) input_df = input_df[scene_id_df.isin( ini['INPUTS']['scene_id_keep_list']).values] # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # input_df = input_df[input_df['SCENE_ID'].isin( # ini['INPUTS']['scene_id_keep_list'])] if ini['INPUTS']['scene_id_skip_list']: # Replace XXX with primary ROW value for checking skip list SCENE_ID scene_id_df = pd.Series([ s.replace('XXX', '{:03d}'.format(int(r))) for s, r in zip(input_df['SCENE_ID'], input_df['ROW']) ]) input_df = input_df[np.logical_not( scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)] # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # input_df = input_df[~input_df['SCENE_ID'].isin( # ini['INPUTS']['scene_id_skip_list'])] # Filter by QA/QC value if ini['SUMMARY']['max_qa'] >= 0 and not input_df.empty: logging.debug(' Maximum QA: {0}'.format(ini['SUMMARY']['max_qa'])) input_df = input_df[input_df['QA'] <= ini['SUMMARY']['max_qa']] # First filter by average cloud score if ini['SUMMARY']['max_cloud_score'] < 100 and not input_df.empty: logging.debug(' Maximum cloud score: {0}'.format( ini['SUMMARY']['max_cloud_score'])) input_df = input_df[ input_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']] # Filter by Fmask percentage if ini['SUMMARY']['max_fmask_pct'] < 100 and not input_df.empty: input_df['FMASK_PCT'] = 100 * (input_df['FMASK_COUNT'] / input_df['FMASK_TOTAL']) logging.debug(' Max Fmask threshold: {}'.format( ini['SUMMARY']['max_fmask_pct'])) input_df = input_df[ input_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']] # Filter low count SLC-off images if ini['SUMMARY']['min_slc_off_pct'] > 0 and not input_df.empty: logging.debug(' Mininum SLC-off threshold: {}%'.format( ini['SUMMARY']['min_slc_off_pct'])) # logging.debug(' Maximum pixel count: {}'.format( # max_pixel_count)) slc_off_mask = ((input_df['PLATFORM'] == 'LE07') & ((input_df['YEAR'] >= 2004) | ((input_df['YEAR'] == 2003) & (input_df['DOY'] > 151)))) slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / input_df['PIXEL_TOTAL']) # slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / max_pixel_count) input_df = input_df[((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) & slc_off_mask) | (~slc_off_mask)] if input_df.empty: logging.error(' Empty dataframe after filtering, exiting') return False # Process each zone separately logging.debug(input_df.head()) zone_name_list = 
sorted(list(set(input_df['ZONE_NAME'].values))) for zone_name in zone_name_list: logging.info('ZONE: {}'.format(zone_name)) # The names are currently stored in the CSV as spaces zone_output_name = zone_name.replace(' ', '_') zone_df = input_df[input_df['ZONE_NAME'] == zone_name] if zone_df.empty: logging.info(' Empty zone dataframe, skipping zone') continue # Output file paths output_doy_path = os.path.join( output_ws, '{}_timeseries_doy.html'.format(zone_output_name)) output_date_path = os.path.join( output_ws, '{}_timeseries_date.html'.format(zone_output_name)) # # Check for QA field # if 'QA' not in zone_df.columns.values: # # logging.warning( # # ' WARNING: QA field not present in CSV\n' # # ' To compute QA/QC values, please run "ee_summary_qaqc.py"\n' # # ' Script will continue with no QA/QC values') # zone_df['QA'] = 0 # # raw_input('ENTER') # # logging.error( # # '\nPlease run the "ee_summary_qaqc.py" script ' # # 'to compute QA/QC values\n') # # sys.exit() # Check that plot variables are present for plot_var in plot_var_list: if plot_var not in zone_df.columns.values: logging.error(' The variable {} does not exist in the ' 'dataframe'.format(plot_var)) sys.exit() # if ini['INPUTS']['scene_id_keep_list']: # # Replace XXX with primary ROW value for checking skip list SCENE_ID # scene_id_df = pd.Series([ # s.replace('XXX', '{:03d}'.format(int(r))) # for s, r in zip(zone_df['SCENE_ID'], zone_df['ROW'])]) # zone_df = zone_df[scene_id_df.isin( # ini['INPUTS']['scene_id_keep_list']).values] # # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # # zone_df = zone_df[zone_df['SCENE_ID'].isin( # # ini['INPUTS']['scene_id_keep_list'])] # if ini['INPUTS']['scene_id_skip_list']: # # Replace XXX with primary ROW value for checking skip list SCENE_ID # scene_id_df = pd.Series([ # s.replace('XXX', '{:03d}'.format(int(r))) # for s, r in zip(zone_df['SCENE_ID'], zone_df['ROW'])]) # zone_df = zone_df[np.logical_not(scene_id_df.isin( # ini['INPUTS']['scene_id_skip_list']).values)] # # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # # zone_df = zone_df[np.logical_not(zone_df['SCENE_ID'].isin( # # ini['INPUTS']['scene_id_skip_list']))] # Compute colors for each QA value logging.debug(' Building column data source') qa_values = sorted(list(set(zone_df['QA'].values))) colors = { qa: "#%02x%02x%02x" % (int(r), int(g), int(b)) for qa, ( r, g, b, _) in zip(qa_values, 255 * cm.viridis(mpl.colors.Normalize()(qa_values))) } logging.debug(' QA values: {}'.format(', '.join(map(str, qa_values)))) # Unpack the data by QA type to support interactive legends sources = dict() for qa_value in qa_values: qa_df = zone_df[zone_df['QA'] == qa_value] qa_data = { 'INDEX': list(range(len(qa_df.index))), 'PLATFORM': qa_df['PLATFORM'], 'DATE': pd.to_datetime(qa_df['DATE']), 'DATE_STR': pd.to_datetime( qa_df['DATE']).map(lambda x: x.strftime('%Y-%m-%d')), 'DOY': qa_df['DOY'].values, 'QA': qa_df['QA'].values, 'COLOR': [colors[qa] for qa in qa_df['QA'].values] } for plot_var in plot_var_list: if plot_var in qa_df.columns.values: qa_data.update({plot_var: qa_df[plot_var].values}) sources[qa_value] = bokeh.models.ColumnDataSource(qa_data) tooltips = [("LANDSAT", "@PLATFORM"), ("DATE", "@TIME"), ("DOY", "@DOY")] # Selection hover_circle = Circle(fill_color='#ff0000', line_color='#ff0000') selected_circle = Circle(fill_color='COLOR', line_color='COLOR') nonselected_circle = Circle(fill_color='#aaaaaa', line_color='#aaaaaa') # Plot the data by DOY logging.debug(' Building DOY timeseries figure') if 
os.path.isfile(output_doy_path): os.remove(output_doy_path) output_file(output_doy_path, title=zone_name) figure_args = dict( plot_width=750, plot_height=250, title=None, tools="xwheel_zoom,xpan,xbox_zoom,reset,box_select", # tools="xwheel_zoom,xpan,xbox_zoom,reset,tap", active_scroll="xwheel_zoom") plot_args = dict(size=4, alpha=0.9, color='COLOR') if ini['SUMMARY']['max_qa'] > 0: plot_args['legend'] = 'QA' figures = [] for plot_i, plot_var in enumerate(plot_var_list): if plot_i == 0: f = figure( # x_range=Range1d(1, 366, bounds=(1, 366)), y_axis_label=plot_var, **figure_args) else: f = figure(x_range=f.x_range, y_axis_label=plot_var, **figure_args) for qa, source in sorted(sources.items()): r = f.circle('DOY', plot_var, source=source, **plot_args) r.hover_glyph = hover_circle r.selection_glyph = selected_circle r.nonselection_glyph = nonselected_circle r.muted_glyph = nonselected_circle # DEADBEEF - This will display high QA points as muted # if qa > ini['SUMMARY']['max_qa']: # r.muted = True # # r.visible = False f.add_tools(bokeh.models.HoverTool(tooltips=tooltips)) # if ini['SUMMARY']['max_qa'] > 0: f.legend.location = "top_left" f.legend.click_policy = "hide" # f.legend.click_policy = "mute" f.legend.orientation = "horizontal" figures.append(f) # Try to not allow more than 4 plots in a column p = gridplot(figures, ncols=len(plot_var_list) // 3, sizing_mode='stretch_both') if show_flag: show(p) save(p) # Plot the data by DATE logging.debug(' Building date timeseries figure') if os.path.isfile(output_date_path): os.remove(output_date_path) output_file(output_date_path, title=zone_name) figure_args = dict( plot_width=750, plot_height=250, title=None, tools="xwheel_zoom,xpan,xbox_zoom,reset,box_select", # tools="xwheel_zoom,xpan,xbox_zoom,reset,tap", active_scroll="xwheel_zoom", x_axis_type="datetime", ) plot_args = dict(size=4, alpha=0.9, color='COLOR') if ini['SUMMARY']['max_qa'] > 0: plot_args['legend'] = 'QA' figures = [] for plot_i, plot_var in enumerate(plot_var_list): if plot_i == 0: f = figure( # x_range=Range1d(x_limit[0], x_limit[1], bounds=x_limit), y_axis_label=plot_var, **figure_args) else: f = figure(x_range=f.x_range, y_axis_label=plot_var, **figure_args) if plot_var == 'TS': f.y_range.bounds = (270, None) for qa, source in sorted(sources.items()): r = f.circle('DATE', plot_var, source=source, **plot_args) r.hover_glyph = hover_circle r.selection_glyph = selected_circle r.nonselection_glyph = nonselected_circle r.muted_glyph = nonselected_circle # DEADBEEF - This will display high QA points as muted # if qa > ini['SUMMARY']['max_qa']: # r.muted = True # # r.visible = False f.add_tools(bokeh.models.HoverTool(tooltips=tooltips)) # if ini['SUMMARY']['max_qa'] > 0: f.legend.location = "top_left" f.legend.click_policy = "hide" # f.legend.click_policy = "mute" f.legend.orientation = "horizontal" figures.append(f) # Try to not allow more than 4 plots in a column p = gridplot(figures, ncols=len(plot_var_list) // 3, sizing_mode='stretch_both') if show_flag: show(p) save(p) # Pause after each iteration if show is True if show_flag: input('Press ENTER to continue')
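
# Note on the Bokeh figures above: the records are split into one
# ColumnDataSource per QA value so that each QA level gets its own glyph and
# legend entry, which lets the legend click_policy ("hide") toggle points by
# QA independently in both the DOY and date gridplots.
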
def main(ini_path, show_flag=False, overwrite_flag=False): """Generate Beamer ETg summary figures Args: ini_path (str): show_flag (bool): if True, show the figures in the browser. Default is False. overwrite_flag (bool): if True, overwrite existing figures Default is True (for now) """ logging.info('\nGenerate Beamer ETg summary figures') ncolors = [ '#348ABD', '#7A68A6', '#A60628', '#467821', '#CF4457', '#188487', '#E24A33'] xtick_fs = 8 ytick_fs = 8 xlabel_fs = 8 ylabel_fs = 8 ms = 2 figsize = (3.0, 2.5) output_folder = 'figures' # For unit conversion eto_fields = [ 'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI', 'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI', 'WY_ETO'] ppt_fields = ['WY_PPT'] # Read config file ini = inputs.read(ini_path) inputs.parse_section(ini, section='INPUTS') inputs.parse_section(ini, section='ZONAL_STATS') inputs.parse_section(ini, section='SUMMARY') inputs.parse_section(ini, section='FIGURES') inputs.parse_section(ini, section='BEAMER') # Output paths output_ws = os.path.join( ini['SUMMARY']['output_ws'], output_folder) if not os.path.isdir(output_ws): os.makedirs(output_ws) # Start/end year year_list = list(range( ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1)) month_list = list(utils.wrapped_range( ini['INPUTS']['start_month'], ini['INPUTS']['end_month'], 1, 12)) doy_list = list(utils.wrapped_range( ini['INPUTS']['start_doy'], ini['INPUTS']['end_doy'], 1, 366)) # GRIDMET month range (default to water year) gridmet_start_month = ini['SUMMARY']['gridmet_start_month'] gridmet_end_month = ini['SUMMARY']['gridmet_end_month'] gridmet_months = list(utils.month_range( gridmet_start_month, gridmet_end_month)) logging.info('\nGridmet months: {}'.format( ', '.join(map(str, gridmet_months)))) # Read in the zonal stats CSV logging.debug(' Reading zonal stats CSV file') input_df = pd.read_csv(os.path.join( ini['ZONAL_STATS']['output_ws'], ini['BEAMER']['output_name'])) logging.debug(input_df.head()) logging.debug(' Filtering Landsat dataframe') input_df = input_df[input_df['PIXEL_COUNT'] > 0] # # This assumes that there are L5/L8 images in the dataframe # if not input_df.empty: # max_pixel_count = max(input_df['PIXEL_COUNT']) # else: # max_pixel_count = 0 if ini['INPUTS']['fid_keep_list']: input_df = input_df[input_df['ZONE_FID'].isin( ini['INPUTS']['fid_keep_list'])] if ini['INPUTS']['fid_skip_list']: input_df = input_df[~input_df['ZONE_FID'].isin( ini['INPUTS']['fid_skip_list'])] if year_list: input_df = input_df[input_df['YEAR'].isin(year_list)] if month_list: input_df = input_df[input_df['MONTH'].isin(month_list)] if doy_list: input_df = input_df[input_df['DOY'].isin(doy_list)] if ini['INPUTS']['path_keep_list']: input_df = input_df[ input_df['PATH'].isin(ini['INPUTS']['path_keep_list'])] if (ini['INPUTS']['row_keep_list'] and ini['INPUTS']['row_keep_list'] != ['XXX']): input_df = input_df[ input_df['ROW'].isin(ini['INPUTS']['row_keep_list'])] # Assume the default is for these to be True and only filter if False if not ini['INPUTS']['landsat4_flag']: input_df = input_df[input_df['PLATFORM'] != 'LT04'] if not ini['INPUTS']['landsat5_flag']: input_df = input_df[input_df['PLATFORM'] != 'LT05'] if not ini['INPUTS']['landsat7_flag']: input_df = input_df[input_df['PLATFORM'] != 'LE07'] if not ini['INPUTS']['landsat8_flag']: input_df = input_df[input_df['PLATFORM'] != 'LC08'] if ini['INPUTS']['scene_id_keep_list']: # Replace XXX with primary ROW value for checking skip list SCENE_ID scene_id_df = pd.Series([ s.replace('XXX', '{:03d}'.format(int(r))) for 
s, r in zip(input_df['SCENE_ID'], input_df['ROW'])]) input_df = input_df[scene_id_df.isin( ini['INPUTS']['scene_id_keep_list']).values] # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # input_df = input_df[input_df['SCENE_ID'].isin( # ini['INPUTS']['scene_id_keep_list'])] if ini['INPUTS']['scene_id_skip_list']: # Replace XXX with primary ROW value for checking skip list SCENE_ID scene_id_df = pd.Series([ s.replace('XXX', '{:03d}'.format(int(r))) for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])]) input_df = input_df[np.logical_not(scene_id_df.isin( ini['INPUTS']['scene_id_skip_list']).values)] # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # input_df = input_df[~input_df['SCENE_ID'].isin( # ini['INPUTS']['scene_id_skip_list'])] # Filter by QA/QC value if ini['SUMMARY']['max_qa'] >= 0 and not input_df.empty: logging.debug(' Maximum QA: {0}'.format( ini['SUMMARY']['max_qa'])) input_df = input_df[input_df['QA'] <= ini['SUMMARY']['max_qa']] # First filter by average cloud score if ini['SUMMARY']['max_cloud_score'] < 100 and not input_df.empty: logging.debug(' Maximum cloud score: {0}'.format( ini['SUMMARY']['max_cloud_score'])) input_df = input_df[ input_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']] # Filter by Fmask percentage if ini['SUMMARY']['max_fmask_pct'] < 100 and not input_df.empty: input_df['FMASK_PCT'] = 100 * ( input_df['FMASK_COUNT'] / input_df['FMASK_TOTAL']) logging.debug(' Max Fmask threshold: {}'.format( ini['SUMMARY']['max_fmask_pct'])) input_df = input_df[ input_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']] # Filter low count SLC-off images if ini['SUMMARY']['min_slc_off_pct'] > 0 and not input_df.empty: logging.debug(' Mininum SLC-off threshold: {}%'.format( ini['SUMMARY']['min_slc_off_pct'])) # logging.debug(' Maximum pixel count: {}'.format( # max_pixel_count)) slc_off_mask = ( (input_df['PLATFORM'] == 'LE07') & ((input_df['YEAR'] >= 2004) | ((input_df['YEAR'] == 2003) & (input_df['DOY'] > 151)))) slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / input_df['PIXEL_TOTAL']) # slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / max_pixel_count) input_df = input_df[ ((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) & slc_off_mask) | (~slc_off_mask)] if input_df.empty: logging.error(' Empty dataframe after filtering, exiting') return False # Process each zone separately logging.debug(input_df.head()) zone_name_list = sorted(list(set(input_df['ZONE_NAME'].values))) for zone_name in zone_name_list: logging.info('ZONE: {}'.format(zone_name)) # The names are currently stored in the CSV with spaces zone_output_name = zone_name.replace(' ', '_') zone_df = input_df[input_df['ZONE_NAME'] == zone_name] if zone_df.empty: logging.info(' Empty zone dataframe, skipping zone') continue logging.debug(' Computing annual summaries') annual_df = zone_df \ .groupby(['ZONE_NAME', 'YEAR']) \ .agg({ 'PIXEL_COUNT': ['count', 'mean'], 'PIXEL_TOTAL': ['mean'], 'FMASK_COUNT': 'mean', 'FMASK_TOTAL': 'mean', 'CLOUD_SCORE': 'mean', 'ETSTAR_COUNT': 'mean', 'NDVI_TOA': 'mean', 'NDWI_TOA': 'mean', 'ALBEDO_SUR': 'mean', 'TS': 'mean', 'EVI_SUR': 'mean', 'ETSTAR_MEAN': 'mean', 'ETG_MEAN': 'mean', 'ETG_LPI': 'mean', 'ETG_UPI': 'mean', 'ETG_LCI': 'mean', 'ETG_UCI': 'mean', 'ET_MEAN': 'mean', 'ET_LPI': 'mean', 'ET_UPI': 'mean', 'ET_LCI': 'mean', 'ET_UCI': 'mean', 'WY_ETO': 'mean', 'WY_PPT': 'mean' }) annual_df.columns = annual_df.columns.map('_'.join) annual_df = annual_df.rename(columns={ 'PIXEL_COUNT_count': 'SCENE_COUNT', 'PIXEL_COUNT_mean': 'PIXEL_COUNT'}) 
annual_df.rename( columns=lambda x: str(x).replace('_mean', ''), inplace=True) annual_df['SCENE_COUNT'] = annual_df['SCENE_COUNT'].astype(np.int) annual_df['PIXEL_COUNT'] = annual_df['PIXEL_COUNT'].astype(np.int) annual_df['PIXEL_TOTAL'] = annual_df['PIXEL_TOTAL'].astype(np.int) annual_df['FMASK_COUNT'] = annual_df['FMASK_COUNT'].astype(np.int) annual_df['FMASK_TOTAL'] = annual_df['FMASK_TOTAL'].astype(np.int) annual_df['ETSTAR_COUNT'] = annual_df['ETSTAR_COUNT'].astype(np.int) annual_df = annual_df.reset_index() # Convert ETo units if (ini['BEAMER']['eto_units'] == 'mm' and ini['FIGURES']['eto_units'] == 'mm'): pass elif (ini['BEAMER']['eto_units'] == 'mm' and ini['FIGURES']['eto_units'] == 'in'): annual_df[eto_fields] /= (25.4) elif (ini['BEAMER']['eto_units'] == 'mm' and ini['FIGURES']['eto_units'] == 'ft'): annual_df[eto_fields] /= (12 * 25.4) else: logging.error( ('\nERROR: Input units {} and output units {} are not ' + 'currently supported, exiting').format( ini['BEAMER']['eto_units'], ini['FIGURES']['eto_units'])) sys.exit() # Convert PPT units if (ini['BEAMER']['ppt_units'] == 'mm' and ini['FIGURES']['ppt_units'] == 'mm'): pass elif (ini['BEAMER']['ppt_units'] == 'mm' and ini['FIGURES']['ppt_units'] == 'in'): annual_df[ppt_fields] /= (25.4) elif (ini['BEAMER']['ppt_units'] == 'mm' and ini['FIGURES']['ppt_units'] == 'ft'): annual_df[ppt_fields] /= (12 * 25.4) else: logging.error( ('\nERROR: Input units {} and output units {} are not ' 'currently supported, exiting').format( ini['BEAMER']['ppt_units'], ini['FIGURES']['ppt_units'])) sys.exit() logging.debug(' Generating figures') zone_df = annual_df[annual_df['ZONE_NAME'] == zone_name] year_min, year_max = min(zone_df['YEAR']), max(zone_df['YEAR']) # Set default PPT min/max scaling ppt_min = 0 if ini['FIGURES']['ppt_units'] == 'mm': ppt_max = 100 * math.ceil((max(zone_df['WY_PPT']) + 100) / 100) elif ini['FIGURES']['ppt_units'] == 'ft': ppt_max = 0.2 * math.ceil((max(zone_df['WY_PPT']) + 0.1) / 0.2) else: ppt_max = 1.2 * max(zone_df['WY_PPT']) logging.debug(' EVI vs PPT') figure_path = os.path.join( output_ws, '{}_evi.png'.format(zone_output_name)) fig = plt.figure(figsize=figsize) ax1 = fig.add_axes([0.20, 0.21, 0.65, 0.75]) ax1.set_xlabel('Year', fontsize=xlabel_fs) ax2 = ax1.twinx() ax1.plot( zone_df['YEAR'].values, zone_df['WY_PPT'], marker='o', c='0.5', ms=ms, label='WY PPT') ax1.yaxis.tick_right() ax1.yaxis.set_label_position("right") ax1.set_xlim([year_min - 1, year_max + 1]) ax1.set_ylim([ppt_min, ppt_max]) ax1.tick_params(axis='y', labelsize=ytick_fs) ax1.tick_params(axis='x', labelsize=xtick_fs) ax1.tick_params(axis='x', which='both', top='off') ax1.xaxis.set_minor_locator(MultipleLocator(1)) for tick in ax1.get_xticklabels(): tick.set_rotation(45) tick.set_ha('right') ax1.set_ylabel( 'PPT [{}/yr]'.format(ini['FIGURES']['ppt_units']), fontsize=ylabel_fs) ax2.plot( zone_df['YEAR'].values, zone_df['EVI_SUR'].values, marker='o', c=ncolors[0], ms=ms, label='EVI') ax1.plot(0, 0, marker='o', c=ncolors[0], ms=ms, label='EVI') ax2.yaxis.tick_left() ax2.yaxis.set_label_position("left") ax2.set_ylim([ 0.05 * math.floor((min(zone_df['EVI_SUR']) - 0.01) / 0.05), 0.05 * math.ceil((max(zone_df['EVI_SUR']) + 0.01) / 0.05)]) ax2.tick_params(axis='y', labelsize=ytick_fs) ax2.set_ylabel('EVI [dimensionless]', fontsize=ylabel_fs) ax1.legend( loc='upper right', frameon=False, fontsize=6, numpoints=1) if overwrite_flag or not os.path.isfile(figure_path): plt.savefig(figure_path, dpi=300) plt.close() del fig, ax1, ax2 logging.debug(' ETo vs PPT') 
figure_path = os.path.join( output_ws, '{}_eto.png'.format(zone_output_name)) fig = plt.figure(figsize=figsize) ax1 = fig.add_axes([0.18, 0.21, 0.67, 0.75]) ax1.set_xlabel('Year', fontsize=xlabel_fs) ax2 = ax1.twinx() ax1.plot( zone_df['YEAR'].values, zone_df['WY_PPT'], marker='o', c='0.5', ms=ms, label='WY PPT') ax1.yaxis.tick_right() ax1.yaxis.set_label_position("right") ax1.set_xlim([year_min - 1, year_max + 1]) ax1.set_ylim([ppt_min, ppt_max]) ax1.tick_params(axis='y', labelsize=ytick_fs) ax1.tick_params(axis='x', labelsize=xtick_fs) ax1.tick_params(axis='x', which='both', top='off') ax1.xaxis.set_minor_locator(MultipleLocator(1)) for tick in ax1.get_xticklabels(): tick.set_rotation(45) tick.set_ha('right') ax1.set_ylabel( 'PPT [{}/yr]'.format(ini['FIGURES']['ppt_units']), fontsize=ylabel_fs) ax2.plot( zone_df['YEAR'].values, zone_df['WY_ETO'].values, marker='o', c=ncolors[1], ms=ms, label='ETo') ax1.plot(0, 0, marker='o', c=ncolors[1], ms=ms, label='ETo') ax2.yaxis.tick_left() ax2.yaxis.set_label_position("left") ax2.set_ylim([ max(0, 0.9 * min(zone_df['WY_ETO'])), 1.1 * max(zone_df['WY_ETO'])]) # ax2.set_ylim([ # max(0, 100 * math.floor((min(zone_df['WY_ETO']) - 100) / 100)), # 100 * math.ceil((max(zone_df['WY_ETO']) + 100) / 100)]) ax2.tick_params(axis='y', labelsize=ytick_fs) ax2.set_ylabel( 'ETo [{}/yr]'.format(ini['FIGURES']['eto_units']), fontsize=ylabel_fs) ax1.legend(loc='upper right', frameon=False, fontsize=6, numpoints=1) if overwrite_flag or not os.path.isfile(figure_path): plt.savefig(figure_path, dpi=300) plt.close() del fig, ax1, ax2 logging.debug(' ET vs PPT') figure_path = os.path.join( output_ws, '{}_et.png'.format(zone_output_name)) fig = plt.figure(figsize=figsize) ax1 = fig.add_axes([0.18, 0.21, 0.67, 0.75]) ax1.set_xlabel('Year', fontsize=xlabel_fs) ax2 = ax1.twinx() ax1.plot( zone_df['YEAR'].values, zone_df['WY_PPT'], marker='o', c='0.5', ms=ms, label='WY PPT') ax1.yaxis.tick_right() ax1.yaxis.set_label_position("right") ax1.set_xlim([year_min - 1, year_max + 1]) ax1.set_ylim([ppt_min, ppt_max]) ax1.tick_params(axis='y', labelsize=ytick_fs) ax1.tick_params(axis='x', labelsize=xtick_fs) ax1.tick_params(axis='x', which='both', top='off') ax1.xaxis.set_minor_locator(MultipleLocator(1)) for tick in ax1.get_xticklabels(): tick.set_rotation(45) tick.set_ha('right') ax1.set_ylabel( 'PPT [{}/yr]'.format(ini['FIGURES']['ppt_units']), fontsize=ylabel_fs) ax2.plot( zone_df['YEAR'].values, zone_df['ET_UCI'].values, marker='', c=ncolors[2], alpha=0.5, lw=0.75) ax2.plot( zone_df['YEAR'].values, zone_df['ET_LCI'].values, marker='', c=ncolors[2], alpha=0.5, lw=0.75) ax2.plot( zone_df['YEAR'].values, zone_df['ET_MEAN'].values, marker='o', c=ncolors[2], ms=ms, label='ET') ax1.plot(0, 0, marker='o', c=ncolors[2], ms=ms, label='ET') ax2.yaxis.tick_left() ax2.yaxis.set_label_position("left") ax2.set_ylim([ max(0, 0.9 * min(zone_df['ET_LCI'])), 1.1 * max(zone_df['ET_UCI'])]) # ax2.set_ylim([ # max(0, 100 * math.floor((min(zone_df['ET_MEAN']) - 100) / 100)), # 100 * math.ceil((max(zone_df['ET_MEAN']) + 100) / 100)]) ax2.tick_params(axis='y', labelsize=ytick_fs) ax2.set_ylabel( 'ET [{}/yr]'.format(ini['FIGURES']['eto_units']), fontsize=ylabel_fs) ax1.legend(loc='upper right', frameon=False, fontsize=6, numpoints=1) if overwrite_flag or not os.path.isfile(figure_path): plt.savefig(figure_path, dpi=300) plt.close() del fig, ax1, ax2 logging.debug(' ETg vs PPT') figure_path = os.path.join( output_ws, '{}_etg.png'.format(zone_output_name)) fig = plt.figure(figsize=figsize) ax1 = 
fig.add_axes([0.18, 0.21, 0.67, 0.75]) ax1.set_xlabel('Year', fontsize=xlabel_fs) ax2 = ax1.twinx() ax1.plot( zone_df['YEAR'].values, zone_df['WY_PPT'], marker='o', c='0.5', ms=ms, label='WY PPT') ax1.yaxis.tick_right() ax1.yaxis.set_label_position("right") ax1.set_xlim([year_min - 1, year_max + 1]) ax1.set_ylim([ppt_min, ppt_max]) ax1.tick_params(axis='y', labelsize=ytick_fs) ax1.tick_params(axis='x', labelsize=xtick_fs) ax1.tick_params(axis='x', which='both', top='off') ax1.xaxis.set_minor_locator(MultipleLocator(1)) for tick in ax1.get_xticklabels(): tick.set_rotation(45) tick.set_ha('right') ax1.set_ylabel( 'PPT [{}/yr]'.format(ini['FIGURES']['ppt_units']), fontsize=ylabel_fs) ax2.plot( zone_df['YEAR'].values, zone_df['ETG_UCI'].values, marker='', c=ncolors[3], alpha=0.5, lw=0.75) ax2.plot( zone_df['YEAR'].values, zone_df['ETG_LCI'].values, marker='', c=ncolors[3], alpha=0.5, lw=0.75) ax2.plot( zone_df['YEAR'].values, zone_df['ETG_MEAN'].values, marker='o', c=ncolors[3], ms=ms, label='ETg') ax1.plot(0, 0, marker='o', c=ncolors[3], ms=ms, label='ETg') ax2.yaxis.tick_left() ax2.yaxis.set_label_position("left") ax2.set_ylim([ max(0, 0.9 * min(zone_df['ETG_LCI'])), 1.1 * max(zone_df['ETG_UCI'])]) # ax2.set_ylim([ # max(0, 100 * math.floor((min(zone_df['ETG_MEAN']) - 100) / 100)), # 100 * math.ceil((max(zone_df['ETG_MEAN']) + 100) / 100)]) ax2.tick_params(axis='y', labelsize=ytick_fs) ax2.set_ylabel( 'ETg [{}/yr]'.format(ini['FIGURES']['eto_units']), fontsize=ylabel_fs) ax1.legend(loc='upper right', frameon=False, fontsize=6, numpoints=1) if overwrite_flag or not os.path.isfile(figure_path): plt.savefig(figure_path, dpi=300) plt.close() del fig, ax1, ax2 logging.debug(' Complimentary') figure_path = os.path.join( output_ws, '{}_complimentary.png'.format(zone_output_name)) fig = plt.figure(figsize=(3, 2.5)) ax = fig.add_axes([0.18, 0.16, 0.78, 0.80]) # ax = fig.add_axes([0.18, 0.21, 0.67, 0.70]) ax.plot( zone_df['WY_PPT'].values, zone_df['WY_ETO'].values, linestyle='', marker='o', c=ncolors[1], ms=3, label='ETo') ax.plot( zone_df['WY_PPT'].values, zone_df['ET_MEAN'].values, linestyle='', marker='o', c=ncolors[2], ms=3, label='ET') # xmax = 100 * math.ceil(max(zone_df['WY_PPT']) / 100) # ymax = 200 * math.ceil((max(zone_df['WY_ETO']) + 200) / 200) ax.set_xlim([ppt_min, ppt_max]) ax.set_ylim([0, 1.2 * max(zone_df['WY_ETO'])]) ax.tick_params(axis='y', labelsize=ytick_fs) ax.tick_params(axis='x', labelsize=xtick_fs) ax.tick_params(axis='x', which='both', top='off') ax.tick_params(axis='y', which='both', right='off') ax.set_xlabel('PPT [{}/yr]'.format( ini['FIGURES']['ppt_units']), fontsize=xlabel_fs) ax.set_ylabel('ET and ETo [{}/yr]'.format( ini['FIGURES']['eto_units']), fontsize=ylabel_fs) ax.legend(loc='upper right', frameon=False, fontsize=6, numpoints=1) if overwrite_flag or not os.path.isfile(figure_path): plt.savefig(figure_path, dpi=300) plt.close() del fig, ax
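# NOTE: Illustrative sketch, not part of the original workflow. The ETo and
# PPT unit-conversion ladders in main() above repeat the same mm -> in -> ft
# arithmetic for each field group; a small helper like the one below could
# express that conversion once. The name "convert_units" and its signature
# are assumptions made for this example only.
def convert_units(df, fields, input_units='mm', output_units='mm'):
    """Convert selected DataFrame columns between mm, in, and ft (sketch)"""
    # Factors to convert each supported unit to millimeters
    to_mm = {'mm': 1.0, 'in': 25.4, 'ft': 12 * 25.4}
    if input_units not in to_mm or output_units not in to_mm:
        raise ValueError('Unsupported units: {} -> {}'.format(
            input_units, output_units))
    df = df.copy()
    df[fields] = df[fields] * (to_mm[input_units] / to_mm[output_units])
    return df
# Example usage (hypothetical):
#   annual_df = convert_units(
#       annual_df, eto_fields, ini['BEAMER']['eto_units'],
#       ini['FIGURES']['eto_units'])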
def ee_image_download(ini_path=None, overwrite_flag=False): """Earth Engine EDDI Image Download Parameters ---------- ini_path : str overwrite_flag : bool, optional If True, overwrite existing files (the default is False). """ logging.info('\nEarth Engine EDDI Image Download') # 12 month EDDI aggregation_days = 365 export_name = 'eddi_12month' output_name = 'eddi.12month' eddi_date_list = [ '0131', '0228', '0331', '0430', '0531', '0630', '0731', '0831', '0930', '1031', '1130', '1231' ] # eddi_date_list = ['0930', '1231'] # eddi_date_list = ['{:02d}01'.format(m) for m in range(1, 13)] # eddi_date_list = [] eddi_folder = 'eddi' # Do we need to support separate EDDI years? # start_year = 1984 # end_year = 2016 climo_year_start = 1979 climo_year_end = 2017 # Read config file # ini = inputs.ini_parse(ini_path, section='IMAGE') ini = inputs.read(ini_path) inputs.parse_section(ini, section='INPUTS') inputs.parse_section(ini, section='SPATIAL') inputs.parse_section(ini, section='EXPORT') inputs.parse_section(ini, section='IMAGES') nodata_value = -9999 # Manually set output spatial reference logging.info('\nHardcoding GRIDMET snap, cellsize and spatial reference') ini['output_x'], ini['output_y'] = -124.79299639209513, 49.41685579737572 ini['SPATIAL']['cellsize'] = 0.041666001963701 # ini['SPATIAL']['cellsize'] = [0.041666001963701, 0.041666001489718] # ini['output_x'] = -124.79166666666666666667 # ini['output_y'] = 25.04166666666666666667 # ini['SPATIAL']['cellsize'] = 1. / 24 ini['SPATIAL']['osr'] = gdc.epsg_osr(4326) # ini['SPATIAL']['osr'] = gdc.epsg_osr(4269) ini['SPATIAL']['crs'] = 'EPSG:4326' logging.debug(' Snap: {} {}'.format(ini['output_x'], ini['output_y'])) logging.debug(' Cellsize: {}'.format(ini['SPATIAL']['cellsize'])) logging.debug(' OSR: {}'.format(ini['SPATIAL']['osr'])) # Get ee features from shapefile zone_geom_list = gdc.shapefile_2_geom_list_func( ini['INPUTS']['zone_shp_path'], zone_field=ini['INPUTS']['zone_field'], reverse_flag=False) # Filter features by FID before merging geometries if ini['INPUTS']['fid_keep_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] in ini['INPUTS']['fid_keep_list'] ] if ini['INPUTS']['fid_skip_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] not in ini['INPUTS']['fid_skip_list'] ] # Merge geometries if ini['INPUTS']['merge_geom_flag']: merge_geom = ogr.Geometry(ogr.wkbMultiPolygon) for zone in zone_geom_list: zone_multipolygon = ogr.ForceToMultiPolygon( ogr.CreateGeometryFromJson(json.dumps(zone[2]))) for zone_polygon in zone_multipolygon: merge_geom.AddGeometry(zone_polygon) # merge_json = json.loads(merge_mp.ExportToJson()) zone_geom_list = [[ 0, ini['INPUTS']['zone_filename'], json.loads(merge_geom.ExportToJson()) ]] ini['INPUTS']['zone_field'] = '' # Need zone_shp_path projection to build EE geometries zone_osr = gdc.feature_path_osr(ini['INPUTS']['zone_shp_path']) zone_proj = gdc.osr_wkt(zone_osr) # zone_proj = ee.Projection(zone_proj).wkt().getInfo() # zone_proj = zone_proj.replace('\n', '').replace(' ', '') logging.debug(' Zone Projection: {}'.format(zone_proj)) # Initialize Earth Engine API key logging.info('\nInitializing Earth Engine') ee.Initialize() utils.ee_request(ee.Number(1).getInfo()) # Get current running tasks tasks = utils.get_ee_tasks() # Download images for each feature separately for zone_fid, zone_name, zone_json in zone_geom_list: zone_name = zone_name.replace(' ', '_') logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid)) # Build EE
geometry object for zonal stats zone_geom = ee.Geometry(zone_json, zone_proj, False) # Project the zone_geom to the GRIDMET projection # if zone_proj != output_proj: zone_geom = zone_geom.transform(ini['SPATIAL']['crs'], 0.001) # Get the extent from the Earth Engine geometry object? zone_extent = zone_geom.bounds().getInfo()['coordinates'][0] zone_xy = list(zip(*zone_extent)) zone_extent = gdc.Extent([ min(zone_xy[0]), min(zone_xy[1]), max(zone_xy[0]), max(zone_xy[1]) ]) # # Use GDAL and geometry json to build extent, transform, and shape # zone_extent = gdc.Extent( # ogr.CreateGeometryFromJson(json.dumps(zone_json)).GetEnvelope()) # # zone_extent = gdc.Extent(zone_geom.GetEnvelope()) # zone_extent.ymin, zone_extent.xmax = zone_extent.xmax, zone_extent.ymin # Adjust extent to match raster zone_extent = zone_extent.adjust_to_snap('EXPAND', ini['output_x'], ini['output_y'], ini['SPATIAL']['cellsize']) zone_geo = zone_extent.geo(ini['SPATIAL']['cellsize']) zone_transform = gdc.geo_2_ee_transform(zone_geo) zone_transform = '[' + ','.join(map(str, zone_transform)) + ']' zone_shape = zone_extent.shape(ini['SPATIAL']['cellsize']) logging.debug(' Zone Shape: {}'.format(zone_shape)) logging.debug(' Zone Transform: {}'.format(zone_transform)) logging.debug(' Zone Extent: {}'.format(zone_extent)) # logging.debug(' Geom: {}'.format(zone_geom.getInfo())) output_transform = zone_transform[:] output_shape = '{1}x{0}'.format(*zone_shape) logging.debug(' Output Projection: {}'.format(ini['SPATIAL']['crs'])) logging.debug(' Output Transform: {}'.format(output_transform)) logging.debug(' Output Shape: {}'.format(output_shape)) zone_eddi_ws = os.path.join(ini['IMAGES']['output_ws'], zone_name, eddi_folder) if not os.path.isdir(zone_eddi_ws): os.makedirs(zone_eddi_ws) # GRIDMET PDSI # Process each image in the collection by date export_list = [] export_list = list( date_range(datetime.datetime(ini['INPUTS']['start_year'], 1, 1), datetime.datetime(ini['INPUTS']['end_year'], 12, 31), skip_leap_days=True)) # Filter the export list to the target month/day (month-end) values if eddi_date_list: export_list = [ tgt_dt for tgt_dt in export_list if tgt_dt.strftime('%m%d') in eddi_date_list ] for tgt_dt in export_list: date_str = tgt_dt.strftime('%Y%m%d') logging.info('{} {}'.format(tgt_dt.strftime('%Y-%m-%d'), output_name)) if tgt_dt >= datetime.datetime.today(): logging.info(' Date after current date, skipping') continue # Rename to match naming style from getDownloadURL # image_name.band.tif export_id = '{}_{}_{}'.format(ini['INPUTS']['zone_filename'], date_str, export_name.lower()) output_id = '{}_{}'.format(date_str, output_name) export_path = os.path.join(ini['EXPORT']['export_ws'], export_id + '.tif') output_path = os.path.join(zone_eddi_ws, output_id + '.tif') logging.debug(' Export: {}'.format(export_path)) logging.debug(' Output: {}'.format(output_path)) if overwrite_flag: if export_id in tasks.keys(): logging.debug(' Task already submitted, cancelling') ee.data.cancelTask(tasks[export_id]) del tasks[export_id] if os.path.isfile(export_path): logging.debug(' Export image already exists, removing') utils.remove_file(export_path) # os.remove(export_path) if os.path.isfile(output_path): logging.debug(' Output image already exists, removing') utils.remove_file(output_path) # os.remove(output_path) else: if os.path.isfile(export_path): logging.debug(' Export image already exists, moving') shutil.move(export_path, output_path)
gdc.raster_path_set_nodata(output_path, nodata_value) # DEADBEEF - should raster stats be computed? # gdc.raster_statistics(output_path) continue elif os.path.isfile(output_path): logging.debug(' Output image already exists, skipping') continue elif export_id in tasks.keys(): logging.debug(' Task already submitted, skipping') continue eddi_image = ee_eddi_image(tgt_dt.strftime('%Y-%m-%d'), agg_days=aggregation_days, variable='eddi', year_start=climo_year_start, year_end=climo_year_end) logging.debug(' Building export task') # if ini['EXPORT']['export_dest'] == 'gdrive': task = ee.batch.Export.image.toDrive( image=eddi_image, description=export_id, # folder=ini['EXPORT']['export_folder'], fileNamePrefix=export_id, dimensions=output_shape, crs=ini['SPATIAL']['crs'], crsTransform=output_transform) # elif ini['EXPORT']['export_dest'] == 'cloud': # task = ee.batch.Export.image.toCloudStorage( # image=eddi_image, # description=export_id, # bucket=ini['EXPORT']['export_folder'], # fileNamePrefix=export_id, # dimensions=output_shape, # crs=ini['SPATIAL']['crs'], # crsTransform=output_transform) logging.debug(' Starting export task') utils.ee_request(task.start())
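# NOTE: Illustrative sketch, not part of the original script. The hard-coded
# "eddi_date_list" above is simply the month-end dates (MMDD) of a non-leap
# year; the equivalent list can be generated with the standard library. The
# name "month_end_mmdd" and the reference year 2015 are arbitrary choices for
# this example.
import calendar
month_end_mmdd = [
    '{:02d}{:02d}'.format(month, calendar.monthrange(2015, month)[1])
    for month in range(1, 13)]
# month_end_mmdd == ['0131', '0228', '0331', '0430', '0531', '0630',
#                    '0731', '0831', '0930', '1031', '1130', '1231']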
def ee_image_download(ini_path=None, overwrite_flag=False): """Earth Engine Annual Mean Image Download Parameters ---------- ini_path : str overwrite_flag : bool, optional If True, overwrite existing files (the default is False). """ logging.info('\nEarth Engine Landsat Image Download') images_folder = 'landsat' if overwrite_flag: logging.warning( '\nAre you sure you want to overwrite existing images?') input('Press ENTER to continue') # Regular expression to pull out Landsat scene_id # landsat_re = re.compile( # 'L[ETC][4578]\d{6}(?P<YEAR>\d{4})(?P<DOY>\d{3})\D{3}\d{2}') # Read config file ini = inputs.read(ini_path) inputs.parse_section(ini, section='INPUTS') inputs.parse_section(ini, section='SPATIAL') inputs.parse_section(ini, section='EXPORT') inputs.parse_section(ini, section='IMAGES') nodata_value = -9999 # Float32/Float64 float_output_type = 'Float32' float_nodata_value = np.finfo(np.float32).min # Byte/Int16/UInt16/UInt32/Int32 int_output_type = 'Byte' int_nodata_value = 255 int_bands = ['cloud_score', 'fmask'] # Get ee features from shapefile zone_geom_list = gdc.shapefile_2_geom_list_func( ini['INPUTS']['zone_shp_path'], zone_field=ini['INPUTS']['zone_field'], reverse_flag=False) # Check if the zone_names are unique # Eventually support merging common zone_names if len(set([z[1] for z in zone_geom_list])) != len(zone_geom_list): logging.error( '\nERROR: There appear to be duplicate zone ID/name values.' '\n Currently, the values in "{}" must be unique.' '\n Exiting.'.format(ini['INPUTS']['zone_field'])) return False # Filter features by FID before merging geometries if ini['INPUTS']['fid_keep_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] in ini['INPUTS']['fid_keep_list']] if ini['INPUTS']['fid_skip_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] not in ini['INPUTS']['fid_skip_list']] # Merge geometries if ini['INPUTS']['merge_geom_flag']: merge_geom = ogr.Geometry(ogr.wkbMultiPolygon) for zone in zone_geom_list: zone_multipolygon = ogr.ForceToMultiPolygon( ogr.CreateGeometryFromJson(json.dumps(zone[2]))) for zone_polygon in zone_multipolygon: merge_geom.AddGeometry(zone_polygon) # merge_json = json.loads(merge_mp.ExportToJson()) zone_geom_list = [[ 0, ini['INPUTS']['zone_filename'], json.loads(merge_geom.ExportToJson())]] ini['INPUTS']['zone_field'] = '' # Need zone_shp_path projection to build EE geometries zone_osr = gdc.feature_path_osr(ini['INPUTS']['zone_shp_path']) zone_proj = gdc.osr_wkt(zone_osr) # zone_proj = ee.Projection(zone_proj).wkt().getInfo() # zone_proj = zone_proj.replace('\n', '').replace(' ', '') # logging.debug(' Zone Projection: {}'.format(zone_proj)) # Check that shapefile has matching spatial reference if not gdc.matching_spatref(zone_osr, ini['SPATIAL']['osr']): logging.warning(' Zone OSR:\n{}\n'.format(zone_osr)) logging.warning(' Output OSR:\n{}\n'.format( ini['SPATIAL']['osr'])) logging.warning(' Zone Proj4: {}'.format(zone_osr.ExportToProj4())) logging.warning(' Output Proj4: {}'.format( ini['SPATIAL']['osr'].ExportToProj4())) logging.warning( '\nWARNING: \n' 'The output and zone spatial references do not appear to match\n' 'This will likely cause problems!') input('Press ENTER to continue') else: logging.debug(' Zone Projection:\n{}\n'.format( zone_osr.ExportToWkt())) logging.debug(' Output Projection:\n{}\n'.format( ini['SPATIAL']['osr'].ExportToWkt())) logging.debug(' Output Cellsize: {}'.format( ini['SPATIAL']['cellsize'])) # Keyword arguments for 
ee_common.get_landsat_collection() and # ee_common.get_landsat_image() # Zone geom will be updated inside the loop landsat_args = { k: v for section in ['INPUTS'] for k, v in ini[section].items() if k in [ 'landsat4_flag', 'landsat5_flag', 'landsat7_flag', 'landsat8_flag', 'fmask_flag', 'acca_flag', 'start_year', 'end_year', 'start_month', 'end_month', 'start_doy', 'end_doy', 'scene_id_keep_list', 'scene_id_skip_list', 'path_keep_list', 'row_keep_list', 'refl_sur_method', 'adjust_method', 'mosaic_method']} # landsat_args['start_date'] = start_date # landsat_args['end_date'] = end_date # For composite images, compute all components bands landsat_args['products'] = ini['IMAGES']['download_bands'][:] if 'refl_toa' in landsat_args['products']: landsat_args['products'].extend([ 'blue_toa', 'green_toa', 'red_toa', 'nir_toa', 'swir1_toa', 'swir2_toa']) landsat_args['products'].remove('refl_toa') if 'refl_sur' in landsat_args['products']: landsat_args['products'].extend([ 'blue_sur', 'green_sur', 'red_sur', 'nir_sur', 'swir1_sur', 'swir2_sur']) landsat_args['products'].remove('refl_sur') if 'tasseled_cap' in landsat_args['products']: landsat_args['products'].extend([ 'tc_green', 'tc_bright', 'tc_wet']) landsat_args['products'].remove('tasseled_cap') # Initialize Earth Engine API key logging.info('\nInitializing Earth Engine') ee.Initialize() utils.ee_request(ee.Number(1).getInfo()) # Get current running tasks tasks = utils.get_ee_tasks() # Download images for each feature separately for zone_fid, zone_name, zone_json in zone_geom_list: zone_name = zone_name.replace(' ', '_') logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid)) # Build EE geometry object for zonal stats zone_geom = ee.Geometry( geo_json=zone_json, opt_proj=zone_proj, opt_geodesic=False) landsat_args['zone_geom'] = zone_geom # logging.debug(' Centroid: {}'.format( # zone_geom.centroid(100).getInfo()['coordinates'])) # Use feature geometry to build extent, transform, and shape zone_extent = gdc.Extent( ogr.CreateGeometryFromJson(json.dumps(zone_json)).GetEnvelope()) # zone_extent = gdc.Extent(zone_geom.GetEnvelope()) zone_extent.ymin, zone_extent.xmax = zone_extent.xmax, zone_extent.ymin zone_extent = zone_extent.buffer(ini['IMAGES']['image_buffer']) zone_extent = zone_extent.adjust_to_snap( 'EXPAND', ini['SPATIAL']['snap_x'], ini['SPATIAL']['snap_y'], ini['SPATIAL']['cellsize']) zone_geo = zone_extent.geo(ini['SPATIAL']['cellsize']) zone_transform = gdc.geo_2_ee_transform(zone_geo) zone_transform = '[' + ','.join(map(str, zone_transform)) + ']' zone_shape = zone_extent.shape(ini['SPATIAL']['cellsize']) logging.debug(' Zone Shape: {}'.format(zone_shape)) logging.debug(' Zone Transform: {}'.format(zone_transform)) logging.debug(' Zone Extent: {}'.format(zone_extent)) # logging.debug(' Zone Geom: {}'.format(zone_geom.getInfo())) # output_transform = zone_transform[:] output_transform = '[' + ','.join(map(str, zone_transform)) + ']' output_shape = '{1}x{0}'.format(*zone_shape) # logging.debug(' Image Transform: {}'.format(output_transform)) # logging.debug(' Image Shape: {}'.format(output_shape)) zone_images_ws = os.path.join( ini['IMAGES']['output_ws'], zone_name, images_folder) if not os.path.isdir(zone_images_ws): os.makedirs(zone_images_ws) # Move to EE common? 
def get_collection_ids(image): return ee.Feature(None, {'id': image.get('SCENE_ID')}) # Get list of available Landsat images landsat_obj = ee_common.Landsat(landsat_args) scene_id_list = [ f['properties']['id'] for f in landsat_obj.get_collection().map( get_collection_ids).getInfo()['features']] # Get list of unique image "dates" # Keep scene_id components as string for set operation # If not mosaicing images, include path/row in set # otherwise set to None if not ini['INPUTS']['mosaic_method']: scene_id_list = set([ (image_id[12:20], image_id[0:4], image_id[5:8], image_id[8:11]) for image_id in scene_id_list]) else: scene_id_list = set([ (image_id[12:20], image_id[0:4], None, None) for image_id in scene_id_list]) logging.debug(' Scene Count: {}\n'.format(len(scene_id_list))) # Process each image in the collection by date # Leave scene_id components as strings for date, landsat, path, row in sorted(scene_id_list): scene_dt = datetime.datetime.strptime(date, '%Y%m%d') year = scene_dt.strftime('%Y') doy = scene_dt.strftime('%j') # If not mosaicing images, include path/row in name if not ini['INPUTS']['mosaic_method']: landsat_str = '{}{}{}'.format(landsat, path, row) else: landsat_str = '{}'.format(landsat) logging.info('{} {} (DOY {})'.format( landsat.upper(), scene_dt.strftime('%Y-%m-%d'), doy)) zone_year_ws = os.path.join(zone_images_ws, year) if not os.path.isdir(zone_year_ws): os.makedirs(zone_year_ws) # Get the prepped Landsat image by ID landsat_image = ee.Image(landsat_obj.get_image( landsat, year, doy, path, row)) # Clip using the feature geometry if ini['IMAGES']['clip_landsat_flag']: landsat_image = landsat_image.clip(zone_geom) else: landsat_image = landsat_image.clip(ee.Geometry.Rectangle( list(zone_extent), ini['SPATIAL']['crs'], False)) # DEADBEEF - Display a single image # ee_common.show_thumbnail(landsat_image.visualize( # bands=['fmask', 'fmask', 'fmask'], min=0, max=4)) # ee_common.show_thumbnail(landsat_image.visualize( # bands=['toa_red', 'toa_green', 'toa_blue'], # min=0.05, max=0.35, gamma=1.4)) # return True # Set the masked values to a nodata value # so that the TIF can have a nodata value other than 0 set landsat_image = landsat_image.unmask(nodata_value, False) for band in ini['IMAGES']['download_bands']: logging.debug(' Band: {}'.format(band)) # Rename to match naming style from getDownloadURL # image_name.band.tif export_id = '{}_{}_{}_{}_{}'.format( ini['INPUTS']['zone_filename'], date, doy, landsat_str.lower(), band.lower()) output_id = '{}_{}_{}.{}'.format( date, doy, landsat_str.lower(), band) export_path = os.path.join( ini['EXPORT']['export_ws'], export_id + '.tif') output_path = os.path.join( zone_year_ws, output_id + '.tif') logging.debug(' Export: {}'.format(export_path)) logging.debug(' Output: {}'.format(output_path)) if overwrite_flag: if export_id in tasks.keys(): logging.debug(' Task already submitted, cancelling') ee.data.cancelTask(tasks[export_id]) del tasks[export_id] if os.path.isfile(export_path): logging.debug( ' Export image already exists, removing') utils.remove_file(export_path) # os.remove(export_path) if os.path.isfile(output_path): logging.debug( ' Output image already exists, removing') utils.remove_file(output_path) # os.remove(output_path) else: if os.path.isfile(export_path): logging.debug(' Export image already exists, moving') if band in int_bands: subprocess.check_output([ 'gdalwarp', '-ot', int_output_type, '-overwrite', '-of', 'GTiff', '-co', 'COMPRESS=LZW', '-srcnodata', str(nodata_value), '-dstnodata', str(int_nodata_value), 
export_path, output_path]) else: subprocess.check_output([ 'gdalwarp', '-ot', float_output_type, '-overwrite', '-of', 'GTiff', '-co', 'COMPRESS=LZW', '-srcnodata', str(nodata_value), '-dstnodata', '{:f}'.format(float_nodata_value), export_path, output_path]) with open(os.devnull, 'w') as devnull: subprocess.check_call( ['gdalinfo', '-stats', output_path], stdout=devnull) subprocess.check_output( ['gdalmanage', 'delete', export_path]) continue elif os.path.isfile(output_path): logging.debug( ' Output image already exists, skipping') continue elif export_id in tasks.keys(): logging.debug( ' Task already submitted, skipping') continue # Should composites include Ts? if band == 'refl_toa': band_list = [ 'blue_toa', 'green_toa', 'red_toa', 'nir_toa', 'swir1_toa', 'swir2_toa'] elif band == 'refl_sur': band_list = [ 'blue_sur', 'green_sur', 'red_sur', 'nir_sur', 'swir1_sur', 'swir2_sur'] elif band == 'tasseled_cap': band_list = ['tc_bright', 'tc_green', 'tc_wet'] else: band_list = [band] band_image = landsat_image.select(band_list) # CGM 2016-09-26 - Don't apply any cloud masks to images # # Apply cloud mask before exporting # if fmask_flag and band not in ['refl_sur', 'cloud', 'fmask']: # fmask = ee.Image(landsat_image.select(['fmask'])) # cloud_mask = fmask.eq(2).Or(fmask.eq(3)).Or(fmask.eq(4)).Not() # band_image = band_image.updateMask(cloud_mask) logging.debug(' Building export task') # if ini['EXPORT']['export_dest'] == 'gdrive': task = ee.batch.Export.image.toDrive( band_image, description=export_id, # folder=ini['EXPORT']['export_folder'], fileNamePrefix=export_id, dimensions=output_shape, crs=ini['SPATIAL']['crs'], crsTransform=output_transform) # elif ini['EXPORT']['export_dest'] == 'cloud': # task = ee.batch.Export.image.toCloudStorage( # band_image, # description=export_id, # bucket=ini['EXPORT']['export_folder'], # fileNamePrefix=export_id, # dimensions=output_shape, # crs=ini['SPATIAL']['crs'], # crsTransform=output_transform) logging.debug(' Starting export task') utils.ee_request(task.start())
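# NOTE: Illustrative sketch, not part of the original script. When an export
# is moved into the zone folder above, gdalwarp is called with nearly
# identical arguments for the integer and float bands; a small wrapper could
# centralize the output-type/nodata choice. "warp_export_to_output" is a
# hypothetical name, and the flags simply mirror the gdalwarp calls used above.
import subprocess
def warp_export_to_output(export_path, output_path, output_type,
                          src_nodata, dst_nodata):
    """Convert an exported GeoTIFF to the target type and nodata (sketch)"""
    subprocess.check_output([
        'gdalwarp', '-ot', output_type, '-overwrite',
        '-of', 'GTiff', '-co', 'COMPRESS=LZW',
        '-srcnodata', str(src_nodata), '-dstnodata', str(dst_nodata),
        export_path, output_path])
# Example usage (hypothetical):
#   warp_export_to_output(export_path, output_path, 'Byte', -9999, 255)
#   warp_export_to_output(export_path, output_path, 'Float32', -9999,
#                         float(np.finfo(np.float32).min))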
def ee_beamer_et(ini_path=None, overwrite_flag=False): """Earth Engine Beamer ET Image Download Args: ini_path (str): overwrite_flag (bool): if True, overwrite existing files Returns: None """ logging.info('\nEarth Engine Beamer Annual Mean ETg Image Download') # Read config file ini = inputs.read(ini_path) inputs.parse_section(ini, section='INPUTS') inputs.parse_section(ini, section='SPATIAL') inputs.parse_section(ini, section='IMAGES') inputs.parse_section(ini, section='BEAMER') ini['IMAGES']['download_bands'] = [ 'etg_mean', 'etg_lci', 'etg_uci', 'etg_lpi', 'etg_upi' ] stat_list = ['mean', 'median'] nodata_value = -9999 zips_folder = 'zips' annuals_folder = 'annuals' # Get ee features from shapefile zone_geom_list = gdc.shapefile_2_geom_list_func( ini['INPUTS']['zone_shp_path'], zone_field=ini['INPUTS']['zone_field'], reverse_flag=False) # zone_count = len(zone_geom_list) # output_fmt = '_{0:0%sd}.csv' % str(int(math.log10(zone_count)) + 1) # Check if the zone_names are unique # Eventually support merging common zone_names if len(set([z[1] for z in zone_geom_list])) != len(zone_geom_list): logging.error( '\nERROR: There appear to be duplicate zone ID/name values.' '\n Currently, the values in "{}" must be unique.' '\n Exiting.'.format(ini['INPUTS']['zone_field'])) return False # Filter features by FID if ini['INPUTS']['fid_keep_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] in ini['INPUTS']['fid_keep_list'] ] if ini['INPUTS']['fid_skip_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] not in ini['INPUTS']['fid_skip_list'] ] # Merge geometries if ini['INPUTS']['merge_geom_flag']: merge_geom = ogr.Geometry(ogr.wkbMultiPolygon) for zone in zone_geom_list: zone_multipolygon = ogr.ForceToMultiPolygon( ogr.CreateGeometryFromJson(json.dumps(zone[2]))) for zone_polygon in zone_multipolygon: merge_geom.AddGeometry(zone_polygon) # merge_json = json.loads(merge_mp.ExportToJson()) zone_geom_list = [[ 0, ini['INPUTS']['zone_filename'], json.loads(merge_geom.ExportToJson()) ]] ini['INPUTS']['zone_field'] = '' # Set all zone specific parameters into a dictionary zone = {} # Need zone_shp_path projection to build EE geometries zone['osr'] = gdc.feature_path_osr(ini['INPUTS']['zone_shp_path']) zone['proj'] = gdc.osr_wkt(zone['osr']) # zone['proj'] = ee.Projection(zone['proj']).wkt().getInfo() # zone['proj'] = zone['proj'].replace('\n', '').replace(' ', '') # logging.debug(' Zone Projection: {}'.format(zone['proj'])) # Check that shapefile has matching spatial reference if not gdc.matching_spatref(zone['osr'], ini['SPATIAL']['osr']): logging.warning(' Zone OSR:\n{}\n'.format(zone['osr'])) logging.warning(' Output OSR:\n{}\n'.format( ini['SPATIAL']['osr'].ExportToWkt())) logging.warning(' Zone Proj4: {}'.format( zone['osr'].ExportToProj4())) logging.warning(' Output Proj4: {}'.format( ini['SPATIAL']['osr'].ExportToProj4())) logging.warning( '\nWARNING: \n' 'The output and zone spatial references do not appear to match\n' 'This will likely cause problems!') input('Press ENTER to continue') else: logging.debug(' Zone Projection:\n{}\n'.format( zone['osr'].ExportToWkt())) logging.debug(' Output Projection:\n{}\n'.format( ini['SPATIAL']['osr'].ExportToWkt())) logging.debug(' Output Cellsize: {}'.format( ini['SPATIAL']['cellsize'])) # Initialize Earth Engine API key logging.info('\nInitializing Earth Engine') ee.Initialize() utils.ee_request(ee.Number(1).getInfo()) # Get list of path/row strings to centroid coordinates if 
ini['INPUTS']['tile_keep_list']: ini['INPUTS']['tile_geom'] = [ wrs2.tile_centroids[tile] for tile in ini['INPUTS']['tile_keep_list'] if tile in wrs2.tile_centroids.keys() ] ini['INPUTS']['tile_geom'] = ee.Geometry.MultiPoint( ini['INPUTS']['tile_geom'], 'EPSG:4326') else: ini['INPUTS']['tile_geom'] = None # Read in ETo and PPT data from file if (ini['BEAMER']['eto_source'] == 'file' or ini['BEAMER']['ppt_source'] == 'file'): data_array = np.genfromtxt(ini['BEAMER']['data_path'], delimiter=',', names=True, dtype=None) data_fields = data_array.dtype.names logging.debug(' CSV fields: {}'.format(', '.join(data_fields))) # DEADBEEF - Compare fields names assuming all upper case data_fields = [f.upper() for f in data_fields] eto_dict = defaultdict(dict) ppt_dict = defaultdict(dict) for row in data_array: z = str(row[data_fields.index(ini['BEAMER']['data_zone_field'])]) y = int(row[data_fields.index(ini['BEAMER']['data_year_field'])]) if ini['BEAMER']['eto_source'] == 'file': # DEADBEEF - Compare fields names assuming all upper case eto_dict[z][y] = row[data_fields.index( ini['BEAMER']['data_eto_field'].upper())] if ini['BEAMER']['ppt_source'] == 'file': # DEADBEEF - Compare fields names assuming all upper case ppt_dict[z][y] = row[data_fields.index( ini['BEAMER']['data_ppt_field'].upper())] # Get filtered/merged/prepped Landsat collection landsat_args = { k: v for section in ['INPUTS'] for k, v in ini[section].items() if k in [ 'landsat4_flag', 'landsat5_flag', 'landsat7_flag', 'landsat8_flag', 'fmask_flag', 'acca_flag', 'start_year', 'end_year', 'start_month', 'end_month', 'start_doy', 'end_doy', 'scene_id_keep_list', 'scene_id_skip_list', 'path_keep_list', 'row_keep_list', 'tile_geom', 'adjust_method', 'mosaic_method', 'refl_sur_method' ] } landsat_args['products'] = ['evi_sur'] landsat = ee_common.Landsat(landsat_args) # Download images for each feature separately for zone_fid, zone_name, zone_json in zone_geom_list: zone['fid'] = zone_fid zone['name'] = zone_name.replace(' ', '_') zone['json'] = zone_json logging.info('ZONE: {} (FID: {})'.format(zone['name'], zone['fid'])) # Build EE geometry object for zonal stats zone['geom'] = ee.Geometry(geo_json=zone['json'], opt_proj=zone['proj'], opt_geodesic=False) # logging.debug(' Centroid: {}'.format( # zone['geom'].centroid(100).getInfo()['coordinates'])) # Use feature geometry to build extent, transform, and shape zone['extent'] = gdc.Extent( ogr.CreateGeometryFromJson(json.dumps(zone['json'])).GetEnvelope()) # zone['extent'] = gdc.Extent(zone['geom'].GetEnvelope()) zone['extent'] = zone['extent'].ogrenv_swap() zone['extent'] = zone['extent'].adjust_to_snap( 'EXPAND', ini['SPATIAL']['snap_x'], ini['SPATIAL']['snap_y'], ini['SPATIAL']['cellsize']) zone['geo'] = zone['extent'].geo(ini['SPATIAL']['cellsize']) zone['transform'] = gdc.geo_2_ee_transform(zone['geo']) # zone['transform'] = '[' + ','.join(map(str, zone['transform'])) + ']' zone['shape'] = zone['extent'].shape(ini['SPATIAL']['cellsize']) logging.debug(' Zone Shape: {}'.format(zone['shape'])) logging.debug(' Zone Transform: {}'.format(zone['transform'])) logging.debug(' Zone Extent: {}'.format(zone['extent'])) # logging.debug(' Zone Geom: {}'.format(zone['geom'].getInfo())) # Assume all pixels in all 14+2 images could be reduced zone['max_pixels'] = zone['shape'][0] * zone['shape'][1] logging.debug(' Max Pixels: {}'.format(zone['max_pixels'])) # Set output spatial reference # Eventually allow user to manually set these # output_crs = zone['proj'] logging.debug(' Image Projection: 
{}'.format(ini['SPATIAL']['crs'])) # output_transform = zone['transform'][:] output_transform = '[' + ','.join(map(str, zone['transform'])) + ']' output_shape = '{1}x{0}'.format(*zone['shape']) logging.debug(' Image Transform: {}'.format(output_transform)) logging.debug(' Image Shape: {}'.format(output_shape)) zone_output_ws = os.path.join(ini['IMAGES']['output_ws'], zone_name) zone_zips_ws = os.path.join(zone_output_ws, zips_folder) zone_annuals_ws = os.path.join(zone_output_ws, annuals_folder) if not os.path.isdir(zone_zips_ws): os.makedirs(zone_zips_ws) if not os.path.isdir(zone_annuals_ws): os.makedirs(zone_annuals_ws) # Process date range by year interval_cnt = 1 start_dt = datetime.datetime(ini['INPUTS']['start_year'], 1, 1) end_dt = datetime.datetime(ini['INPUTS']['end_year'] + 1, 1, 1) - datetime.timedelta(0, 1) for i, iter_start_dt in enumerate( rrule.rrule(rrule.YEARLY, interval=interval_cnt, dtstart=start_dt, until=end_dt)): iter_end_dt = (iter_start_dt + relativedelta.relativedelta(years=interval_cnt) - datetime.timedelta(0, 1)) if ((ini['INPUTS']['start_month'] and iter_end_dt.month < ini['INPUTS']['start_month']) or (ini['INPUTS']['end_month'] and iter_start_dt.month > ini['INPUTS']['end_month'])): logging.debug(' {} {} skipping'.format( iter_start_dt.date(), iter_end_dt.date())) continue elif ( (ini['INPUTS']['start_doy'] and int(iter_end_dt.strftime('%j')) < ini['INPUTS']['start_doy']) or (ini['INPUTS']['end_doy'] and int(iter_start_dt.strftime('%j')) > ini['INPUTS']['end_doy'])): logging.debug(' {} {} skipping'.format( iter_start_dt.date(), iter_end_dt.date())) continue else: logging.info('{} {}'.format(iter_start_dt.date(), iter_end_dt.date())) year = iter_start_dt.year # image_id = 'etg_{}_{}'.format( image_id = '{}_{}'.format(zone_name.lower().replace(' ', '_'), year) zip_path = os.path.join(zone_zips_ws, image_id + '.zip') # median_path = os.path.join( # zone_output_ws, image_id + '.img') logging.debug(' Zip: {}'.format(zip_path)) if os.path.isfile(zip_path) and overwrite_flag: logging.debug(' Output already exists, removing zip') os.remove(zip_path) elif os.path.isfile(zip_path) and not overwrite_flag: # Check that existing ZIP files can be opened try: with zipfile.ZipFile(zip_path, 'r') as z: pass except Exception as e: logging.warning(' Zip file error, removing'.format(i)) os.remove(zip_path) # Filter the GRIDMET collection wy_start_date = '{}-10-01'.format(year - 1) wy_end_date = '{}-10-01'.format(year) logging.debug(' WY: {} {}'.format(wy_start_date, wy_end_date)) gridmet_coll = ee.ImageCollection('IDAHO_EPSCOR/GRIDMET') \ .filterDate(wy_start_date, wy_end_date) # # PRISM collection was uploaded as an asset # if ini['BEAMER']['ppt_source'] == 'prism': # def prism_time_start(input_image): # """Set time_start property on PRISM water year PPT collection""" # # Assume year is the 4th item separated by "_" # water_year = ee.String(input_image.get('system:index')).split('_').get(3) # date_start = ee.Date(ee.String(water_year).cat('-10-01')) # return input_image.select([0], ['ppt']).set({ # 'system:time_start': date_start.millis() # }) # prism_coll = ee.ImageCollection('users/cgmorton/prism_800m_ppt_wy') # prism_coll = prism_coll.map(prism_time_start) \ # .filterDate(wy_start_date, wy_end_date) # Get water year PPT from file # Convert all input data to mm to match GRIDMET data if ini['BEAMER']['ppt_source'] == 'file': wy_ppt_input = ppt_dict[zone_name][year] if ini['BEAMER']['data_ppt_units'] == 'mm': pass elif ini['BEAMER']['data_ppt_units'] == 'in': wy_ppt_input *= 
25.4 elif ini['BEAMER']['data_ppt_units'] == 'ft': wy_ppt_input *= (25.4 * 12) elif ini['BEAMER']['ppt_source'] == 'gridmet': # GET GRIDMET value at centroid of geometry wy_ppt_input = float( utils.ee_getinfo( ee.ImageCollection( gridmet_coll.select(['pr'], ['ppt']).sum()).getRegion( zone['geom'].centroid(1), 500))[1][4]) # Calculate GRIDMET zonal mean of geometry # wy_ppt_input = float(ee.ImageCollection( # gridmet_coll.select(['pr'], ['ppt'])).reduceRegion( # reducer=ee.Reducer.sum(), # geometry=zone['geom'], # crs=ini['SPATIAL']['crs'], # crsTransform=zone['transform'], # bestEffort=False, # tileScale=1).getInfo()['ppt'] # elif ini['BEAMER']['ppt_source'] == 'prism': # # Calculate PRISM zonal mean of geometry # wy_ppt_input = float(ee.ImageCollection( # prism_coll.map(ee_common.prism_ppt_func)).sum().reduceRegion( # reducer=ee.Reducer.mean(), # geometry=zone['geom'], # crs=ini['SPATIAL']['crs'], # crsTransform=zone['transform'], # bestEffort=False, # tileScale=1).getInfo()['ppt']) # Get water year ETo read from file # Convert all input data to mm for Beamer Method if ini['BEAMER']['eto_source'] == 'file': wy_eto_input = eto_dict[zone_name][year] if ini['BEAMER']['data_eto_units'] == 'mm': pass elif ini['BEAMER']['data_eto_units'] == 'in': wy_eto_input *= 25.4 elif ini['BEAMER']['data_eto_units'] == 'ft': wy_eto_input *= (25.4 * 12) # This assumes GRIMET data is in millimeters elif ini['BEAMER']['eto_source'] == 'gridmet': wy_eto_input = float( utils.ee_getinfo( ee.ImageCollection(gridmet_coll.select( ['eto']).sum()).getRegion(zone['geom'].centroid(1), 500))[1][4]) # wy_eto_input = float(ee.ImageCollection( # gridmet_coll.select(['eto'])).reduceRegion( # reducer=ee.Reducer.sum(), # geometry=zone['geom'], # crs=ini['SPATIAL']['crs'], # crsTransform=zone['transform'], # bestEffort=False, # tileScale=1).getInfo() logging.debug(' Input ETO: {} mm PPT: {} mm'.format( wy_eto_input, wy_ppt_input)) # Scale ETo & PPT wy_eto_input *= ini['BEAMER']['eto_factor'] wy_ppt_input *= ini['BEAMER']['ppt_factor'] # Convert output units from mm wy_ppt_output = wy_ppt_input wy_eto_output = wy_eto_input if ini['IMAGES']['ppt_units'] == 'mm': pass elif ini['IMAGES']['ppt_units'] == 'in': wy_ppt_output /= 25.4 elif ini['IMAGES']['ppt_units'] == 'ft': wy_ppt_output /= (25.4 * 12) if ini['IMAGES']['eto_units'] == 'mm': pass elif ini['IMAGES']['eto_units'] == 'in': wy_eto_output /= 25.4 elif ini['IMAGES']['eto_units'] == 'ft': wy_eto_output /= (25.4 * 12) logging.debug(' Output ETO: {} {} PPT: {} {}'.format( wy_eto_output, ini['IMAGES']['eto_units'], wy_ppt_output, ini['IMAGES']['ppt_units'])) # Initialize the Landsat object for target zone and iteration landsat.zone_geom = zone['geom'] landsat.start_date = iter_start_dt.strftime('%Y-%m-%d') landsat.end_date = iter_end_dt.strftime('%Y-%m-%d') landsat_coll = landsat.get_collection() # print(sorted(utils.ee_getinfo( # landsat_coll.aggregate_histogram('SCENE_ID')))) # input('ENTER') # Skip if Landsat collection is empty if not utils.ee_getinfo( landsat_coll.aggregate_histogram('SCENE_ID')): logging.info(' Empty Landsat collection, skipping') continue # Add water year ETo and PPT values to each image def eto_ppt_func(img): """""" return ee.Image(img).setMulti({ 'wy_eto': wy_eto_output, 'wy_ppt': wy_ppt_output }) landsat_coll = ee.ImageCollection(landsat_coll.map(eto_ppt_func)) # Build each collection separately then merge etg_coll = ee.ImageCollection(landsat_coll.map( ee_common.beamer_func)) \ .select(ini['IMAGES']['download_bands']) # Clip using the feature geometry 
# Set the masked values to a nodata value # so that the TIF can have a nodata value other than 0 set etg_image = ee.Image(etg_coll.mean()) \ .clip(zone['geom']) \ .unmask(nodata_value, False) if not os.path.isfile(zip_path): # Get the download URL logging.debug(' Requesting URL') zip_url = utils.ee_request( etg_image.getDownloadURL({ 'name': image_id, 'crs': ini['SPATIAL']['crs'], 'crs_transform': output_transform, 'dimensions': output_shape })) # Try downloading a few times logging.info(' Downloading') for i in range(1, 10): try: response = urlrequest.urlopen(zip_url) with open(zip_path, 'wb') as output_f: shutil.copyfileobj(response, output_f) break except Exception as e: logging.info(' Resending query') logging.debug(' {}'.format(e)) sleep(i**2) os.remove(zip_path) # Try extracting the files try: logging.info(' Extracting') with zipfile.ZipFile(zip_path, 'r') as z: z.extractall(zone_annuals_ws) except Exception as e: logging.warning(' Error: could not extract'.format(i)) logging.debug(' {}'.format(e)) try: os.remove(zip_path) except Exception as e: pass # Set nodata value for item in os.listdir(zone_annuals_ws): if item.startswith(image_id) and item.endswith('.tif'): gdc.raster_path_set_nodata( os.path.join(zone_annuals_ws, item), nodata_value) raster_statistics(os.path.join(zone_annuals_ws, item)) logging.info('\nComputing composite rasters from annual means') for stat in stat_list: logging.info(' Stat: {}'.format(stat)) for band in ini['IMAGES']['download_bands']: logging.info(' {}'.format(band)) image_band_list = [ os.path.join(zone_annuals_ws, item) for item in os.listdir(zone_annuals_ws) if item.endswith('.{}.tif'.format(band.lower())) ] # for image_path in image_band_list: # raster_path_set_nodata(image_path, nodata_value) output_path = os.path.join( # zone_output_ws, 'etg_{}_{}.{}.tif'.format( zone_output_ws, '{}_{}.{}.tif'.format(zone_name.lower().replace(' ', '_'), stat.lower(), band.lower())) logging.debug(' {}'.format(output_path)) # Use GDAL to compute the composite raster cell_statistics(image_band_list, output_path, stat.lower()) raster_statistics(output_path)
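# NOTE: Illustrative sketch, not part of the original script. The
# getDownloadURL block in ee_beamer_et() above retries the zip download
# several times with an increasing delay before giving up; factored out, that
# retry pattern looks roughly like the helper below. "download_with_retries"
# and the retry count are assumptions made for this example, and the import
# aliases simply mirror the names already used above (urlrequest, sleep).
import logging
import shutil
from time import sleep
from urllib import request as urlrequest

def download_with_retries(zip_url, zip_path, retries=5):
    """Download a URL to a local file with simple exponential backoff (sketch)"""
    for attempt in range(1, retries + 1):
        try:
            response = urlrequest.urlopen(zip_url)
            with open(zip_path, 'wb') as output_f:
                shutil.copyfileobj(response, output_f)
            return True
        except Exception as e:
            logging.debug('  Download attempt {} failed: {}'.format(attempt, e))
            sleep(attempt ** 2)
    return False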
def main(ini_path=None, overwrite_flag=True, show_flag=False): """Generate summary figures Args: ini_path (str): file path of the control file overwrite_flag (bool): if True, overwrite existing figures show_flag (bool): if True, show figures as they are being built """ logging.info('\nGenerate summary figures') # Read config file ini = inputs.read(ini_path) inputs.parse_section(ini, section='INPUTS') inputs.parse_section(ini, section='ZONAL_STATS') inputs.parse_section(ini, section='SUMMARY') inputs.parse_section(ini, section='FIGURES') # Band options band_list = [ 'albedo_sur', 'cloud_score', 'eto', 'evi_sur', 'fmask_count', 'fmask_total', 'ndvi_sur', 'ndvi_toa', 'ndwi_green_nir_sur', 'ndwi_green_nir_toa', 'ndwi_green_swir1_sur', 'ndwi_green_swir1_toa', 'ndwi_nir_swir1_sur', 'ndwi_nir_swir1_toa', 'ndwi_swir1_green_sur', 'ndwi_swir1_green_toa', # 'ndwi_sur', 'ndwi_toa', 'pixel_count', 'pixel_total', 'ppt', 'tc_bright', 'tc_green', 'tc_wet', 'ts' ] band_name = { 'albedo_sur': 'Albedo', 'cloud_score': 'Cloud Score', 'eto': 'ETo', 'evi_sur': 'EVI', 'fmask_count': 'Fmask Count', 'fmask_total': 'Fmask Total', 'ndvi_sur': 'NDVI', 'ndvi_toa': 'NDVI (TOA)', 'ndwi_green_nir_sur': 'NDWI (Green, NIR)', 'ndwi_green_nir_toa': 'NDWI (Green, NIR) (TOA)', 'ndwi_green_swir1_sur': 'NDWI (Green, SWIR1)', 'ndwi_green_swir1_toa': 'NDWI (Green, SWIR1) (TOA)', 'ndwi_nir_swir1_sur': 'NDWI (NIR, SWIR1)', 'ndwi_nir_swir1_toa': 'NDWI (NIR, SWIR1) (TOA)', 'ndwi_swir1_green_sur': 'NDWI (SWIR1, Green)', 'ndwi_swir1_green_toa': 'NDWI (SWIR1, Green) (TOA)', # 'ndwi_sur': 'NDWI (SWIR1, GREEN)', # 'ndwi_toa': 'NDWI (SWIR1, GREEN) (TOA)', 'pixel_count': 'Pixel Count', 'pixel_total': 'Pixel Total', 'ppt': 'PPT', 'tc_bright': 'Brightness', 'tc_green': 'Greeness', 'tc_wet': 'Wetness', 'ts': 'Ts' } band_unit = { 'albedo_sur': 'dimensionless', 'cloud_score': 'dimensionless', 'evi_sur': 'dimensionless', 'eto': 'mm', 'fmask_count': 'dimensionless', 'fmask_total': 'dimensionless', 'ndvi_sur': 'dimensionless', 'ndvi_toa': 'dimensionless', 'ndwi_green_nir_sur': 'dimensionless', 'ndwi_green_nir_toa': 'dimensionless', 'ndwi_green_swir1_sur': 'dimensionless', 'ndwi_green_swir1_toa': 'dimensionless', 'ndwi_nir_swir1_sur': 'dimensionless', 'ndwi_nir_swir1_toa': 'dimensionless', 'ndwi_swir1_green_sur': 'dimensionless', 'ndwi_swir1_green_toa': 'dimensionless', # 'ndwi_sur': 'dimensionless', # 'ndwi_toa': 'dimensionless', 'pixel_count': 'dimensionless', 'pixel_total': 'dimensionless', 'ppt': 'mm', 'tc_bright': 'dimensionless', 'tc_green': 'dimensionless', 'tc_wet': 'dimensionless', 'ts': 'K', } band_color = { 'albedo_sur': '#CF4457', 'cloud_score': '0.5', 'eto': '#348ABD', 'fmask_count': '0.5', 'fmask_total': '0.5', 'evi_sur': '#FFA500', 'ndvi_sur': '#A60628', 'ndvi_toa': '#A60628', 'ndwi_green_nir_sur': '#4eae4b', 'ndwi_green_nir_toa': '#4eae4b', 'ndwi_green_swir1_sur': '#4eae4b', 'ndwi_green_swir1_toa': '#4eae4b', 'ndwi_nir_swir1_sur': '#4eae4b', 'ndwi_nir_swir1_toa': '#4eae4b', 'ndwi_swir1_green_sur': '#4eae4b', 'ndwi_swir1_green_toa': '#4eae4b', # 'ndwi_sur': '#4eae4b', # 'ndwi_toa': '#4eae4b', 'pixel_count': '0.5', 'pixel_total': '0.5', 'ppt': '0.5', 'tc_bright': '#E24A33', 'tc_green': '#E24A33', 'tc_wet': '#E24A33', 'ts': '#188487' } # A couple of color palettes to sample from # import seaborn as sns # print(sns.color_palette('hls', 20).as_hex()) # print(sns.color_palette('husl', 20).as_hex()) # print(sns.color_palette('hsv', 20).as_hex()) # print(sns.color_palette('Set1', 20).as_hex()) # print(sns.color_palette('Set2', 20).as_hex()) # 
Hardcoded plot options figures_folder = 'figures' fig_type = 'large' plot_dict = dict() # Center y-labels in figure window (instead of centering on ticks/axes) plot_dict['center_ylabel'] = False # Axes percentages must be 0-1 plot_dict['timeseries_band_ax_pct'] = [0.3, 0.92] plot_dict['timeseries_ppt_ax_pct'] = [0.0, 0.35] plot_dict['complement_band_ax_pct'] = [0.0, 0.5] plot_dict['complement_eto_ax_pct'] = [0.4, 1.0] if fig_type.lower() == 'large': plot_dict['title_fs'] = 12 plot_dict['xtick_fs'] = 10 plot_dict['ytick_fs'] = 10 plot_dict['xlabel_fs'] = 10 plot_dict['ylabel_fs'] = 10 plot_dict['legend_fs'] = 10 plot_dict['ts_ms'] = 3 plot_dict['comp_ms'] = 4 plot_dict['timeseries_ax'] = [0.12, 0.13, 0.78, 0.81] plot_dict['scatter_ax'] = [0.12, 0.10, 0.82, 0.84] plot_dict['complement_ax'] = [0.12, 0.10, 0.78, 0.84] plot_dict['fig_size'] = (6.0, 5.0) elif fig_type.lower() == 'small': plot_dict['title_fs'] = 10 plot_dict['xtick_fs'] = 8 plot_dict['ytick_fs'] = 8 plot_dict['xlabel_fs'] = 8 plot_dict['ylabel_fs'] = 8 plot_dict['legend_fs'] = 8 plot_dict['ts_ms'] = 1.5 plot_dict['comp_ms'] = 2 plot_dict['timeseries_ax'] = [0.18, 0.21, 0.67, 0.70] plot_dict['scatter_ax'] = [0.18, 0.21, 0.67, 0.70] plot_dict['complement_ax'] = [0.18, 0.16, 0.67, 0.75] plot_dict['fig_size'] = (3.0, 2.5) plot_dict['fig_dpi'] = 300 plot_dict['show'] = show_flag plot_dict['overwrite'] = overwrite_flag # CSV parameters landsat_annual_fields = [ 'ZONE_FID', 'ZONE_NAME', 'YEAR', 'SCENE_COUNT', 'CLOUD_SCORE', 'PIXEL_COUNT', 'PIXEL_TOTAL', 'FMASK_COUNT', 'FMASK_TOTAL', 'TS', 'ALBEDO_SUR', 'NDVI_TOA', 'NDVI_SUR', 'EVI_SUR', 'NDWI_GREEN_NIR_SUR', 'NDWI_GREEN_SWIR1_SUR', 'NDWI_NIR_SWIR1_SUR', # 'NDWI_GREEN_NIR_TOA', 'NDWI_GREEN_SWIR1_TOA', 'NDWI_NIR_SWIR1_TOA', # 'NDWI_SWIR1_GREEN_TOA', 'NDWI_SWIR1_GREEN_SUR', # 'NDWI_TOA', 'NDWI_SUR', 'TC_BRIGHT', 'TC_GREEN', 'TC_WET' ] # Add merged row XXX to keep list ini['INPUTS']['row_keep_list'].append('XXX') # Check figure bands timeseries_bands = ini['FIGURES']['timeseries_bands'] scatter_bands = ini['FIGURES']['scatter_bands'] complementary_bands = ini['FIGURES']['complementary_bands'] if timeseries_bands: logging.info('Timeseries Bands:') for band in timeseries_bands: if band not in band_list: logging.info( ' Invalid timeseries band: {}, exiting'.format(band)) return False logging.info(' {}'.format(band)) if scatter_bands: logging.info('Scatter Bands (x:y):') for band_x, band_y in scatter_bands: if band_x not in band_list: logging.info( ' Invalid scatter band: {}, exiting'.format(band_x)) return False elif band_y not in band_list: logging.info(' Invalid band: {}, exiting'.format(band_y)) return False logging.info(' {}:{}'.format(band_x, band_y)) if complementary_bands: logging.info('Complementary Bands:') for band in complementary_bands: if band not in band_list: logging.info( ' Invalid complementary band: {}, exiting'.format(band)) return False logging.info(' {}'.format(band)) # Add input plot options plot_dict['ppt_plot_type'] = ini['FIGURES']['ppt_plot_type'] plot_dict['scatter_best_fit'] = ini['FIGURES']['scatter_best_fit'] # Start/end year year_list = list( range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1)) month_list = list( utils.wrapped_range(ini['INPUTS']['start_month'], ini['INPUTS']['end_month'], 1, 12)) doy_list = list( utils.wrapped_range(ini['INPUTS']['start_doy'], ini['INPUTS']['end_doy'], 1, 366)) # GRIDMET month range (default to water year) gridmet_start_month = ini['SUMMARY']['gridmet_start_month'] gridmet_end_month = 
ini['SUMMARY']['gridmet_end_month'] gridmet_months = list( utils.month_range(gridmet_start_month, gridmet_end_month)) logging.info('\nGridmet months: {}'.format(', '.join( map(str, gridmet_months)))) # Get ee features from shapefile zone_geom_list = gdc.shapefile_2_geom_list_func( ini['INPUTS']['zone_shp_path'], zone_field=ini['INPUTS']['zone_field'], reverse_flag=False) # Filter features by FID before merging geometries if ini['INPUTS']['fid_keep_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] in ini['INPUTS']['fid_keep_list'] ] if ini['INPUTS']['fid_skip_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] not in ini['INPUTS']['fid_skip_list'] ] # # Filter features by FID before merging geometries # if ini['INPUTS']['fid_keep_list']: # landsat_df = landsat_df[landsat_df['ZONE_FID'].isin( # ini['INPUTS']['fid_keep_list'])] # if ini['INPUTS']['fid_skip_list']: # landsat_df = landsat_df[~landsat_df['ZONE_FID'].isin( # ini['INPUTS']['fid_skip_list'])] logging.info('\nProcessing zones') for zone_fid, zone_name, zone_json in zone_geom_list: zone_name = zone_name.replace(' ', '_') logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid)) zone_stats_ws = os.path.join(ini['ZONAL_STATS']['output_ws'], zone_name) zone_figures_ws = os.path.join(ini['SUMMARY']['output_ws'], zone_name, figures_folder) if not os.path.isdir(zone_stats_ws): logging.debug( ' Folder {} does not exist, skipping'.format(zone_stats_ws)) continue elif not os.path.isdir(zone_figures_ws): os.makedirs(zone_figures_ws) # Input paths landsat_daily_path = os.path.join( zone_stats_ws, '{}_landsat_daily.csv'.format(zone_name)) gridmet_daily_path = os.path.join( zone_stats_ws, '{}_gridmet_daily.csv'.format(zone_name)) gridmet_monthly_path = os.path.join( zone_stats_ws, '{}_gridmet_monthly.csv'.format(zone_name)) if not os.path.isfile(landsat_daily_path): logging.error(' Landsat daily CSV does not exist, skipping zone') continue elif (not os.path.isfile(gridmet_daily_path) and not os.path.isfile(gridmet_monthly_path)): logging.error( ' GRIDMET daily or monthly CSV does not exist, skipping zone') continue # DEADBEEF - Eventually support generating only Landsat figures # logging.error( # ' GRIDMET daily and/or monthly CSV files do not exist.\n' # ' ETo and PPT will not be processed.') # Output paths landsat_summary_path = os.path.join( zone_figures_ws, '{}_landsat_figures.csv'.format(zone_name)) gridmet_summary_path = os.path.join( zone_figures_ws, '{}_gridmet_figures.csv'.format(zone_name)) zone_summary_path = os.path.join( zone_figures_ws, '{}_zone_figures.csv'.format(zone_name)) logging.debug(' Reading Landsat CSV') landsat_df = pd.read_csv(landsat_daily_path) logging.debug(' Filtering Landsat dataframe') landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0] # QA field should have been written in zonal stats code # Eventually this block can be removed if 'QA' not in landsat_df.columns.values: landsat_df['QA'] = 0 # # This assumes that there are L5/L8 images in the dataframe # if not landsat_df.empty: # max_pixel_count = max(landsat_df['PIXEL_COUNT']) # else: # max_pixel_count = 0 if year_list: landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)] if month_list: landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)] if doy_list: landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)] # Assume the default is for these to be True and only filter if False if not ini['INPUTS']['landsat4_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT04'] if not 
ini['INPUTS']['landsat5_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT05'] if not ini['INPUTS']['landsat7_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LE07'] if not ini['INPUTS']['landsat8_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LC08'] if ini['INPUTS']['path_keep_list']: landsat_df = landsat_df[landsat_df['PATH'].isin( ini['INPUTS']['path_keep_list'])] if (ini['INPUTS']['row_keep_list'] and ini['INPUTS']['row_keep_list'] != ['XXX']): landsat_df = landsat_df[landsat_df['ROW'].isin( ini['INPUTS']['row_keep_list'])] if ini['INPUTS']['scene_id_keep_list']: # Replace XXX with primary ROW value for checking skip list SCENE_ID scene_id_df = pd.Series([ s.replace('XXX', '{:03d}'.format(int(r))) for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW']) ]) landsat_df = landsat_df[scene_id_df.isin( ini['INPUTS']['scene_id_keep_list']).values] # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # landsat_df = landsat_df[landsat_df['SCENE_ID'].isin( # ini['INPUTS']['scene_id_keep_list'])] if ini['INPUTS']['scene_id_skip_list']: # Replace XXX with primary ROW value for checking skip list SCENE_ID scene_id_df = pd.Series([ s.replace('XXX', '{:03d}'.format(int(r))) for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW']) ]) landsat_df = landsat_df[np.logical_not( scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)] # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # landsat_df = landsat_df[np.logical_not(landsat_df['SCENE_ID'].isin( # ini['INPUTS']['scene_id_skip_list']))] # Filter by QA/QC value if ini['SUMMARY']['max_qa'] >= 0 and not landsat_df.empty: logging.debug(' Maximum QA: {0}'.format( ini['SUMMARY']['max_qa'])) landsat_df = landsat_df[ landsat_df['QA'] <= ini['SUMMARY']['max_qa']] # Filter by average cloud score if ini['SUMMARY']['max_cloud_score'] < 100 and not landsat_df.empty: logging.debug(' Maximum cloud score: {0}'.format( ini['SUMMARY']['max_cloud_score'])) landsat_df = landsat_df[ landsat_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']] # Filter by Fmask percentage if ini['SUMMARY']['max_fmask_pct'] < 100 and not landsat_df.empty: landsat_df['FMASK_PCT'] = 100 * (landsat_df['FMASK_COUNT'] / landsat_df['FMASK_TOTAL']) logging.debug(' Max Fmask threshold: {}'.format( ini['SUMMARY']['max_fmask_pct'])) landsat_df = landsat_df[ landsat_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']] # Filter low count SLC-off images if ini['SUMMARY']['min_slc_off_pct'] > 0 and not landsat_df.empty: logging.debug(' Mininum SLC-off threshold: {}%'.format( ini['SUMMARY']['min_slc_off_pct'])) # logging.debug(' Maximum pixel count: {}'.format( # max_pixel_count)) slc_off_mask = ((landsat_df['LANDSAT'] == 'LE7') & ((landsat_df['YEAR'] >= 2004) | ((landsat_df['YEAR'] == 2003) & (landsat_df['DOY'] > 151)))) slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / landsat_df['PIXEL_TOTAL']) # slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / max_pixel_count) landsat_df = landsat_df[( (slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) & slc_off_mask) | (~slc_off_mask)] if landsat_df.empty: logging.error( ' Empty Landsat dataframe after filtering, skipping zone') continue logging.debug(' Computing Landsat annual summaries') agg_dict = { 'PIXEL_COUNT': { 'PIXEL_COUNT': 'mean', 'SCENE_COUNT': 'count' }, 'PIXEL_TOTAL': { 'PIXEL_TOTAL': 'mean' }, 'FMASK_COUNT': { 'FMASK_COUNT': 'mean' }, 'FMASK_TOTAL': { 'FMASK_TOTAL': 'mean' }, 'CLOUD_SCORE': { 'CLOUD_SCORE': 'mean' } } for field in landsat_df.columns.values: if field in 
landsat_annual_fields: agg_dict.update({field: {field: 'mean'}}) landsat_df = landsat_df \ .groupby(['ZONE_NAME', 'ZONE_FID', 'YEAR']) \ .agg(agg_dict) landsat_df.columns = landsat_df.columns.droplevel(0) landsat_df.reset_index(inplace=True) # landsat_df = landsat_df[landsat_annual_fields] landsat_df['YEAR'] = landsat_df['YEAR'].astype(np.int) landsat_df['SCENE_COUNT'] = landsat_df['SCENE_COUNT'].astype(np.int) landsat_df['PIXEL_COUNT'] = landsat_df['PIXEL_COUNT'].astype(np.int) landsat_df['PIXEL_TOTAL'] = landsat_df['PIXEL_TOTAL'].astype(np.int) landsat_df['FMASK_COUNT'] = landsat_df['FMASK_COUNT'].astype(np.int) landsat_df['FMASK_TOTAL'] = landsat_df['FMASK_TOTAL'].astype(np.int) landsat_df.sort_values(by='YEAR', inplace=True) # Aggregate GRIDMET (to water year) if os.path.isfile(gridmet_monthly_path): logging.debug(' Reading montly GRIDMET CSV') gridmet_df = pd.read_csv(gridmet_monthly_path) elif os.path.isfile(gridmet_daily_path): logging.debug(' Reading daily GRIDMET CSV') gridmet_df = pd.read_csv(gridmet_daily_path) logging.debug(' Computing GRIDMET summaries') # Summarize GRIDMET for target months year if (gridmet_start_month in [10, 11, 12] and gridmet_end_month in [10, 11, 12]): month_mask = ((gridmet_df['MONTH'] >= gridmet_start_month) & (gridmet_df['MONTH'] <= gridmet_end_month)) gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1 elif (gridmet_start_month in [10, 11, 12] and gridmet_end_month not in [10, 11, 12]): month_mask = gridmet_df['MONTH'] >= gridmet_start_month gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1 month_mask = gridmet_df['MONTH'] <= gridmet_end_month gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] else: month_mask = ((gridmet_df['MONTH'] >= gridmet_start_month) & (gridmet_df['MONTH'] <= gridmet_end_month)) gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] # GROUP_YEAR for rows not in the GRIDMET month range will be NAN gridmet_df = gridmet_df[~pd.isnull(gridmet_df['GROUP_YEAR'])] if year_list: gridmet_df = gridmet_df[gridmet_df['GROUP_YEAR'].isin(year_list)] if gridmet_df.empty: logging.error( ' Empty GRIDMET dataframe after filtering by year') continue # Group GRIDMET data by user specified range (default is water year) gridmet_group_df = gridmet_df \ .groupby(['ZONE_FID', 'ZONE_NAME', 'GROUP_YEAR']) \ .agg({'ETO': {'ETO': 'sum'}, 'PPT': {'PPT': 'sum'}}) gridmet_group_df.columns = gridmet_group_df.columns.droplevel(0) gridmet_group_df.reset_index(inplace=True) gridmet_group_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True) gridmet_group_df.sort_values(by='YEAR', inplace=True) # # Group GRIDMET data by month # gridmet_month_df = gridmet_df.groupby( # ['ZONE_FID', 'ZONE_NAME', 'GROUP_YEAR', 'MONTH']).agg({ # 'ETO': {'ETO': 'sum'}, 'PPT': {'PPT': 'sum'}}) # gridmet_month_df.columns = gridmet_month_df.columns.droplevel(0) # gridmet_month_df.reset_index(inplace=True) # gridmet_month_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True) # # gridmet_month_df.sort_values(by=['YEAR', 'MONTH'], inplace=True) # gridmet_month_df.reset_index(inplace=True) # # Rename monthly PPT columns # gridmet_month_df['MONTH'] = 'PPT_M' + gridmet_month_df['MONTH'].astype(str) # # Pivot rows up to separate columns # gridmet_month_df = gridmet_month_df.pivot_table( # 'PPT', ['ZONE_FID', 'YEAR'], 'MONTH') # gridmet_month_df.reset_index(inplace=True) # columns = ['ZONE_FID', 'YEAR'] + ['PPT_M{}'.format(m) for m in gridmet_months] # gridmet_month_df = gridmet_month_df[columns] # del gridmet_month_df.index.name # Merge 
Landsat and GRIDMET collections
        zone_df = landsat_df.merge(
            gridmet_group_df, on=['ZONE_FID', 'ZONE_NAME', 'YEAR'])
        #     gridmet_group_df, on=['ZONE_FID', 'YEAR'])
        # zone_df = zone_df.merge(
        #     gridmet_month_df, on=['ZONE_FID', 'ZONE_NAME', 'YEAR'])
        #     gridmet_month_df, on=['ZONE_FID', 'YEAR'])
        if zone_df is None or zone_df.empty:
            logging.info('  Empty zone dataframe, not generating figures')
            continue

        # Save annual Landsat and GRIDMET tables
        logging.debug('  Saving summary tables')

        logging.debug('  {}'.format(landsat_summary_path))
        landsat_df.sort_values(by=['YEAR'], inplace=True)
        landsat_df.to_csv(landsat_summary_path, index=False)  # columns=export_fields

        logging.debug('  {}'.format(gridmet_summary_path))
        gridmet_group_df.sort_values(by=['YEAR'], inplace=True)
        gridmet_group_df.to_csv(gridmet_summary_path, index=False)  # columns=export_fields

        logging.debug('  {}'.format(zone_summary_path))
        zone_df.sort_values(by=['YEAR'], inplace=True)
        zone_df.to_csv(zone_summary_path, index=False)  # columns=export_fields

        # Adjust year range based on data availability?
        # start_year = min(zone_df['YEAR']),
        # end_year = max(zone_df['YEAR'])

        logging.debug('  Generating figures')
        for band in timeseries_bands:
            timeseries_plot(
                band, zone_df, zone_name, zone_figures_ws,
                ini['INPUTS']['start_year'], ini['INPUTS']['end_year'],
                band_name, band_unit, band_color, plot_dict)
        for band_x, band_y in scatter_bands:
            scatter_plot(
                band_x, band_y, zone_df, zone_name, zone_figures_ws,
                band_name, band_unit, band_color, plot_dict)
        for band in complementary_bands:
            complementary_plot(
                band, zone_df, zone_name, zone_figures_ws,
                band_name, band_unit, band_color, plot_dict)

        del landsat_df, gridmet_df, zone_df
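

# A minimal, standalone sketch of the GROUP_YEAR water-year grouping used
# above, assuming a GRIDMET dataframe with YEAR, MONTH, ETO, and PPT columns.
# The helper name and the example values are illustrative only, not part of
# the original module.
import pandas as pd  # only needed for the commented example below


def group_year_totals(gridmet_df, start_month=10, end_month=9):
    """Sum ETO/PPT by group year (water year by default)

    When the month range wraps the calendar year boundary (e.g. Oct-Sep),
    months at or after start_month are assigned to the following year.
    """
    df = gridmet_df.copy()
    if start_month in [10, 11, 12] and end_month in [10, 11, 12]:
        mask = (df['MONTH'] >= start_month) & (df['MONTH'] <= end_month)
        df.loc[mask, 'GROUP_YEAR'] = df['YEAR'] + 1
    elif start_month in [10, 11, 12]:
        df.loc[df['MONTH'] >= start_month, 'GROUP_YEAR'] = df['YEAR'] + 1
        df.loc[df['MONTH'] <= end_month, 'GROUP_YEAR'] = df['YEAR']
    else:
        mask = (df['MONTH'] >= start_month) & (df['MONTH'] <= end_month)
        df.loc[mask, 'GROUP_YEAR'] = df['YEAR']
    # Rows outside the month range never receive a GROUP_YEAR and are dropped
    df = df[df['GROUP_YEAR'].notnull()]
    return (df.groupby('GROUP_YEAR')[['ETO', 'PPT']].sum()
            .reset_index().rename(columns={'GROUP_YEAR': 'YEAR'}))
# Example: October and November 2015 totals land in water year 2016
# group_year_totals(pd.DataFrame({
#     'YEAR': [2015, 2015, 2016], 'MONTH': [10, 11, 1],
#     'ETO': [50.0, 30.0, 20.0], 'PPT': [10.0, 20.0, 30.0]}))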
def ee_image_download(ini_path=None, overwrite_flag=False): """Earth Engine Annual Mean Image Download Parameters ---------- ini_path : str overwrite_flag : bool, optional If True, overwrite existing files (the default is False). """ logging.info('\nEarth Engine GRIDMET Image Download') # Do we need to support separate GRIDMET years? # start_year = 1984 # end_year = 2016 gridmet_download_bands = { 'eto': 'ETo', 'pr': 'PPT', } # If false, script will export annual and water year total images gridmet_monthly_flag = False gridmet_flag = True pdsi_flag = False pdsi_date_list = [ '0120', '0220', '0320', '0420', '0520', '0620', '0720', '0820', '0920', '1020', '1120', '1220', ] # pdsi_date_list = ['0920', '1220'] # pdsi_date_list = [] if gridmet_monthly_flag: gridmet_folder = 'gridmet_monthly' else: gridmet_folder = 'gridmet_annual' if not pdsi_date_list: pdsi_folder = 'pdsi_full' else: pdsi_folder = 'pdsi' # Read config file ini = inputs.read(ini_path) inputs.parse_section(ini, section='INPUTS') inputs.parse_section(ini, section='SPATIAL') inputs.parse_section(ini, section='EXPORT') inputs.parse_section(ini, section='IMAGES') nodata_value = -9999 # Manually set output spatial reference logging.info('\nHardcoding GRIDMET snap, cellsize and spatial reference') ini['output_x'], ini['output_y'] = -124.79299639209513, 49.41685579737572 ini['SPATIAL']['cellsize'] = 0.041666001963701 # ini['SPATIAL']['cellsize'] = [0.041666001963701, 0.041666001489718] # ini['output_x'], ini['output_y'] = -124.79166666666666666667, 25.04166666666666666667 # ini['SPATIAL']['cellsize'] = 1. / 24 ini['SPATIAL']['osr'] = gdc.epsg_osr(4326) # ini['SPATIAL']['osr'] = gdc.epsg_osr(4269) ini['SPATIAL']['crs'] = 'EPSG:4326' logging.debug(' Snap: {} {}'.format(ini['output_x'], ini['output_y'])) logging.debug(' Cellsize: {}'.format(ini['SPATIAL']['cellsize'])) logging.debug(' OSR: {}'.format(ini['SPATIAL']['osr'])) # Get ee features from shapefile zone_geom_list = gdc.shapefile_2_geom_list_func( ini['INPUTS']['zone_shp_path'], zone_field=ini['INPUTS']['zone_field'], reverse_flag=False) # Filter features by FID before merging geometries if ini['INPUTS']['fid_keep_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] in ini['INPUTS']['fid_keep_list'] ] if ini['INPUTS']['fid_skip_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] not in ini['INPUTS']['fid_skip_list'] ] # Merge geometries if ini['INPUTS']['merge_geom_flag']: merge_geom = ogr.Geometry(ogr.wkbMultiPolygon) for zone in zone_geom_list: zone_multipolygon = ogr.ForceToMultiPolygon( ogr.CreateGeometryFromJson(json.dumps(zone[2]))) for zone_polygon in zone_multipolygon: merge_geom.AddGeometry(zone_polygon) # merge_json = json.loads(merge_mp.ExportToJson()) zone_geom_list = [[ 0, ini['INPUTS']['zone_filename'], json.loads(merge_geom.ExportToJson()) ]] ini['INPUTS']['zone_field'] = '' # Need zone_shp_path projection to build EE geometries zone_osr = gdc.feature_path_osr(ini['INPUTS']['zone_shp_path']) zone_proj = gdc.osr_wkt(zone_osr) # zone_proj = ee.Projection(zone_proj).wkt().getInfo() # zone_proj = zone_proj.replace('\n', '').replace(' ', '') logging.debug(' Zone Projection: {}'.format(zone_proj)) # Initialize Earth Engine API key logging.info('\nInitializing Earth Engine') ee.Initialize() utils.ee_request(ee.Number(1).getInfo()) # Get current running tasks tasks = utils.get_ee_tasks() # Download images for each feature separately for zone_fid, zone_name, zone_json in zone_geom_list: zone_name = zone_name.replace(' 
', '_') logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid)) # Build EE geometry object for zonal stats zone_geom = ee.Geometry(zone_json, zone_proj, False) # Project the zone_geom to the GRIDMET projection # if zone_proj != output_proj: zone_geom = zone_geom.transform(ini['SPATIAL']['crs'], 0.001) # Get the extent from the Earth Engine geometry object? zone_extent = zone_geom.bounds().getInfo()['coordinates'][0] zone_extent = gdc.Extent([ min(zip(*zone_extent)[0]), min(zip(*zone_extent)[1]), max(zip(*zone_extent)[0]), max(zip(*zone_extent)[1]) ]) # # Use GDAL and geometry json to build extent, transform, and shape # zone_extent = gdc.Extent( # ogr.CreateGeometryFromJson(json.dumps(zone_json)).GetEnvelope()) # # zone_extent = gdc.Extent(zone_geom.GetEnvelope()) # zone_extent.ymin, zone_extent.xmax = zone_extent.xmax, zone_extent.ymin # Adjust extent to match raster zone_extent = zone_extent.adjust_to_snap('EXPAND', ini['SPATIAL']['snap_x'], ini['SPATIAL']['snap_y'], ini['SPATIAL']['cellsize']) zone_geo = zone_extent.geo(ini['SPATIAL']['cellsize']) zone_transform = gdc.geo_2_ee_transform(zone_geo) zone_transform = '[' + ','.join(map(str, zone_transform)) + ']' zone_shape = zone_extent.shape(ini['SPATIAL']['cellsize']) logging.debug(' Zone Shape: {}'.format(zone_shape)) logging.debug(' Zone Transform: {}'.format(zone_transform)) logging.debug(' Zone Extent: {}'.format(zone_extent)) # logging.debug(' Geom: {}'.format(zone_geom.getInfo())) # output_transform = zone_transform[:] output_transform = '[' + ','.join(map(str, zone_transform)) + ']' output_shape = '[{1}x{0}]'.format(*zone_shape) logging.debug(' Output Projection: {}'.format(ini['SPATIAL']['crs'])) logging.debug(' Output Transform: {}'.format(output_transform)) logging.debug(' Output Shape: {}'.format(output_shape)) zone_gridmet_ws = os.path.join(ini['IMAGES']['output_ws'], zone_name, gridmet_folder) zone_pdsi_ws = os.path.join(ini['IMAGES']['output_ws'], zone_name, pdsi_folder) if not os.path.isdir(zone_gridmet_ws): os.makedirs(zone_gridmet_ws) if not os.path.isdir(zone_pdsi_ws): os.makedirs(zone_pdsi_ws) # GRIDMET PPT & ETo if gridmet_flag: # Process each image in the collection by date export_list = [] for year in range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1): for b_key, b_name in sorted(gridmet_download_bands.items()): if gridmet_monthly_flag: # Monthly for start_month in range(1, 13): start_dt = datetime.datetime(year, start_month, 1) end_dt = (start_dt + relativedelta(months=1) - datetime.timedelta(0, 1)) export_list.append([ start_dt, end_dt, '{:04d}{:02d}'.format(year, start_month), b_key, b_name ]) else: # Calendar year export_list.append([ datetime.datetime(year, 1, 1), datetime.datetime(year + 1, 1, 1), '{:04d}'.format(year), b_key, b_name ]) # Water year export_list.append([ datetime.datetime(year - 1, 10, 1), datetime.datetime(year, 10, 1) - datetime.timedelta(0, 1), '{:04d}wy'.format(year), b_key, b_name ]) for start_dt, end_dt, date_str, b_key, b_name in export_list: logging.info('{} {}'.format(date_str, b_name)) if end_dt > datetime.datetime.today(): logging.info(' End date after current date, skipping') continue # Rename to match naming style from getDownloadURL # image_name.band.tif export_id = '{}_{}_gridmet_{}'.format( ini['INPUTS']['zone_filename'], date_str, b_name.lower()) output_id = '{}_gridmet.{}'.format(date_str, b_name.lower()) export_path = os.path.join(ini['EXPORT']['export_ws'], export_id + '.tif') output_path = os.path.join(zone_gridmet_ws, output_id + '.tif') 
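# The block below: (1) under overwrite, cancels any queued task with this
# export_id and removes stale export/output rasters; (2) otherwise, if the
# exported GeoTIFF is already in the export workspace, moves it into the zone
# folder, sets its nodata value, and continues; (3) skips IDs whose output
# already exists or whose task is already queued; (4) only then sums the
# filtered GRIDMET collection over the date window and starts an
# Export.image.toDrive task.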
logging.debug(' Export: {}'.format(export_path)) logging.debug(' Output: {}'.format(output_path)) if overwrite_flag: if export_id in tasks.keys(): logging.debug(' Task already submitted, cancelling') ee.data.cancelTask(tasks[export_id]) del tasks[export_id] if os.path.isfile(export_path): logging.debug( ' Export image already exists, removing') utils.remove_file(export_path) # os.remove(export_path) if os.path.isfile(output_path): logging.debug( ' Output image already exists, removing') utils.remove_file(output_path) # os.remove(output_path) else: if os.path.isfile(export_path): logging.debug(' Export image already exists, moving') shutil.move(export_path, output_path) gdc.raster_path_set_nodata(output_path, nodata_value) # DEADBEEF - should raster stats be computed? # gdc.raster_statistics(output_path) # subprocess.check_output([ # 'gdalwarp', # '-ot', float_output_type, '-overwrite', # '-of', 'GTiff', '-co', 'COMPRESS=LZW', # '-srcnodata', str(nodata_value), # '-dstnodata', '{:f}'.format(float_nodata_value), # export_path, output_path]) # with open(os.devnull, 'w') as devnull: # subprocess.check_call( # ['gdalinfo', '-stats', output_path], # stdout=devnull) # subprocess.check_output( # ['gdalmanage', 'delete', export_path]) continue elif os.path.isfile(output_path): logging.debug( ' Output image already exists, skipping') continue elif export_id in tasks.keys(): logging.debug(' Task already submitted, skipping') continue # GRIDMET collection is available in EarthEngine gridmet_coll = ee.ImageCollection('IDAHO_EPSCOR/GRIDMET')\ .filterDate(start_dt, end_dt) \ .select([b_key]) gridmet_image = ee.Image(gridmet_coll.sum()) logging.debug(' Starting download task') # if ini['EXPORT']['export_dest'] == 'gdrive': task = ee.batch.Export.image.toDrive( image=gridmet_image, description=export_id, # folder=ini['EXPORT']['export_folder'], fileNamePrefix=export_id, dimensions=output_shape, crs=ini['SPATIAL']['crs'], crsTransform=output_transform) # elif ini['EXPORT']['export_dest'] == 'gdrive': # task = ee.batch.Export.image.toCloudStorage( # image=gridmet_image, # description=export_id, # bucket=ini['EXPORT']['export_folder'], # fileNamePrefix=export_id, # dimensions=output_shape, # crs=ini['SPATIAL']['crs'], # crsTransform=output_transform) logging.debug(' Starting export task') utils.ee_request(task.start()) # GRIDMET PDSI if pdsi_flag: # Process each image in the collection by date export_list = [] b_name = 'pdsi' for year in range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1): # Dekad for start_month in range(1, 13): for start_day, end_day in zip([1, 10, 20], [10, 20, 30]): if start_month == 12 and start_day == 20: # Go to the first day of the next year (and month) start_dt = datetime.datetime( year, start_month, start_day) end_dt = datetime.datetime(year + 1, 1, 1) elif start_month < 12 and start_day == 20: # Go to the first day of the next month start_dt = datetime.datetime( year, start_month, start_day) end_dt = datetime.datetime(year, start_month + 1, 1) else: start_dt = datetime.datetime( year, start_month, start_day) end_dt = datetime.datetime(year, start_month, end_day) end_dt = end_dt - datetime.timedelta(0, 1) export_list.append([ start_dt, end_dt, '{:04d}{:02d}{:02d}'.format( year, start_month, start_day), b_name ]) # Filter list to only keep last dekad of October and December if pdsi_date_list: export_list = [[ start_dt, end_dt, date_str, b_name ] for start_dt, end_dt, date_str, b_name in export_list if start_dt.strftime('%m%d') in pdsi_date_list] for start_dt, end_dt, 
date_str, b_name in export_list: logging.info('{} {}'.format(date_str, b_name)) # Rename to match naming style from getDownloadURL # image_name.band.tif export_id = '{}_{}_{}'.format( os.path.splitext( ini['INPUTS']['zone_filename'])[0].lower(), date_str, b_name.lower()) output_id = '{}_{}'.format(date_str, b_name.lower()) export_path = os.path.join(ini['EXPORT']['export_ws'], export_id + '.tif') output_path = os.path.join(zone_pdsi_ws, output_id + '.tif') logging.debug(' Export: {}'.format(export_path)) logging.debug(' Output: {}'.format(output_path)) if overwrite_flag: if export_id in tasks.keys(): logging.debug(' Task already submitted, cancelling') ee.data.cancelTask(tasks[export_id]) del tasks[export_id] if os.path.isfile(export_path): logging.debug( ' Export image already exists, removing') utils.remove_file(export_path) # os.remove(export_path) if os.path.isfile(output_path): logging.debug( ' Output image already exists, removing') utils.remove_file(output_path) # os.remove(output_path) else: if os.path.isfile(export_path): logging.debug(' Export image already exists, moving') shutil.move(export_path, output_path) gdc.raster_path_set_nodata(output_path, nodata_value) # DEADBEEF - should raster stats be computed? # gdc.raster_statistics(output_path) continue elif os.path.isfile(output_path): logging.debug( ' Output image already exists, skipping') continue elif export_id in tasks.keys(): logging.debug(' Task already submitted, skipping') continue # PDSI collection is available in EarthEngine # Index the PDSI image directly pdsi_image = ee.Image('IDAHO_EPSCOR/PDSI/{}'.format( start_dt.strftime('%Y%m%d'))) # pdsi_coll = ee.ImageCollection('IDAHO_EPSCOR/PDSI')\ # .filterDate(start_dt, end_dt) \ # .select(['pdsi']) # pdsi_image = ee.Image(pdsi_coll.mean()) logging.debug(' Building export task') # if ini['EXPORT']['export_dest'] == 'gdrive': task = ee.batch.Export.image.toDrive( image=pdsi_image, description=export_id, # folder=ini['EXPORT']['export_folder'], fileNamePrefix=export_id, dimensions=output_shape, crs=ini['SPATIAL']['crs'], crsTransform=output_transform) # elif ini['EXPORT']['export_dest'] == 'cloud': # task = ee.batch.Export.image.toCloudStorage( # image=pdsi_image, # description=export_id, # bucket=ini['EXPORT']['export_folder'], # fileNamePrefix=export_id, # dimensions=output_shape, # crs=ini['SPATIAL']['crs'], # crsTransform=output_transform) logging.debug(' Starting export task') utils.ee_request(task.start())
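

# A minimal sketch of the dekadal date windows built above for the PDSI export
# list: three windows per month starting on the 1st, 10th, and 20th, with the
# last window of each month running to one second before the first day of the
# next month (or of the next year for late December). The function name is
# illustrative only.
import datetime


def dekad_windows(year):
    windows = []
    for month in range(1, 13):
        for start_day, end_day in zip([1, 10, 20], [10, 20, 30]):
            start_dt = datetime.datetime(year, month, start_day)
            if month == 12 and start_day == 20:
                end_dt = datetime.datetime(year + 1, 1, 1)
            elif start_day == 20:
                end_dt = datetime.datetime(year, month + 1, 1)
            else:
                end_dt = datetime.datetime(year, month, end_day)
            windows.append((start_dt, end_dt - datetime.timedelta(0, 1)))
    return windows
# dekad_windows(2016)[-1] -> (2016-12-20 00:00:00, 2016-12-31 23:59:59)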
def main(ini_path, overwrite_flag=True): """Generate Beamer ETg summary tables Args: ini_path (str): overwrite_flag (bool): if True, overwrite existing figures Default is True (for now) """ logging.info('\nGenerate Beamer ETg summary tables') # # Eventually get from INI (like ini['BEAMER']['landsat_products']) # daily_fields = [ # 'ZONE_NAME', 'ZONE_FID', 'DATE', 'SCENE_ID', 'PLATFORM', 'PATH', 'ROW', # 'YEAR', 'MONTH', 'DAY', 'DOY', 'WATER_YEAR', # 'PIXEL_COUNT', 'ETSTAR_COUNT', # 'NDVI_TOA', 'NDWI_TOA', 'ALBEDO_SUR', 'TS', 'EVI_SUR', 'ETSTAR_MEAN', # 'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI', # 'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI', # 'WY_ETO', 'WY_PPT'] # annual_fields = [ # 'SCENE_COUNT', 'PIXEL_COUNT', 'ETSTAR_COUNT', # 'NDVI_TOA', 'NDWI_TOA', 'ALBEDO_SUR', 'TS', # 'EVI_SUR_MEAN', 'EVI_SUR_MEDIAN', 'EVI_SUR_MIN', 'EVI_SUR_MAX', # 'ETSTAR_MEAN', # 'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI', # 'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI', # 'WY_ETO', 'WY_PPT'] # For unit conversion eto_fields = [ 'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI', 'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI', 'WY_ETO' ] ppt_fields = ['WY_PPT'] # Read config file ini = inputs.read(ini_path) inputs.parse_section(ini, section='INPUTS') inputs.parse_section(ini, section='ZONAL_STATS') inputs.parse_section(ini, section='BEAMER') inputs.parse_section(ini, section='SUMMARY') inputs.parse_section(ini, section='TABLES') # Output paths output_daily_path = os.path.join( ini['SUMMARY']['output_ws'], ini['BEAMER']['output_name'].replace('.csv', '_daily.xlsx')) output_annual_path = os.path.join( ini['SUMMARY']['output_ws'], ini['BEAMER']['output_name'].replace('.csv', '_annual.xlsx')) # Check if files already exist if overwrite_flag: if os.path.isfile(output_daily_path): os.remove(output_daily_path) if os.path.isfile(output_annual_path): os.remove(output_annual_path) else: if (os.path.isfile(output_daily_path) and os.path.isfile(output_annual_path)): logging.info('\nOutput files already exist and ' 'overwrite is False, exiting') return True # Start/end year year_list = list( range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1)) month_list = list( utils.wrapped_range(ini['INPUTS']['start_month'], ini['INPUTS']['end_month'], 1, 12)) doy_list = list( utils.wrapped_range(ini['INPUTS']['start_doy'], ini['INPUTS']['end_doy'], 1, 366)) # GRIDMET month range (default to water year) gridmet_start_month = ini['SUMMARY']['gridmet_start_month'] gridmet_end_month = ini['SUMMARY']['gridmet_end_month'] gridmet_months = list( utils.month_range(gridmet_start_month, gridmet_end_month)) logging.info('\nGridmet months: {}'.format(', '.join( map(str, gridmet_months)))) # Read in the zonal stats CSV logging.debug(' Reading zonal stats CSV file') input_df = pd.read_csv( os.path.join(ini['ZONAL_STATS']['output_ws'], ini['BEAMER']['output_name'])) logging.debug(' Filtering Landsat dataframe') input_df = input_df[input_df['PIXEL_COUNT'] > 0] # # This assumes that there are L5/L8 images in the dataframe # if not input_df.empty: # max_pixel_count = max(input_df['PIXEL_COUNT']) # else: # max_pixel_count = 0 if ini['INPUTS']['fid_keep_list']: input_df = input_df[input_df['ZONE_FID'].isin( ini['INPUTS']['fid_keep_list'])] if ini['INPUTS']['fid_skip_list']: input_df = input_df[~input_df['ZONE_FID']. 
isin(ini['INPUTS']['fid_skip_list'])] zone_name_list = sorted(list(set(input_df['ZONE_NAME'].values))) if year_list: input_df = input_df[input_df['YEAR'].isin(year_list)] if month_list: input_df = input_df[input_df['MONTH'].isin(month_list)] if doy_list: input_df = input_df[input_df['DOY'].isin(doy_list)] if ini['INPUTS']['path_keep_list']: input_df = input_df[input_df['PATH'].isin( ini['INPUTS']['path_keep_list'])] if (ini['INPUTS']['row_keep_list'] and ini['INPUTS']['row_keep_list'] != ['XXX']): input_df = input_df[input_df['ROW'].isin( ini['INPUTS']['row_keep_list'])] # Assume the default is for these to be True and only filter if False if not ini['INPUTS']['landsat4_flag']: input_df = input_df[input_df['PLATFORM'] != 'LT04'] if not ini['INPUTS']['landsat5_flag']: input_df = input_df[input_df['PLATFORM'] != 'LT05'] if not ini['INPUTS']['landsat7_flag']: input_df = input_df[input_df['PLATFORM'] != 'LE07'] if not ini['INPUTS']['landsat8_flag']: input_df = input_df[input_df['PLATFORM'] != 'LC08'] if ini['INPUTS']['scene_id_keep_list']: # Replace XXX with primary ROW value for checking skip list SCENE_ID scene_id_df = pd.Series([ s.replace('XXX', '{:03d}'.format(int(r))) for s, r in zip(input_df['SCENE_ID'], input_df['ROW']) ]) input_df = input_df[scene_id_df.isin( ini['INPUTS']['scene_id_keep_list']).values] # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # input_df = input_df[input_df['SCENE_ID'].isin( # ini['INPUTS']['scene_id_keep_list'])] if ini['INPUTS']['scene_id_skip_list']: # Replace XXX with primary ROW value for checking skip list SCENE_ID scene_id_df = pd.Series([ s.replace('XXX', '{:03d}'.format(int(r))) for s, r in zip(input_df['SCENE_ID'], input_df['ROW']) ]) input_df = input_df[np.logical_not( scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)] # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # input_df = input_df[~input_df['SCENE_ID'].isin( # ini['INPUTS']['scene_id_skip_list'])] # Filter by QA/QC value if ini['SUMMARY']['max_qa'] >= 0 and not input_df.empty: logging.debug(' Maximum QA: {0}'.format(ini['SUMMARY']['max_qa'])) input_df = input_df[input_df['QA'] <= ini['SUMMARY']['max_qa']] # First filter by average cloud score if ini['SUMMARY']['max_cloud_score'] < 100 and not input_df.empty: logging.debug(' Maximum cloud score: {0}'.format( ini['SUMMARY']['max_cloud_score'])) input_df = input_df[ input_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']] # Filter by Fmask percentage if ini['SUMMARY']['max_fmask_pct'] < 100 and not input_df.empty: input_df['FMASK_PCT'] = 100 * (input_df['FMASK_COUNT'] / input_df['FMASK_TOTAL']) logging.debug(' Max Fmask threshold: {}'.format( ini['SUMMARY']['max_fmask_pct'])) input_df = input_df[ input_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']] # Filter low count SLC-off images if ini['SUMMARY']['min_slc_off_pct'] > 0 and not input_df.empty: logging.debug(' Mininum SLC-off threshold: {}%'.format( ini['SUMMARY']['min_slc_off_pct'])) # logging.debug(' Maximum pixel count: {}'.format( # max_pixel_count)) slc_off_mask = ((input_df['PLATFORM'] == 'LE07') & ((input_df['YEAR'] >= 2004) | ((input_df['YEAR'] == 2003) & (input_df['DOY'] > 151)))) slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / input_df['PIXEL_TOTAL']) # slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / max_pixel_count) input_df = input_df[((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) & slc_off_mask) | (~slc_off_mask)] if input_df.empty: logging.error(' Empty dataframe after filtering, exiting') return False if not 
os.path.isfile(output_daily_path): logging.info('\nWriting daily values to Excel') excel_f = ExcelWriter(output_daily_path) for zone_name in sorted(zone_name_list): logging.info(' {}'.format(zone_name)) zone_df = input_df[input_df['ZONE_NAME'] == zone_name] zone_df.to_excel(excel_f, zone_name, index=False, float_format='%.4f') # zone_df.to_excel(excel_f, zone_name, index=False) del zone_df excel_f.save() if not os.path.isfile(output_annual_path): logging.info('\nComputing annual summaries') annual_df = input_df \ .groupby(['ZONE_NAME', 'YEAR']) \ .agg({ 'PIXEL_COUNT': ['count', 'mean'], 'PIXEL_TOTAL': ['mean'], 'FMASK_COUNT': 'mean', 'FMASK_TOTAL': 'mean', 'CLOUD_SCORE': 'mean', 'ETSTAR_COUNT': 'mean', 'NDVI_TOA': 'mean', 'NDWI_TOA': 'mean', 'ALBEDO_SUR': 'mean', 'TS': 'mean', # 'EVI_SUR': 'mean', 'EVI_SUR': ['mean', 'median', 'min', 'max'], 'ETSTAR_MEAN': 'mean', 'ETG_MEAN': 'mean', 'ETG_LPI': 'mean', 'ETG_UPI': 'mean', 'ETG_LCI': 'mean', 'ETG_UCI': 'mean', 'ET_MEAN': 'mean', 'ET_LPI': 'mean', 'ET_UPI': 'mean', 'ET_LCI': 'mean', 'ET_UCI': 'mean', 'WY_ETO': 'mean', 'WY_PPT': 'mean' }) annual_df.columns = annual_df.columns.map('_'.join) annual_df = annual_df.rename(columns={ 'PIXEL_COUNT_count': 'SCENE_COUNT', 'PIXEL_COUNT_mean': 'PIXEL_COUNT' }) annual_df = annual_df.rename( columns={ 'EVI_SUR_mean': 'EVI_SUR_MEAN', 'EVI_SUR_median': 'EVI_SUR_MEDIAN', 'EVI_SUR_min': 'EVI_SUR_MIN', 'EVI_SUR_max': 'EVI_SUR_MAX' }) annual_df.rename(columns=lambda x: str(x).replace('_mean', ''), inplace=True) annual_df['SCENE_COUNT'] = annual_df['SCENE_COUNT'].astype(np.int) annual_df['PIXEL_COUNT'] = annual_df['PIXEL_COUNT'].astype(np.int) annual_df['PIXEL_TOTAL'] = annual_df['PIXEL_TOTAL'].astype(np.int) annual_df['FMASK_COUNT'] = annual_df['FMASK_COUNT'].astype(np.int) annual_df['FMASK_TOTAL'] = annual_df['FMASK_TOTAL'].astype(np.int) annual_df['ETSTAR_COUNT'] = annual_df['ETSTAR_COUNT'].astype(np.int) annual_df = annual_df.reset_index() # Convert ETo units if (ini['BEAMER']['eto_units'] == 'mm' and ini['TABLES']['eto_units'] == 'mm'): pass elif (ini['BEAMER']['eto_units'] == 'mm' and ini['TABLES']['eto_units'] == 'in'): annual_df[eto_fields] /= (25.4) elif (ini['BEAMER']['eto_units'] == 'mm' and ini['TABLES']['eto_units'] == 'ft'): annual_df[eto_fields] /= (12 * 25.4) else: logging.error( ('\nERROR: Input units {} and output units {} are not ' + 'currently supported, exiting').format( ini['BEAMER']['eto_units'], ini['TABLES']['eto_units'])) sys.exit() # Convert PPT units if (ini['BEAMER']['ppt_units'] == 'mm' and ini['TABLES']['ppt_units'] == 'mm'): pass elif (ini['BEAMER']['ppt_units'] == 'mm' and ini['TABLES']['ppt_units'] == 'in'): annual_df[ppt_fields] /= (25.4) elif (ini['BEAMER']['ppt_units'] == 'mm' and ini['TABLES']['ppt_units'] == 'ft'): annual_df[ppt_fields] /= (12 * 25.4) else: logging.error( ('\nERROR: Input units {} and output units {} are not ' + 'currently supported, exiting').format( ini['BEAMER']['ppt_units'], ini['TABLES']['ppt_units'])) sys.exit() logging.info('\nWriting annual values to Excel') excel_f = ExcelWriter(output_annual_path) for zone_name in sorted(zone_name_list): logging.info(' {}'.format(zone_name)) zone_df = annual_df[annual_df['ZONE_NAME'] == zone_name] zone_df.to_excel(excel_f, zone_name, index=False, float_format='%.4f') del zone_df excel_f.save()
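

# A minimal sketch of the annual aggregation pattern used above: a single
# groupby().agg() with per-field statistic lists, then flattening the
# resulting column MultiIndex and renaming the PIXEL_COUNT count/mean columns.
# The helper name and toy values are illustrative only; field names follow
# the zonal stats CSV.
def _annual_agg_example():
    import pandas as pd
    toy_df = pd.DataFrame({
        'ZONE_NAME': ['A', 'A', 'A'], 'YEAR': [2015, 2015, 2016],
        'PIXEL_COUNT': [100, 90, 95], 'EVI_SUR': [0.30, 0.40, 0.35]})
    annual = toy_df.groupby(['ZONE_NAME', 'YEAR']).agg({
        'PIXEL_COUNT': ['count', 'mean'],
        'EVI_SUR': ['mean', 'median', 'min', 'max']})
    # Columns come back as a MultiIndex like ('PIXEL_COUNT', 'count')
    annual.columns = annual.columns.map('_'.join)
    annual = annual.rename(columns={
        'PIXEL_COUNT_count': 'SCENE_COUNT', 'PIXEL_COUNT_mean': 'PIXEL_COUNT'})
    # SCENE_COUNT is the number of scenes per zone-year; the remaining
    # columns are per-scene statistics
    return annual.reset_index()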
def main(ini_path, show_flag=False, overwrite_flag=True): """Generate Bokeh figures Bokeh issues: Adjust y range based on non-muted data https://stackoverflow.com/questions/43620837/how-to-get-bokeh-to-dynamically-adjust-y-range-when-panning Linked interactive legends so that there is only one legend for the gridplot Maybe hide or mute QA values above max (instead of filtering them in advance) Args: ini_path (str): show_flag (bool): if True, show the figures in the browser. Default is False. overwrite_flag (bool): if True, overwrite existing tables. Default is True (for now) """ logging.info('\nGenerate interactive timeseries figures') # Eventually read from INI plot_var_list = ['NDVI_TOA', 'ALBEDO_SUR', 'TS', 'NDWI_TOA', 'EVI_SUR'] # plot_var_list = [ # 'NDVI_TOA', 'ALBEDO_SUR', 'TS', 'NDWI_TOA', # 'CLOUD_SCORE', 'FMASK_PCT'] output_folder = 'figures' # Read config file ini = inputs.read(ini_path) inputs.parse_section(ini, section='INPUTS') inputs.parse_section(ini, section='ZONAL_STATS') inputs.parse_section(ini, section='SUMMARY') inputs.parse_section(ini, section='FIGURES') inputs.parse_section(ini, section='BEAMER') # Hardcode GRIDMET month range to the water year ini['SUMMARY']['gridmet_start_month'] = 10 ini['SUMMARY']['gridmet_end_month'] = 9 # Start/end year year_list = list(range( ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1)) month_list = list(utils.wrapped_range( ini['INPUTS']['start_month'], ini['INPUTS']['end_month'], 1, 12)) doy_list = list(utils.wrapped_range( ini['INPUTS']['start_doy'], ini['INPUTS']['end_doy'], 1, 366)) # GRIDMET month range (default to water year) gridmet_start_month = ini['SUMMARY']['gridmet_start_month'] gridmet_end_month = ini['SUMMARY']['gridmet_end_month'] gridmet_months = list(utils.month_range( gridmet_start_month, gridmet_end_month)) logging.info('\nGridmet months: {}'.format( ', '.join(map(str, gridmet_months)))) # Get ee features from shapefile zone_geom_list = gdc.shapefile_2_geom_list_func( ini['INPUTS']['zone_shp_path'], zone_field=ini['INPUTS']['zone_field'], reverse_flag=False) # Filter features by FID before merging geometries if ini['INPUTS']['fid_keep_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] in ini['INPUTS']['fid_keep_list']] if ini['INPUTS']['fid_skip_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] not in ini['INPUTS']['fid_skip_list']] # # Filter features by FID before merging geometries # if ini['INPUTS']['fid_keep_list']: # landsat_df = landsat_df[landsat_df['ZONE_FID'].isin( # ini['INPUTS']['fid_keep_list'])] # if ini['INPUTS']['fid_skip_list']: # landsat_df = landsat_df[~landsat_df['ZONE_FID'].isin( # ini['INPUTS']['fid_skip_list'])] logging.info('\nProcessing zones') for zone_fid, zone_name, zone_json in zone_geom_list: zone_name = zone_name.replace(' ', '_') logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid)) zone_stats_ws = os.path.join( ini['ZONAL_STATS']['output_ws'], zone_name) zone_figures_ws = os.path.join( ini['SUMMARY']['output_ws'], zone_name, output_folder) if not os.path.isdir(zone_stats_ws): logging.debug(' Folder {} does not exist, skipping'.format( zone_stats_ws)) continue elif not os.path.isdir(zone_figures_ws): os.makedirs(zone_figures_ws) # Input paths landsat_daily_path = os.path.join( zone_stats_ws, '{}_landsat_daily.csv'.format(zone_name)) gridmet_daily_path = os.path.join( zone_stats_ws, '{}_gridmet_daily.csv'.format(zone_name)) gridmet_monthly_path = os.path.join( zone_stats_ws, 
'{}_gridmet_monthly.csv'.format(zone_name)) if not os.path.isfile(landsat_daily_path): logging.error(' Landsat daily CSV does not exist, skipping zone') continue elif (not os.path.isfile(gridmet_daily_path) and not os.path.isfile(gridmet_monthly_path)): logging.error( ' GRIDMET daily or monthly CSV does not exist, skipping zone') continue # DEADBEEF - Eventually support generating only Landsat figures # logging.error( # ' GRIDMET daily and/or monthly CSV files do not exist.\n' # ' ETo and PPT will not be processed.') # Output paths output_doy_path = os.path.join( zone_figures_ws, '{}_timeseries_doy.html'.format(zone_name)) output_date_path = os.path.join( zone_figures_ws, '{}_timeseries_date.html'.format(zone_name)) logging.debug(' Reading Landsat CSV') landsat_df = pd.read_csv(landsat_daily_path) logging.debug(' Filtering Landsat dataframe') landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0] # QA field should have been written in zonal stats code # Eventually this block can be removed if 'QA' not in landsat_df.columns.values: landsat_df['QA'] = 0 # # This assumes that there are L5/L8 images in the dataframe # if not landsat_df.empty: # max_pixel_count = max(landsat_df['PIXEL_COUNT']) # else: # max_pixel_count = 0 if year_list: landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)] if month_list: landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)] if doy_list: landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)] # Assume the default is for these to be True and only filter if False if not ini['INPUTS']['landsat4_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT04'] if not ini['INPUTS']['landsat5_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT05'] if not ini['INPUTS']['landsat7_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LE07'] if not ini['INPUTS']['landsat8_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LC08'] if ini['INPUTS']['path_keep_list']: landsat_df = landsat_df[ landsat_df['PATH'].isin(ini['INPUTS']['path_keep_list'])] if (ini['INPUTS']['row_keep_list'] and ini['INPUTS']['row_keep_list'] != ['XXX']): landsat_df = landsat_df[ landsat_df['ROW'].isin(ini['INPUTS']['row_keep_list'])] if ini['INPUTS']['scene_id_keep_list']: # Replace XXX with primary ROW value for checking skip list SCENE_ID scene_id_df = pd.Series([ s.replace('XXX', '{:03d}'.format(int(r))) for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])]) landsat_df = landsat_df[scene_id_df.isin( ini['INPUTS']['scene_id_keep_list']).values] # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # landsat_df = landsat_df[landsat_df['SCENE_ID'].isin( # ini['INPUTS']['scene_id_keep_list'])] if ini['INPUTS']['scene_id_skip_list']: # Replace XXX with primary ROW value for checking skip list SCENE_ID scene_id_df = pd.Series([ s.replace('XXX', '{:03d}'.format(int(r))) for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])]) landsat_df = landsat_df[np.logical_not(scene_id_df.isin( ini['INPUTS']['scene_id_skip_list']).values)] # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # landsat_df = landsat_df[np.logical_not(landsat_df['SCENE_ID'].isin( # ini['INPUTS']['scene_id_skip_list']))] # Filter by QA/QC value if ini['SUMMARY']['max_qa'] >= 0 and not landsat_df.empty: logging.debug(' Maximum QA: {0}'.format( ini['SUMMARY']['max_qa'])) landsat_df = landsat_df[ landsat_df['QA'] <= ini['SUMMARY']['max_qa']] # Filter by average cloud score if ini['SUMMARY']['max_cloud_score'] < 100 and not landsat_df.empty: logging.debug(' Maximum 
cloud score: {0}'.format( ini['SUMMARY']['max_cloud_score'])) landsat_df = landsat_df[ landsat_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']] # Filter by Fmask percentage if ini['SUMMARY']['max_fmask_pct'] < 100 and not landsat_df.empty: landsat_df['FMASK_PCT'] = 100 * ( landsat_df['FMASK_COUNT'] / landsat_df['FMASK_TOTAL']) logging.debug(' Max Fmask threshold: {}'.format( ini['SUMMARY']['max_fmask_pct'])) landsat_df = landsat_df[ landsat_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']] # Filter low count SLC-off images if ini['SUMMARY']['min_slc_off_pct'] > 0 and not landsat_df.empty: logging.debug(' Mininum SLC-off threshold: {}%'.format( ini['SUMMARY']['min_slc_off_pct'])) # logging.debug(' Maximum pixel count: {}'.format( # max_pixel_count)) slc_off_mask = ( (landsat_df['PLATFORM'] == 'LE07') & ((landsat_df['YEAR'] >= 2004) | ((landsat_df['YEAR'] == 2003) & (landsat_df['DOY'] > 151)))) slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / landsat_df['PIXEL_TOTAL']) # slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / max_pixel_count) landsat_df = landsat_df[ ((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) & slc_off_mask) | (~slc_off_mask)] if landsat_df.empty: logging.error( ' Empty Landsat dataframe after filtering, skipping zone') continue # Aggregate GRIDMET (to water year) if os.path.isfile(gridmet_monthly_path): logging.debug(' Reading montly GRIDMET CSV') gridmet_df = pd.read_csv(gridmet_monthly_path) elif os.path.isfile(gridmet_daily_path): logging.debug(' Reading daily GRIDMET CSV') gridmet_df = pd.read_csv(gridmet_daily_path) logging.debug(' Computing GRIDMET summaries') # Summarize GRIDMET for target months year if (gridmet_start_month in [10, 11, 12] and gridmet_end_month in [10, 11, 12]): month_mask = ( (gridmet_df['MONTH'] >= gridmet_start_month) & (gridmet_df['MONTH'] <= gridmet_end_month)) gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1 elif (gridmet_start_month in [10, 11, 12] and gridmet_end_month not in [10, 11, 12]): month_mask = gridmet_df['MONTH'] >= gridmet_start_month gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1 month_mask = gridmet_df['MONTH'] <= gridmet_end_month gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] else: month_mask = ( (gridmet_df['MONTH'] >= gridmet_start_month) & (gridmet_df['MONTH'] <= gridmet_end_month)) gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] # GROUP_YEAR for rows not in the GRIDMET month range will be NAN gridmet_df = gridmet_df[~pd.isnull(gridmet_df['GROUP_YEAR'])] if year_list: gridmet_df = gridmet_df[gridmet_df['GROUP_YEAR'].isin(year_list)] if gridmet_df.empty: logging.error( ' Empty GRIDMET dataframe after filtering by year') continue # Group GRIDMET data by user specified range (default is water year) gridmet_group_df = gridmet_df \ .groupby(['ZONE_NAME', 'ZONE_FID', 'GROUP_YEAR']) \ .agg({'ETO': np.sum, 'PPT': np.sum}) \ .reset_index() \ .sort_values(by='GROUP_YEAR') # .rename(columns={'ETO': 'ETO', 'PPT': 'ETO'}) \ # Rename wasn't working when chained... 
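# GROUP_YEAR is a float column after the masked assignments above (rows
# outside the month range were NaN), so it is renamed to YEAR and cast back
# to int below so the merge with the Landsat dataframe on
# ['ZONE_NAME', 'ZONE_FID', 'YEAR'] lines up with integer years.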
gridmet_group_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True) gridmet_group_df['YEAR'] = gridmet_group_df['YEAR'].astype(int) # # Group GRIDMET data by month # gridmet_month_df = gridmet_df\ # .groupby(['ZONE_NAME', 'ZONE_FID', 'GROUP_YEAR', 'MONTH']) \ # .agg({'ETO': np.sum, 'PPT': np.sum}) \ # .reset_index() \ # .sort_values(by=['GROUP_YEAR', 'MONTH']) # gridmet_month_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True) # # Rename monthly PPT columns # gridmet_month_df['MONTH'] = 'PPT_M' + gridmet_month_df['MONTH'].astype(str) # # Pivot rows up to separate columns # gridmet_month_df = gridmet_month_df.pivot_table( # 'PPT', ['ZONE_NAME', 'YEAR'], 'MONTH') # gridmet_month_df.reset_index(inplace=True) # columns = ['ZONE_NAME', 'YEAR'] + ['PPT_M{}'.format(m) for m in gridmet_months] # gridmet_month_df = gridmet_month_df[columns] # del gridmet_month_df.index.name # Merge Landsat and GRIDMET collections zone_df = landsat_df.merge( gridmet_group_df, on=['ZONE_NAME', 'ZONE_FID', 'YEAR']) if zone_df is None or zone_df.empty: logging.info(' Empty zone dataframe, not generating figures') continue # Compute ETg zone_df['ETG_MEAN'] = zone_df['ETSTAR_MEAN'] * ( zone_df['ETO'] - zone_df['PPT']) zone_df['ETG_LPI'] = zone_df['ETSTAR_LPI'] * ( zone_df['ETO'] - zone_df['PPT']) zone_df['ETG_UPI'] = zone_df['ETSTAR_UPI'] * ( zone_df['ETO'] - zone_df['PPT']) zone_df['ETG_LCI'] = zone_df['ETSTAR_LCI'] * ( zone_df['ETO'] - zone_df['PPT']) zone_df['ETG_UCI'] = zone_df['ETSTAR_UCI'] * ( zone_df['ETO'] - zone_df['PPT']) # Compute ET zone_df['ET_MEAN'] = zone_df['ETG_MEAN'] + zone_df['PPT'] zone_df['ET_LPI'] = zone_df['ETG_LPI'] + zone_df['PPT'] zone_df['ET_UPI'] = zone_df['ETG_UPI'] + zone_df['PPT'] zone_df['ET_LCI'] = zone_df['ETG_LCI'] + zone_df['PPT'] zone_df['ET_UCI'] = zone_df['ETG_UCI'] + zone_df['PPT'] # ORIGINAL PLOTTING CODE # Check that plot variables are present for plot_var in plot_var_list: if plot_var not in landsat_df.columns.values: logging.error( ' The plotting variable {} does not exist in the ' 'dataframe'.format(plot_var)) sys.exit() # if ini['INPUTS']['scene_id_keep_list']: # # Replace XXX with primary ROW value for checking skip list SCENE_ID # scene_id_df = pd.Series([ # s.replace('XXX', '{:03d}'.format(int(r))) # for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])]) # landsat_df = landsat_df[scene_id_df.isin( # ini['INPUTS']['scene_id_keep_list']).values] # # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # # landsat_df = landsat_df[landsat_df['SCENE_ID'].isin( # # ini['INPUTS']['scene_id_keep_list'])] # if ini['INPUTS']['scene_id_skip_list']: # # Replace XXX with primary ROW value for checking skip list SCENE_ID # scene_id_df = pd.Series([ # s.replace('XXX', '{:03d}'.format(int(r))) # for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])]) # landsat_df = landsat_df[np.logical_not(scene_id_df.isin( # ini['INPUTS']['scene_id_skip_list']).values)] # # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # # landsat_df = landsat_df[np.logical_not(landsat_df['SCENE_ID'].isin( # # ini['INPUTS']['scene_id_skip_list']))] # Compute colors for each QA value logging.debug(' Building column data source') qa_values = sorted(list(set(zone_df['QA'].values))) colors = { qa: "#%02x%02x%02x" % (int(r), int(g), int(b)) for qa, (r, g, b, _) in zip( qa_values, 255 * cm.viridis(mpl.colors.Normalize()(qa_values))) } logging.debug(' QA values: {}'.format( ', '.join(map(str, qa_values)))) # Unpack the data by QA type to support interactive legends sources = dict() 
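# One ColumnDataSource is built per QA value below so that each QA level gets
# its own renderer and can be toggled from the interactive legend
# (click_policy="hide"); COLOR is precomputed above from the viridis colormap.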
# sources = defaultdict(dict) # platform_list = ['LT04', 'LT05', 'LE07', 'LC08'] for qa_value in qa_values: # for platform in platform_list: # qa_df = zone_df[ # (zone_df['PLATFORM'] == platform) & # (zone_df['QA'] == qa_value)] qa_df = zone_df[zone_df['QA'] == qa_value] qa_data = { 'INDEX': list(range(len(qa_df.index))), 'PLATFORM': qa_df['PLATFORM'], 'DATE': pd.to_datetime(qa_df['DATE']), 'TIME': pd.to_datetime(qa_df['DATE']).map( lambda x: x.strftime('%Y-%m-%d')), 'DOY': qa_df['DOY'].values, 'QA': qa_df['QA'].values, 'COLOR': [colors[qa] for qa in qa_df['QA'].values] } for plot_var in plot_var_list: if plot_var in qa_df.columns.values: qa_data.update({plot_var: qa_df[plot_var].values}) sources[qa_value] = bokeh.models.ColumnDataSource(qa_data) # sources[qa_value][platform] = bokeh.models.ColumnDataSource( # qa_data) tooltips = [ ("LANDSAT", "@PLATFORM"), ("DATE", "@TIME"), ("DOY", "@DOY")] # hover_tool = bokeh.models.HoverTool(tooltips=tooltips) # tools="xwheel_zoom,xpan,xbox_zoom,reset,box_select" # tools = [ # hover_tool, # bokeh.models.WheelZoomTool(dimensions='width'), # bokeh.models.PanTool(dimensions='width'), # bokeh.models.BoxZoomTool(dimensions='width'), # bokeh.models.ResetTool(), # bokeh.models.BoxSelectTool()] # Selection hover_circle = Circle( fill_color='#ff0000', line_color='#ff0000') selected_circle = Circle( fill_color='COLOR', line_color='COLOR') nonselected_circle = Circle( fill_color='#aaaaaa', line_color='#aaaaaa') # Plot the data by DOY logging.debug(' Building DOY timeseries figure') if os.path.isfile(output_doy_path): os.remove(output_doy_path) output_file(output_doy_path, title=zone_name) figure_args = dict( plot_width=750, plot_height=250, title=None, tools="xwheel_zoom,xpan,xbox_zoom,reset,box_select", # tools="xwheel_zoom,xpan,xbox_zoom,reset,tap", active_scroll="xwheel_zoom") plot_args = dict( size=4, alpha=0.9, color='COLOR') if ini['SUMMARY']['max_qa'] > 0: plot_args['legend'] = 'QA' figures = [] for plot_i, plot_var in enumerate(plot_var_list): if plot_i == 0: f = figure( # x_range=bokeh.models.Range1d(1, 366, bounds=(1, 366)), y_axis_label=plot_var, **figure_args) else: f = figure( x_range=f.x_range, y_axis_label=plot_var, **figure_args) # # Add each QA level as a separate object # for qa, platform_sources in sorted(sources.items()): # for platform, source in platform_sources.items(): # if platform == 'LT05': # r = f.triangle( # 'DOY', plot_var, source=source, **plot_args) # elif platform == 'LE07': # r = f.square( # 'DOY', plot_var, source=source, **plot_args) # elif platform == 'LC08': # r = f.circle( # 'DOY', plot_var, source=source, **plot_args) # else: # r = f.diamond( # 'DOY', plot_var, source=source, **plot_args) # r.hover_glyph = hover_circle # r.selection_glyph = selected_circle # r.nonselection_glyph = nonselected_circle # r.muted_glyph = nonselected_circle # hover_tool.renderers.append(r) # Add each QA level as a separate object for qa, source in sorted(sources.items()): r = f.circle('DOY', plot_var, source=source, **plot_args) r.hover_glyph = hover_circle r.selection_glyph = selected_circle r.nonselection_glyph = nonselected_circle r.muted_glyph = nonselected_circle # # DEADBEEF - This will display high QA points as muted # if qa > ini['SUMMARY']['max_qa']: # r.muted = True # # r.visible = False f.add_tools(bokeh.models.HoverTool(tooltips=tooltips)) # if ini['SUMMARY']['max_qa'] > 0: f.legend.location = "top_left" f.legend.click_policy = "hide" # f.legend.click_policy = "mute" f.legend.orientation = "horizontal" figures.append(f) del f # Try 
to not allow more than 4 plots in a column p = gridplot( figures, ncols=len(plot_var_list) // 3, sizing_mode='stretch_both') if show_flag: show(p) save(p) # Plot the data by DATE logging.debug(' Building date timeseries figure') if os.path.isfile(output_date_path): os.remove(output_date_path) output_file(output_date_path, title=zone_name) figure_args = dict( plot_width=750, plot_height=250, title=None, tools="xwheel_zoom,xpan,xbox_zoom,reset,box_select", # tools="xwheel_zoom,xpan,xbox_zoom,reset,tap", active_scroll="xwheel_zoom", x_axis_type="datetime",) plot_args = dict( size=4, alpha=0.9, color='COLOR') if ini['SUMMARY']['max_qa'] > 0: plot_args['legend'] = 'QA' figures = [] for plot_i, plot_var in enumerate(plot_var_list): if plot_i == 0: f = figure( # x_range=bokeh.models.Range1d(x_limit[0], x_limit[1], bounds=x_limit), y_axis_label=plot_var, **figure_args) else: f = figure( x_range=f.x_range, y_axis_label=plot_var, **figure_args) if plot_var == 'TS': f.y_range.bounds = (270, None) # # Add each QA level as a separate object # for qa, platform_sources in sorted(sources.items()): # for platform, source in sorted(platform_sources.items()): # if platform == 'LT05': # r = f.triangle( # 'DATE', plot_var, source=source, **plot_args) # elif platform == 'LE07': # r = f.square( # 'DATE', plot_var, source=source, **plot_args) # elif platform == 'LC08': # r = f.circle( # 'DATE', plot_var, source=source, **plot_args) # else: # r = f.diamond( # 'DATE', plot_var, source=source, **plot_args) # r.hover_glyph = hover_circle # r.selection_glyph = selected_circle # r.nonselection_glyph = nonselected_circle # r.muted_glyph = nonselected_circle # hover_tool.renderers.append(r) # Add each QA level as a separate object for qa, source in sorted(sources.items()): r = f.circle('DATE', plot_var, source=source, **plot_args) r.hover_glyph = hover_circle r.selection_glyph = selected_circle r.nonselection_glyph = nonselected_circle r.muted_glyph = nonselected_circle # # DEADBEEF - This will display high QA points as muted # if qa > ini['SUMMARY']['max_qa']: # r.muted = True # # r.visible = False f.add_tools(bokeh.models.HoverTool(tooltips=tooltips)) # if ini['SUMMARY']['max_qa'] > 0: f.legend.location = "top_left" f.legend.click_policy = "hide" # f.legend.click_policy = "mute" f.legend.orientation = "horizontal" figures.append(f) del f # Try to not allow more than 4 plots in a column p = gridplot( figures, ncols=len(plot_var_list) // 3, sizing_mode='stretch_both') if show_flag: show(p) save(p) # Pause after each iteration if show is True if show_flag: input('Press ENTER to continue')
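

# A minimal sketch of the Beamer relations computed for the zone dataframe
# above: groundwater ET (ETg) applies the ET* fraction to the water-year
# atmospheric demand in excess of precipitation, and total ET adds the
# precipitation back. The helper name is illustrative; units follow the
# GRIDMET inputs (mm).
def beamer_et(etstar, wy_eto, wy_ppt):
    etg = etstar * (wy_eto - wy_ppt)   # ETG_* = ETSTAR_* x (WY_ETO - WY_PPT)
    et = etg + wy_ppt                  # ET_* = ETG_* + WY_PPT
    return etg, et
# beamer_et(0.8, 1200.0, 250.0) -> (760.0, 1010.0)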
def main(ini_path=None, overwrite_flag=False): """Generate summary thumbnails Parameters ---------- ini_path : str overwrite_flag : bool, optional If True, overwrite existing files (the default is False). """ logging.info('\nGenerate summary thumbnails') # Inputs (eventually move to INI file?) vis_args = { 'bands': ['red', 'green', 'blue'], # 'bands': ['swir1', 'nir', 'red'], 'min': [0.01, 0.01, 0.01], 'max': [0.4, 0.4, 0.4], 'gamma': [1.8, 1.8, 1.8] } # Buffer zone polygon zone_buffer = 240 # Generate images by DOY doy_flag = True # Generate images by date date_flag = True # Read config file ini = inputs.read(ini_path) inputs.parse_section(ini, section='INPUTS') inputs.parse_section(ini, section='SPATIAL') inputs.parse_section(ini, section='SUMMARY') year_list = range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1) month_list = list( utils.wrapped_range(ini['INPUTS']['start_month'], ini['INPUTS']['end_month'], 1, 12)) doy_list = list( utils.wrapped_range(ini['INPUTS']['start_doy'], ini['INPUTS']['end_doy'], 1, 366)) # Add merged row XXX to keep list ini['INPUTS']['row_keep_list'].append('XXX') # Get ee features from shapefile zone_geom_list = gdc.shapefile_2_geom_list_func( ini['INPUTS']['zone_shp_path'], zone_field=ini['INPUTS']['zone_field'], reverse_flag=False) # Filter features by FID before merging geometries if ini['INPUTS']['fid_keep_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] in ini['INPUTS']['fid_keep_list'] ] if ini['INPUTS']['fid_skip_list']: zone_geom_list = [ zone_obj for zone_obj in zone_geom_list if zone_obj[0] not in ini['INPUTS']['fid_skip_list'] ] # Need zone_shp_path projection to build EE geometries zone = {} zone['osr'] = gdc.feature_path_osr(ini['INPUTS']['zone_shp_path']) zone['proj'] = gdc.osr_wkt(zone['osr']) # zone['proj'] = ee.Projection(zone['proj']).wkt().getInfo() # zone['proj'] = zone['proj'].replace('\n', '').replace(' ', '') # logging.debug(' Zone Projection: {}'.format(zone['proj'])) # Initialize Earth Engine API key logging.debug('') ee.Initialize() coll_dict = { 'LT04': 'LANDSAT/LT04/C01/T1_SR', 'LT05': 'LANDSAT/LT05/C01/T1_SR', 'LE07': 'LANDSAT/LE07/C01/T1_SR', 'LC08': 'LANDSAT/LC08/C01/T1_SR' } logging.info('\nProcessing zones') for zone_fid, zone_name, zone_json in zone_geom_list: zone['fid'] = zone_fid zone['name'] = zone_name.replace(' ', '_') zone['json'] = zone_json logging.info('ZONE: {} (FID: {})'.format(zone['name'], zone['fid'])) # Build EE geometry object for zonal stats zone['geom'] = ee.Geometry(geo_json=zone['json'], opt_proj=zone['proj'], opt_geodesic=False) # logging.debug(' Centroid: {}'.format( # zone['geom'].centroid(100).getInfo()['coordinates'])) # Use feature geometry to build extent, transform, and shape zone['extent'] = gdc.Extent( ogr.CreateGeometryFromJson(json.dumps(zone['json'])).GetEnvelope()) # zone['extent'] = gdc.Extent(zone['geom'].GetEnvelope()) zone['extent'] = zone['extent'].ogrenv_swap() zone['extent'] = zone['extent'].buffer(zone_buffer) zone['extent'] = zone['extent'].adjust_to_snap( 'EXPAND', ini['SPATIAL']['snap_x'], ini['SPATIAL']['snap_y'], ini['SPATIAL']['cellsize']) zone['geo'] = zone['extent'].geo(ini['SPATIAL']['cellsize']) zone['transform'] = gdc.geo_2_ee_transform(zone['geo']) # zone['transform'] = '[' + ','.join(map(str, zone['transform'])) + ']' zone['shape'] = zone['extent'].shape(ini['SPATIAL']['cellsize']) logging.debug(' Zone Shape: {}'.format(zone['shape'])) logging.debug(' Zone Transform: {}'.format(zone['transform'])) logging.debug(' Zone Extent: 
{}'.format(zone['extent'])) # logging.debug(' Zone Geom: {}'.format(zone['geom'].getInfo())) # Build an EE geometry of the extent extent_geom = ee.Geometry.Rectangle(coords=list(zone['extent']), proj=zone['proj'], geodesic=False) if 'SUMMARY' in ini.keys(): zone_output_ws = os.path.join(ini['SUMMARY']['output_ws'], zone['name']) elif 'EXPORT' in ini.keys(): zone_output_ws = os.path.join(ini['EXPORT']['output_ws'], zone['name']) else: logging.error( 'INI file does not contain a SUMMARY or EXPORT section') sys.exit() if not os.path.isdir(zone_output_ws): logging.debug( 'Folder {} does not exist, skipping'.format(zone_output_ws)) continue landsat_daily_path = os.path.join( zone_output_ws, '{}_landsat_daily.csv'.format(zone['name'])) if not os.path.isfile(landsat_daily_path): logging.error(' Landsat daily CSV does not exist, skipping zone') continue output_doy_ws = os.path.join(zone_output_ws, 'thumbnails_doy') output_date_ws = os.path.join(zone_output_ws, 'thumbnails_date') if overwrite_flag and os.path.isdir(output_doy_ws): for file_name in os.listdir(output_doy_ws): os.remove(os.path.join(output_doy_ws, file_name)) if overwrite_flag and os.path.isdir(output_date_ws): for file_name in os.listdir(output_date_ws): os.remove(os.path.join(output_date_ws, file_name)) if doy_flag and not os.path.isdir(output_doy_ws): os.makedirs(output_doy_ws) if date_flag and not os.path.isdir(output_date_ws): os.makedirs(output_date_ws) logging.debug(' Reading Landsat CSV') landsat_df = pd.read_csv(landsat_daily_path) # landsat_df = pd.read_csv( # landsat_daily_path, parse_dates=['DATE'], index_col='DATE') landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0] # Common summary filtering logging.debug(' Filtering using INI SUMMARY parameters') if year_list: landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)] if month_list: landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)] if doy_list: landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)] if ini['INPUTS']['path_keep_list']: landsat_df = landsat_df[landsat_df['PATH'].isin( ini['INPUTS']['path_keep_list'])] if (ini['INPUTS']['row_keep_list'] and ini['INPUTS']['row_keep_list'] != ['XXX']): landsat_df = landsat_df[landsat_df['ROW'].isin( ini['INPUTS']['row_keep_list'])] # Assume the default is for these to be True and only filter if False if not ini['INPUTS']['landsat4_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT04'] if not ini['INPUTS']['landsat5_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT05'] if not ini['INPUTS']['landsat7_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LE07'] if not ini['INPUTS']['landsat8_flag']: landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LC08'] if ini['INPUTS']['scene_id_keep_list']: # Replace XXX with primary ROW value for checking skip list SCENE_ID scene_id_df = pd.Series([ s.replace('XXX', '{:03d}'.format(int(r))) for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW']) ]) landsat_df = landsat_df[scene_id_df.isin( ini['INPUTS']['scene_id_keep_list']).values] # This won't work: SCENE_ID have XXX but scene_id_skip_list don't # landsat_df = landsat_df[landsat_df['SCENE_ID'].isin( # ini['INPUTS']['scene_id_keep_list'])] if ini['INPUTS']['scene_id_skip_list']: # Replace XXX with primary ROW value for checking skip list SCENE_ID scene_id_df = pd.Series([ s.replace('XXX', '{:03d}'.format(int(r))) for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW']) ]) landsat_df = landsat_df[np.logical_not( scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)] # This won't 
# Filter by QA/QC value
if ini['SUMMARY']['max_qa'] >= 0 and not landsat_df.empty:
    logging.debug('    Maximum QA: {0}'.format(ini['SUMMARY']['max_qa']))
    landsat_df = landsat_df[landsat_df['QA'] <= ini['SUMMARY']['max_qa']]

# Filter by average cloud score
if ini['SUMMARY']['max_cloud_score'] < 100 and not landsat_df.empty:
    logging.debug('    Maximum cloud score: {0}'.format(
        ini['SUMMARY']['max_cloud_score']))
    landsat_df = landsat_df[
        landsat_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

# Filter by Fmask percentage
if ini['SUMMARY']['max_fmask_pct'] < 100 and not landsat_df.empty:
    landsat_df['FMASK_PCT'] = 100 * (
        landsat_df['FMASK_COUNT'] / landsat_df['FMASK_TOTAL'])
    logging.debug('    Max Fmask threshold: {}'.format(
        ini['SUMMARY']['max_fmask_pct']))
    landsat_df = landsat_df[
        landsat_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

# Filter low count SLC-off images
if ini['SUMMARY']['min_slc_off_pct'] > 0 and not landsat_df.empty:
    logging.debug('    Minimum SLC-off threshold: {}%'.format(
        ini['SUMMARY']['min_slc_off_pct']))
    # logging.debug('    Maximum pixel count: {}'.format(max_pixel_count))
    slc_off_mask = (
        (landsat_df['PLATFORM'] == 'LE07') &
        ((landsat_df['YEAR'] >= 2004) |
         ((landsat_df['YEAR'] == 2003) & (landsat_df['DOY'] > 151))))
    slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / landsat_df['PIXEL_TOTAL'])
    # slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / max_pixel_count)
    landsat_df = landsat_df[
        ((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) & slc_off_mask) |
        (~slc_off_mask)]

if landsat_df.empty:
    logging.error('  Empty Landsat dataframe after filtering, skipping zone')
    continue

logging.debug('  Downloading thumbnails')
for landsat, start_date in zip(landsat_df['PLATFORM'], landsat_df['DATE']):
    start_dt = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = start_dt + datetime.timedelta(days=1)
    end_date = end_dt.strftime('%Y-%m-%d')
    output_doy_path = os.path.join(
        output_doy_ws,
        '{}_{}.png'.format(start_dt.strftime('%j_%Y-%m-%d'), landsat))
    output_date_path = os.path.join(
        output_date_ws,
        '{}_{}.png'.format(start_dt.strftime('%Y-%m-%d_%j'), landsat))

    # DEADBEEF - This seems like a poor approach
    save_doy_flag = False
    save_date_flag = False
    if doy_flag and not os.path.isfile(output_doy_path):
        save_doy_flag = True
    if date_flag and not os.path.isfile(output_date_path):
        save_date_flag = True
    if not save_doy_flag and not save_date_flag:
        logging.debug(
            '  {} - file already exists, skipping'.format(start_date))
        continue
    logging.debug('  {}'.format(start_date))
    # logging.debug('  {}'.format(output_path))

    if landsat in ['LT04', 'LT05', 'LE07']:
        ee_coll = ee.ImageCollection(coll_dict[landsat]).select(
            ['B1', 'B2', 'B3', 'B4', 'B5', 'B7'],
            ['blue', 'green', 'red', 'nir', 'swir1', 'swir2'])
    elif landsat in ['LC08']:
        ee_coll = ee.ImageCollection(coll_dict[landsat]).select(
            ['B2', 'B3', 'B4', 'B5', 'B6', 'B7'],
            ['blue', 'green', 'red', 'nir', 'swir1', 'swir2'])
    ee_coll = ee_coll.filterDate(start_date, end_date)
    ee_image = ee.Image(ee_coll.median().divide(10000)) \
        .visualize(**vis_args) \
        .reproject(crs=zone['proj'], crsTransform=zone['transform']) \
        .paint(zone['geom'], color=0.5, width=1) \
        .clip(extent_geom)

    # Get the image thumbnail URL (retry with a squared backoff on failure)
    for i in range(10):
        try:
            output_url = ee_image.getThumbUrl({'format': 'png'})
            break
        except Exception as e:
            logging.error('  Exception: {}, retry {}'.format(e, i))
            logging.debug('{}'.format(e))
            sleep(i ** 2)
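    # Note (a sketch, not part of the original script): if every getThumbUrl
    # attempt above fails, output_url is never assigned and the download loop
    # below raises a NameError.  One way to guard against that:
    # output_url = None
    # for i in range(10):
    #     try:
    #         output_url = ee_image.getThumbUrl({'format': 'png'})
    #         break
    #     except Exception as e:
    #         logging.error('  Exception: {}, retry {}'.format(e, i))
    #         sleep(i ** 2)
    # if output_url is None:
    #     logging.error('  Could not build a thumbnail URL, skipping image')
    #     continue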
    # Download the thumbnail, retrying with a squared backoff on failure
    # (urllib.request.urlretrieve is the Python 3 location of urlretrieve)
    for i in range(10):
        try:
            # DEADBEEF - This seems like a poor approach
            if save_doy_flag and save_date_flag:
                urllib.request.urlretrieve(output_url, output_doy_path)
                shutil.copy(output_doy_path, output_date_path)
            elif save_doy_flag:
                urllib.request.urlretrieve(output_url, output_doy_path)
            elif save_date_flag:
                urllib.request.urlretrieve(output_url, output_date_path)
            break
        except Exception as e:
            logging.error('  Exception: {}, retry {}'.format(e, i))
            logging.debug('{}'.format(e))
            sleep(i ** 2)
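# A small retry helper could consolidate the two retry loops above.  This is a
# hedged sketch only; the 'retry' name is hypothetical and not used elsewhere:
# def retry(func, attempts=10):
#     for i in range(attempts):
#         try:
#             return func()
#         except Exception as e:
#             logging.error('  Exception: {}, retry {}'.format(e, i))
#             sleep(i ** 2)
#     return None
# output_url = retry(lambda: ee_image.getThumbUrl({'format': 'png'}))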