def keys_to_df(keys):
    """Helper function for 'get_GCS_keys'.

    Args:
        keys (list): List of strings with keys.
    Returns:
        df (pd.DataFrame): Pandas DataFrame with all relevant properties
            for Aqueduct 3.0.
    """
    schema = ["indicator", "spatial_resolution", "unit"]
    dfs = []
    for i, key in enumerate(keys, start=1):
        out_dict = aqueduct3.split_key(key, schema)
        dfs.append(pd.DataFrame(out_dict, index=[i]))
    # pd.concat replaces the deprecated DataFrame.append.
    df = pd.concat(dfs) if dfs else pd.DataFrame()
    return df
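# A hypothetical usage sketch of keys_to_df. The key names below are invented
# to match the three-field schema; the exact columns depend on what
# aqueduct3.split_key returns for your keys.
keys = ["bws_30s_dimensionless.tif",
        "iav_5min_dimensionless.tif"]
df_keys = keys_to_df(keys)
# Expect one row per key with (at least) the columns
# 'indicator', 'spatial_resolution' and 'unit'.
print(df_keys.head())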
print(ic_values_input_asset_id)
df = aqueduct3.earthengine.get_df_from_ic(ic_values_input_asset_id)

if TESTING:
    df = df[0:3]

for index, row in df.iterrows():
    i_processed = i_processed + 1
    elapsed_time = time.time() - start_time
    i_values_input_asset_id = row["input_image_asset_id"]
    # Add an artificial extension so split_key can parse the asset id.
    # Consider updating split_key to handle keys without an extension.
    i_values_input_asset_id_extension = i_values_input_asset_id + ".ee_image"
    dictje = aqueduct3.split_key(i_values_input_asset_id_extension, SCHEMA, SEPARATOR)

    output_file_name = "{}_reduced_{:02.0f}_{}_{}".format(
        dictje["file_name"], pfaf_level, spatial_resolution, reducer_name)
    output_file_path_pkl = "{}/{}.pkl".format(ec2_output_path, output_file_name)
    output_file_path_csv = "{}/{}.csv".format(ec2_output_path, output_file_name)

    # Resume support: skip any asset whose result pickle already exists.
    if os.path.isfile(output_file_path_pkl):
        message = "Index {:02.2f}, Skipping: {} Elapsed: {} Asset: {}".format(
            float(index), i_processed,
            str(timedelta(seconds=elapsed_time)), i_values_input_asset_id)
        logger.debug(message)
    else:
        message = "Index {:02.2f}, Processed: {} Elapsed: {} Asset: {}".format(
            float(index), i_processed,
            str(timedelta(seconds=elapsed_time)), i_values_input_asset_id)
        print(message)
        logger.debug(message)
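# Why the dummy ".ee_image" suffix: Earth Engine asset ids carry no file
# extension, while split_key expects one. A minimal sketch of the idea,
# assuming split_key strips the extension roughly like this (the real
# parsing lives in the aqueduct3 package; the asset id is a placeholder):
asset_id = "users/example/global_historical_PDomWW_year_m_5min_1960_2014_I0000"
key = asset_id + ".ee_image"
file_name, extension = key.rsplit("/", 1)[-1].rsplit(".", 1)
print(file_name)   # global_historical_PDomWW_year_m_5min_1960_2014_I0000
print(extension)   # ee_image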
# In[10]:

if TESTING:
    mainchannel_file_names = mainchannel_file_names[0:10]


# In[11]:

for mainchannel_file_name in mainchannel_file_names:
    mainchannel_path = "{}/{}".format(ec2_input_path_mainchannel, mainchannel_file_name)
    dictje = aqueduct3.split_key(key=mainchannel_file_name, schema=SCHEMA, separator=SEPARATOR)
    year = int(dictje["year"])
    month = int(dictje["month"])
    identifier = int(dictje["identifier"])
    df_mainchannel = read_mainchannel(mainchannel_path)
    sinks_file_name = ("global_historical_riverdischarge_{}_millionm3_5min_1960_2014"
                       "_I{:03.0f}Y{:04.0f}M{:02.0f}_reduced_06_5min_sum.pkl").format(
        dictje["temporal_resolution"], identifier, year, month)
    sinks_path = "{}/{}".format(ec2_input_path_sinks, sinks_file_name)
    df_sinks = read_sinks(sinks_path)
    df_merge = df_mainchannel.merge(right=df_sinks,
                                    how="outer",
                                    left_on="zones",
                                    right_on="zones")
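# A minimal sketch of the outer-merge step above with toy data. Only the
# "zones" join key mirrors the real frames; the value columns are invented.
import pandas as pd

df_mainchannel_toy = pd.DataFrame({"zones": [111011, 111012],
                                   "volume_main": [1.2, 3.4]})
df_sinks_toy = pd.DataFrame({"zones": [111012, 111013],
                             "volume_sink": [0.5, 0.7]})
# An outer merge keeps zones present in either frame and fills the missing
# side with NaN, so no basin is silently dropped.
print(df_mainchannel_toy.merge(right=df_sinks_toy, how="outer",
                               left_on="zones", right_on="zones"))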
def main():
    geometry = aqueduct3.earthengine.get_global_geometry(TESTING)
    i_processed = 0

    if TESTING:
        sectors = ["PDom"]
        demand_types = ["WW"]
        temporal_resolutions = ["year"]
        reducer_names = ["mean"]
        spatial_resolutions = ["30s"]
        pfaf_levels = [6]
    else:
        sectors = SECTORS
        demand_types = DEMAND_TYPES
        temporal_resolutions = TEMPORAL_RESOLUTIONS
        reducer_names = REDUCER_NAMES
        spatial_resolutions = SPATIAL_RESOLUTIONS
        pfaf_levels = PFAF_LEVELS

    start_time = time.time()
    for reducer_name in reducer_names:
        reducer = aqueduct3.earthengine.get_grouped_reducer(reducer_name)
        for spatial_resolution in spatial_resolutions:
            crs_transform = aqueduct3.earthengine.get_crs_transform(spatial_resolution)
            for pfaf_level in pfaf_levels:
                for sector in sectors:
                    for demand_type in demand_types:
                        for temporal_resolution in temporal_resolutions:
                            print(reducer_name, spatial_resolution, pfaf_level,
                                  sector, demand_type, temporal_resolution)
                            i_zones_asset_id = "{}/hybas_lev{:02.0f}_v1c_merged_fiona_{}_V{:02.0f}".format(
                                EE_INPUT_ZONES_PATH, pfaf_level, spatial_resolution, INPUT_VERSION_ZONES)
                            ic_values_input_asset_id = "{}/global_historical_{}{}_{}_m_5min_1960_2014".format(
                                EE_INPUT_VALUES_PATH, sector, demand_type, temporal_resolution)
                            df = aqueduct3.earthengine.get_df_from_ic(ic_values_input_asset_id)
                            if TESTING:
                                df = df[1:3]
                            for index, row in df.iterrows():
                                i_processed = i_processed + 1
                                elapsed_time = time.time() - start_time
                                i_values_input_asset_id = row["input_image_asset_id"]
                                # Add an artificial extension so split_key can parse the asset id.
                                # Consider updating split_key to handle keys without an extension.
                                i_values_input_asset_id_extension = i_values_input_asset_id + ".ee_image"
                                dictje = aqueduct3.split_key(
                                    i_values_input_asset_id_extension, SCHEMA, SEPARATOR)
                                output_file_name = "{}_reduced_{:02.0f}_{}_{}".format(
                                    dictje["file_name"], pfaf_level, spatial_resolution, reducer_name)
                                output_file_path_pkl = "{}/{}.pkl".format(ec2_output_path, output_file_name)
                                output_file_path_csv = "{}/{}.csv".format(ec2_output_path, output_file_name)
                                # Resume support: skip any asset whose result pickle already exists.
                                if os.path.isfile(output_file_path_pkl):
                                    message = "Index {:02.2f}, Skipping: {} Elapsed: {} Asset: {}".format(
                                        float(index), i_processed,
                                        str(timedelta(seconds=elapsed_time)), i_values_input_asset_id)
                                    logger.debug(message)
                                else:
                                    message = "Index {:02.2f}, Processed: {} Elapsed: {} Asset: {}".format(
                                        float(index), i_processed,
                                        str(timedelta(seconds=elapsed_time)), i_values_input_asset_id)
                                    print(message)
                                    logger.debug(message)
                                    # Stack the values image and the zones image so the
                                    # grouped reducer can aggregate values per zone.
                                    i_values = ee.Image(i_values_input_asset_id)
                                    total_image = i_values.addBands(ee.Image(i_zones_asset_id))
                                    result_list = total_image.reduceRegion(
                                        geometry=geometry,
                                        reducer=reducer,
                                        crsTransform=crs_transform,
                                        maxPixels=1e10).get("groups")
                                    function_properties = {
                                        "zones_pfaf_level": pfaf_level,
                                        "zones_spatial_resolution": spatial_resolution,
                                        "reducer": reducer_name,
                                        "zones_image_asset_id": i_zones_asset_id}
                                    function_properties = {**function_properties, **dictje}
                                    # Use a separate name so the df being iterated is not shadowed.
                                    df_result = post_process_results(result_list, function_properties)
                                    df_result.to_pickle(output_file_path_pkl)
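# Minimal sketch of the grouped-reducer zonal-statistics pattern that main()
# relies on. The asset ids are placeholders, the geometry and scale are
# simplified, and ee.Initialize() assumes prior authentication.
import ee
ee.Initialize()

values = ee.Image("users/example/values_image")   # placeholder asset id
zones = ee.Image("users/example/zones_image")     # placeholder asset id

# Band order matters: the reducer aggregates band 0 and groups by band 1
# (groupField=1), which is why the zones band is appended after the values.
grouped_reducer = ee.Reducer.mean().group(groupField=1, groupName="zones")
result = values.addBands(zones).reduceRegion(
    reducer=grouped_reducer,
    geometry=ee.Geometry.Rectangle([-180, -89.5, 180, 89.5], "EPSG:4326", False),
    scale=30000,          # coarse scale to keep the sketch cheap
    maxPixels=1e10)
print(result.get("groups").getInfo())  # [{'zones': ..., 'mean': ...}, ...]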
def ic_flux_to_volume_5min_m3second_millionm3(ic_input_asset_id,
                                              output_version,
                                              old_unit,
                                              new_unit,
                                              schema,
                                              separator):
    """Convert an imagecollection from flux to volume.
    -------------------------------------------------------------------------------
    The result is stored in an imagecollection with the same name as the
    input imagecollection but with 'millionm3' instead of 'm3second'.

    Input ic:  global_historical_riverdischarge_month_m3second_5min_1960_2014
    Output ic: global_historical_riverdischarge_month_millionm3_5min_1960_2014

    Args:
        ic_input_asset_id (string): Asset id of input imagecollection.
        output_version (integer): Output version.
        old_unit (string): Old unit.
        new_unit (string): New unit.
        schema (list): A list of strings containing the schema.
            See aqueduct3.split_key() for more info.
        separator (regex): Regular expression of separators used in
            geotiff filenames.
    """
    start_time = time.time()
    df = aqueduct3.earthengine.get_df_from_ic(ic_input_asset_id)
    df = aqueduct3.earthengine.add_export_parameters_for_export(
        df, old_unit, new_unit, output_version)

    # Creating ImageCollection(s).
    output_ic_asset_ids = list(df["output_ic_asset_id"].unique())
    for output_ic_asset_id in output_ic_asset_ids:
        command, result = aqueduct3.earthengine.create_imageCollection(output_ic_asset_id)
        print(command, result)

    # Batch converting and uploading.
    for index, row in df.iterrows():
        elapsed_time = time.time() - start_time
        print("Index: {:04.0f} Elapsed: {}".format(index, timedelta(seconds=elapsed_time)))
        description = row["description"]
        output_image_asset_id = row["output_image_asset_id"]

        # Get additional parameters from the asset name. Add a fictional
        # extension so split_key can parse the asset id.
        key = row["input_image_asset_id"] + ".ee_image"
        dictje = aqueduct3.split_key(key, schema, separator)

        if aqueduct3.earthengine.asset_exists(output_image_asset_id):
            print("Asset exists, skipping: {}".format(output_image_asset_id))
        else:
            i_old_unit_5min = ee.Image(row["input_image_asset_id"])
            if old_unit == "m3second" and new_unit == "millionm3":
                year = int(dictje["year"])
                month = int(dictje["month"])
                temporal_resolution = dictje["temporal_resolution"]
                i_new_unit_5min = aqueduct3.earthengine.flux_to_volume_5min_m3second_millionm3(
                    i_old_unit_5min, temporal_resolution, year, month)
            else:
                raise ValueError("Invalid combination of units.")
            i_new_unit_5min = update_property_script_used(i_new_unit_5min)
            i_new_unit_5min = update_property_output_version(i_new_unit_5min)
            aqueduct3.earthengine.export_image_global_5min(
                i_new_unit_5min, description, output_image_asset_id)
            print(output_image_asset_id)
    return i_new_unit_5min
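# Sketch of the flux-to-volume arithmetic for a scalar value. The real
# aqueduct3.earthengine.flux_to_volume_5min_m3second_millionm3 operates on
# ee.Image objects; the calendar handling below is an assumption.
import calendar

def flux_to_volume_millionm3_sketch(flux_m3second, temporal_resolution, year, month):
    if temporal_resolution == "month":
        days = calendar.monthrange(year, month)[1]       # days in that month
    elif temporal_resolution == "year":
        days = 366 if calendar.isleap(year) else 365
    else:
        raise ValueError("Invalid temporal resolution.")
    seconds = days * 24 * 60 * 60
    return flux_m3second * seconds / 1e6                 # m3 -> million m3

# Example: 1 m3/s sustained over January 2000 is ~2.68 million m3.
print(flux_to_volume_millionm3_sketch(1.0, "month", 2000, 1))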