def main():
    start_time = time.time()
    get_ipython().system('mkdir -p {ec2_output_path}')

    keys = aqueduct3.get_GCS_keys(GCS_INPUT_PATH)
    # Limiting to tiffs for now.
    keys = list(filter(lambda x: x.endswith('.tif'), keys))

    df = aqueduct3.keys_to_df(keys, SEPARATOR, SCHEMA)
    df = df.assign(**EXTRA_PROPERTIES)
    df["exportdescription"] = df["file_name"]
    df = df.apply(pd.to_numeric, errors='ignore')

    # Earth Engine Preparations
    # Create folder (create parent if non-existent).
    result = aqueduct3.earthengine.create_ee_folder_recursive(
        ee_output_path, OVERWRITE_OUTPUT)

    df_errors = pd.DataFrame()
    for index, row in df.iterrows():
        geotiff_gcs_path = GCS_INPUT_PATH + row.file_name + "." + row.extension
        output_ee_asset_id = ee_output_path + "/" + row.file_name
        properties = row.to_dict()
        df_errors2 = aqueduct3.upload_geotiff_to_EE_imageCollection(
            geotiff_gcs_path, output_ee_asset_id, properties, index)
        df_errors = df_errors.append(df_errors2)

    # Store the error dataframe on EC2 and copy it to S3.
    df_errors.to_csv("{}/{}".format(ec2_output_path, OUTPUT_FILE_NAME))
    get_ipython().system(
        'aws s3 cp {ec2_output_path} {s3_output_path} --recursive')
    return df, df_errors
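# `create_ee_folder_recursive` is a helper from the author's `aqueduct3`
# package; its source is not shown here. Below is a minimal sketch of what it
# plausibly does, assuming it walks the asset path one level at a time and
# issues the same `earthengine rm -r` / `earthengine create folder` commands
# that the other notebook variants in this series run inline. The `_sketch`
# suffix marks it as hypothetical, not the actual aqueduct3 implementation.

import subprocess

def create_ee_folder_recursive_sketch(ee_path, overwrite=False):
    """Create each missing level of ee_path with the earthengine CLI,
    optionally removing an existing asset tree first (assumed behavior)."""
    if overwrite:
        subprocess.run("earthengine rm -r {}".format(ee_path), shell=True)
    parts = ee_path.strip("/").split("/")
    # Create every prefix in turn; 'create folder' on an existing folder
    # fails, which this sketch deliberately ignores.
    for depth in range(2, len(parts) + 1):
        partial = "/".join(parts[:depth])
        subprocess.run("earthengine create folder {}".format(partial), shell=True)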
def main():
    start_time = time.time()
    get_ipython().system('mkdir -p {ec2_output_path}')

    keys = aqueduct3.get_GCS_keys(gcs_input_path)
    df = aqueduct3.keys_to_df(keys, SEPARATOR, SCHEMA)
    df = df.assign(**EXTRA_PROPERTIES)  # Python >= 3.5

    # EXTRA FOR AUX FILES ONLY: replace nodata_value for the ldd raster.
    df.loc[df['file_name'] == "global_lddsound_numpad_05min", "nodata_value"] = 255
    df["exportdescription"] = df["indicator"]
    df = df.apply(pd.to_numeric, errors='ignore')

    # Earth Engine Preparations
    # Create folder.
    if OVERWRITE:
        command = "earthengine rm -r {}".format(ee_output_path)
        print(command)
        subprocess.check_output(command, shell=True)
    command = "earthengine create folder {}".format(ee_output_path)
    print(command)
    subprocess.check_output(command, shell=True)

    if TESTING:
        df = df[1:3]

    df_errors = pd.DataFrame()
    for index, row in df.iterrows():
        elapsed_time = time.time() - start_time
        print(index,
              "{:02.2f}% ".format((float(index) / df.shape[0]) * 100) +
              "elapsed: ",
              str(timedelta(seconds=elapsed_time)))
        geotiff_gcs_path = gcs_input_path + row.file_name + "." + row.extension
        output_ee_asset_id = ee_output_path + "/" + row.file_name
        properties = row.to_dict()
        df_errors2 = aqueduct3.upload_geotiff_to_EE_imageCollection(
            geotiff_gcs_path, output_ee_asset_id, properties, index)
        df_errors = df_errors.append(df_errors2)

    # Store the error dataframe on EC2 and copy it to S3.
    df_errors.to_csv("{}/{}".format(ec2_output_path, OUTPUT_FILE_NAME))
    get_ipython().system(
        'aws s3 cp {ec2_output_path} {s3_output_path} --recursive')

    # Retry failed tasks once.
    df_retry = df_errors.loc[df_errors['error'] != 0]
    for index, row in df_retry.iterrows():
        response = subprocess.check_output(row.command, shell=True)

    return df, df_errors
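# `keys_to_df` is likewise an `aqueduct3` helper whose source is not shown
# here. A minimal sketch, assuming it splits each key's base file name on
# SEPARATOR and maps the parts onto the SCHEMA column names; the real
# implementation may differ, and the schema in the illustrative call is
# made up.

import pandas as pd

def keys_to_df_sketch(keys, separator, schema):
    """Parse structured file names into a dataframe, one row per key
    (assumed behavior of aqueduct3.keys_to_df)."""
    rows = []
    for key in keys:
        file_name_ext = key.split("/")[-1]        # e.g. 'global_q_05min.tif'
        file_name, extension = file_name_ext.rsplit(".", 1)
        row = dict(zip(schema, file_name.split(separator)))
        row["file_name"] = file_name
        row["extension"] = extension
        rows.append(row)
    return pd.DataFrame(rows)

# Illustrative call with a hypothetical schema:
# keys_to_df_sketch(["bucket/global_q_05min.tif"], "_",
#                   ["geography", "indicator", "spatial_resolution"])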
def main():
    start_time = time.time()
    get_ipython().system('mkdir -p {ec2_output_path}')

    keys = aqueduct3.get_GCS_keys(gcs_input_path)
    # Limiting to tiffs for now.
    keys = list(filter(lambda x: x.endswith('.tif'), keys))

    df = aqueduct3.keys_to_df(keys, SEPARATOR, SCHEMA)
    df = df.assign(**EXTRA_PROPERTIES)
    df["exportdescription"] = df["file_name"]
    df = df.apply(pd.to_numeric, errors='ignore')

    # Earth Engine Preparations
    # Create folder (create parent if non-existent).
    if OVERWRITE:
        command = "earthengine rm -r {}".format(ee_output_path)
        print(command)
        subprocess.check_output(command, shell=True)
    command = "earthengine create folder {}".format(ee_output_path)
    print(command)
    subprocess.check_output(command, shell=True)

    df_errors = pd.DataFrame()
    for index, row in df.iterrows():
        elapsed_time = time.time() - start_time
        print(index,
              "{:02.2f}% ".format((float(index) / df.shape[0]) * 100) +
              "elapsed: ",
              str(timedelta(seconds=elapsed_time)))
        geotiff_gcs_path = gcs_input_path + row.file_name + "." + row.extension
        output_ee_asset_id = ee_output_path + "/" + row.file_name
        properties = row.to_dict()
        df_errors2 = aqueduct3.upload_geotiff_to_EE_imageCollection(
            geotiff_gcs_path, output_ee_asset_id, properties, index)
        df_errors = df_errors.append(df_errors2)

    # Store the error dataframe on EC2 and copy it to S3.
    df_errors.to_csv("{}/{}".format(ec2_output_path, OUTPUT_FILE_NAME))
    get_ipython().system('aws s3 cp {ec2_output_path} {s3_output_path} --recursive')

    # Retry failed tasks once.
    df_retry = df_errors.loc[df_errors['error'] != 0]
    for index, row in df_retry.iterrows():
        response = subprocess.check_output(row.command, shell=True)

    return df, df_errors
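# All of the main() variants funnel through
# `aqueduct3.upload_geotiff_to_EE_imageCollection`, and the retry loop relies
# on the dataframe it returns carrying at least an `error` flag and the shell
# `command` that was attempted. A sketch under those assumptions; the flag
# spelling follows the '--p' style used for the table upload below, and the
# real helper may differ.

import subprocess
import pandas as pd

def upload_geotiff_to_EE_imageCollection_sketch(
        geotiff_gcs_path, output_ee_asset_id, properties, index):
    """Upload one GeoTIFF from GCS to an EE asset and record the outcome
    in a one-row dataframe (assumed behavior)."""
    command = "earthengine upload image --asset_id={} {}".format(
        output_ee_asset_id, geotiff_gcs_path)
    for key, value in properties.items():
        command += " --p {}={}".format(key, value)
    try:
        subprocess.check_output(command, shell=True)
        error = 0
    except subprocess.CalledProcessError:
        error = 1
    # 'error' and 'command' are exactly the columns the retry loop reads.
    return pd.DataFrame([{"index": index, "command": command, "error": error}])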
None """ command = "" for key, value in d.items(): command += " --p {}={}".format(key,value) return command # In[6]: keys = aqueduct3.get_GCS_keys(gcs_input_path) keys = list(filter(lambda x: x.endswith('.shp'), keys)) # In[24]: for key in keys: input_path = key output_filename_ext = key.split("/")[-1] output_filename = output_filename_ext.split(".")[-2] output_path = ee_output_path + "/" + output_filename command = "earthengine upload table --asset_id={} {} ".format(output_path,input_path) extra_command = property_dict_to_ee_command(EXTRA_PROPERTIES) command = command + extra_command print(command) response = subprocess.check_output(command, shell=True)
import os
import time
import re
import subprocess
import pandas as pd
from datetime import timedelta
import aqueduct3


# In[4]:

if OVERWRITE_OUTPUT:
    command = "earthengine rm -r {}".format(ee_output_path)
    subprocess.check_output(command, shell=True)


# In[5]:

keys = aqueduct3.get_GCS_keys(GCS_INPUT_PATH)


# In[6]:

def main():
    start_time = time.time()
    get_ipython().system('mkdir -p {ec2_output_path}')

    keys = aqueduct3.get_GCS_keys(GCS_INPUT_PATH)
    # Limiting to tiffs for now.
    keys = list(filter(lambda x: x.endswith('.tif'), keys))

    df = aqueduct3.keys_to_df(keys, SEPARATOR, SCHEMA)
    df = df.assign(**EXTRA_PROPERTIES)
    df["exportdescription"] = df["file_name"]
    df = df.apply(pd.to_numeric, errors='ignore')