Example #1
def main():
    start_time = time.time()
    get_ipython().system('mkdir -p {ec2_output_path}')
    keys = aqueduct3.get_GCS_keys(GCS_INPUT_PATH)
    # Limiting to tiffs for now.
    keys = list(filter(lambda x: x.endswith('.tif'), keys))
    df = aqueduct3.keys_to_df(keys, SEPARATOR, SCHEMA)
    df = df.assign(**EXTRA_PROPERTIES)
    df["exportdescription"] = df["file_name"]
    df = df.apply(pd.to_numeric, errors='ignore')

    # Earth Engine Preparations
    # Create folder (create parent if non existent)

    result = aqueduct3.earthengine.create_ee_folder_recursive(
        ee_output_path, OVERWRITE_OUTPUT)

    df_errors = pd.DataFrame()
    for index, row in df.iterrows():
        geotiff_gcs_path = GCS_INPUT_PATH + row.file_name + "." + row.extension
        output_ee_asset_id = ee_output_path + "/" + row.file_name
        properties = row.to_dict()
        df_errors2 = aqueduct3.upload_geotiff_to_EE_imageCollection(
            geotiff_gcs_path, output_ee_asset_id, properties, index)
        df_errors = pd.concat([df_errors, df_errors2])
    df_errors.to_csv("{}/{}".format(ec2_output_path, OUTPUT_FILE_NAME))
    get_ipython().system(
        'aws s3 cp {ec2_output_path} {s3_output_path} --recursive')
    return df, df_errors
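
These main() snippets all depend on module-level configuration defined elsewhere in the source notebooks. A minimal sketch of that setup follows; every name and value below is an illustrative assumption, not taken from the original code.

# All values below are illustrative assumptions; the real notebooks define
# their own paths, schema, and properties.
SCRIPT_NAME = "Y2018M05D02_RH_Ingest_GCS_EE_V01"  # hypothetical notebook name
GCS_INPUT_PATH = "gs://example_bucket/{}/".format(SCRIPT_NAME)
gcs_input_path = GCS_INPUT_PATH
ec2_output_path = "/volumes/data/{}/output".format(SCRIPT_NAME)
s3_output_path = "s3://example-bucket/{}/output".format(SCRIPT_NAME)
ee_output_path = "projects/WRI-Aqueduct/{}".format(SCRIPT_NAME)
SEPARATOR = "_|-"  # regex used to split file names into schema fields
SCHEMA = ["geographic_range", "indicator", "spatial_resolution",
          "temporal_range"]
EXTRA_PROPERTIES = {"ingested_by": "example_user",
                    "script_used": SCRIPT_NAME}
OVERWRITE = False
OVERWRITE_OUTPUT = False
TESTING = False
OUTPUT_FILE_NAME = "df_errors.csv"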
Example #2
def main():
    start_time = time.time()
    get_ipython().system('mkdir -p {ec2_output_path}')
    keys = aqueduct3.get_GCS_keys(gcs_input_path)
    df = aqueduct3.keys_to_df(keys, SEPARATOR, SCHEMA)
    df = df.assign(**EXTRA_PROPERTIES)  # Python > 3.5

    # EXTRA FOR AUX FILES ONLY, replace nodata_value for ldd.
    df.loc[df['file_name'] == "global_lddsound_numpad_05min",
           "nodata_value"] = 255

    df["exportdescription"] = df["indicator"]
    df = df.apply(pd.to_numeric, errors='ignore')

    # Earth Engine Preparations
    # Create folder
    if OVERWRITE:
        command = "earthengine rm -r {}".format(ee_output_path)
        print(command)
        subprocess.check_output(command, shell=True)

    command = "earthengine create folder {}".format(ee_output_path)
    print(command)
    subprocess.check_output(command, shell=True)

    if TESTING:
        # Limit the dataframe to a couple of rows for a quick test run.
        df = df[1:3]

    df_errors = pd.DataFrame()
    for index, row in df.iterrows():
        elapsed_time = time.time() - start_time
        print(index,
              "{:02.2f}%".format((float(index) / df.shape[0]) * 100),
              "elapsed:", str(timedelta(seconds=elapsed_time)))

        geotiff_gcs_path = gcs_input_path + row.file_name + "." + row.extension
        output_ee_asset_id = ee_output_path + "/" + row.file_name
        properties = row.to_dict()

        df_errors2 = aqueduct3.upload_geotiff_to_EE_imageCollection(
            geotiff_gcs_path, output_ee_asset_id, properties, index)
        df_errors = pd.concat([df_errors, df_errors2])

    # Storing error dataframe on ec2 and S3
    df_errors.to_csv("{}/{}".format(ec2_output_path, OUTPUT_FILE_NAME))
    get_ipython().system(
        'aws s3 cp {ec2_output_path} {s3_output_path} --recursive')

    # Retry Failed Tasks Once
    df_retry = df_errors.loc[df_errors['error'] != 0]
    for index, row in df_retry.iterrows():
        response = subprocess.check_output(row.command, shell=True)

    return df, df_errors
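
The retry block above lets subprocess.check_output raise on a second failure, which aborts the remaining retries. A defensive variant, sketched here under the assumption that df_errors carries the 'error' and 'command' columns used above, records second failures instead:

import subprocess

def retry_failed_once(df_errors):
    """Re-run each failed upload command once; collect rows that fail again."""
    still_failing = []
    for index, row in df_errors.loc[df_errors["error"] != 0].iterrows():
        try:
            subprocess.check_output(row.command, shell=True)
        except subprocess.CalledProcessError as e:
            still_failing.append((index, e.returncode))
    return still_failing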
Example #3
def main():
    start_time = time.time()
    get_ipython().system('mkdir -p {ec2_output_path}')
    keys = aqueduct3.get_GCS_keys(gcs_input_path)
    # Limiting to tiffs for now.
    keys = list(filter(lambda x: x.endswith('.tif'), keys))
    df = aqueduct3.keys_to_df(keys, SEPARATOR, SCHEMA)
    df = df.assign(**EXTRA_PROPERTIES)
    df["exportdescription"] = df["file_name"]
    df = df.apply(pd.to_numeric, errors='ignore')

    # Earth Engine Preparations
    # Create folder (create parent if non existent)
    if OVERWRITE:
        command = "earthengine rm -r {}".format(ee_output_path)
        print(command)
        subprocess.check_output(command, shell=True)

    command = "earthengine create folder {}".format(ee_output_path)
    print(command)
    subprocess.check_output(command, shell=True)
    
    df_errors = pd.DataFrame()
    for index, row in df.iterrows():
        elapsed_time = time.time() - start_time
        print(index,
              "{:02.2f}%".format((float(index) / df.shape[0]) * 100),
              "elapsed:", str(timedelta(seconds=elapsed_time)))

        geotiff_gcs_path = gcs_input_path + row.file_name + "." + row.extension
        output_ee_asset_id = ee_output_path + "/" + row.file_name
        properties = row.to_dict()

        df_errors2 = aqueduct3.upload_geotiff_to_EE_imageCollection(
            geotiff_gcs_path, output_ee_asset_id, properties, index)
        df_errors = pd.concat([df_errors, df_errors2])

    # Storing error dataframe on ec2 and S3
    df_errors.to_csv("{}/{}".format(ec2_output_path, OUTPUT_FILE_NAME))
    get_ipython().system('aws s3 cp {ec2_output_path} {s3_output_path} --recursive')
   
    # Retry Failed Tasks Once
    df_retry = df_errors.loc[df_errors['error'] != 0]
    for index, row in df_retry.iterrows():
        response = subprocess.check_output(row.command, shell=True)

    return df, df_errors
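
The progress line printed inside the loop is easy to factor out. A small helper, sketched here (it is not part of aqueduct3), keeps the loop body focused on the upload itself:

import time
from datetime import timedelta

def print_progress(index, total, start_time):
    """Print the row index, percent complete, and elapsed wall-clock time."""
    percent = (float(index) / total) * 100
    elapsed = timedelta(seconds=time.time() - start_time)
    print(index, "{:05.2f}%".format(percent), "elapsed:", str(elapsed))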
Example #4
def property_dict_to_ee_command(d):
    """Convert a dictionary of properties into a string of
    earthengine upload --p key=value flags.

    Args:
        d (dict): Dictionary of property names and values.

    Returns:
        command (string): Space-separated --p flags, empty if d is empty.

    """

    command = ""

    for key, value in d.items():
        command += " --p {}={}".format(key, value)

    return command
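
Given the body above, calling the helper with a two-entry dictionary yields one --p flag per key (insertion order is preserved on Python 3.7+):

property_dict_to_ee_command({"units": "mm", "year": 2014})
# -> ' --p units=mm --p year=2014'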
    


# In[6]:

keys = aqueduct3.get_GCS_keys(gcs_input_path)
keys = list(filter(lambda x: x.endswith('.shp'), keys))


# In[24]:

for key in keys:
    input_path = key
    output_filename_ext = key.split("/")[-1]
    output_filename = output_filename_ext.split(".")[-2]
    output_path = ee_output_path + "/" + output_filename    
    command = "earthengine upload table --asset_id={} {} ".format(output_path,input_path)
    extra_command = property_dict_to_ee_command(EXTRA_PROPERTIES)
    command = command + extra_command
    print(command)
    response = subprocess.check_output(command, shell=True)
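
Because the command is assembled by string concatenation and run with shell=True, a file name containing spaces or shell metacharacters would break it. A safer variant (a sketch, not from the original) passes an argument list and skips the shell entirely:

import subprocess

# Build the argument vector explicitly; no shell parsing is involved.
args = ["earthengine", "upload", "table",
        "--asset_id={}".format(output_path), input_path]
for key, value in EXTRA_PROPERTIES.items():
    args += ["--p", "{}={}".format(key, value)]
response = subprocess.check_output(args)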
Example #5
import os
import time
import re
import subprocess
import pandas as pd
from datetime import timedelta
import aqueduct3

# In[4]:

if OVERWRITE_OUTPUT:
    command = "earthengine rm -r {}".format(ee_output_path)
    subprocess.check_output(command, shell=True)

# In[5]:

keys = aqueduct3.get_GCS_keys(GCS_INPUT_PATH)

# In[6]:


def main():
    start_time = time.time()
    get_ipython().system('mkdir -p {ec2_output_path}')
    keys = aqueduct3.get_GCS_keys(GCS_INPUT_PATH)
    # Limiting to tiffs for now.
    keys = list(filter(lambda x: x.endswith('.tif'), keys))
    df = aqueduct3.keys_to_df(keys, SEPARATOR, SCHEMA)
    df = df.assign(**EXTRA_PROPERTIES)
    df["exportdescription"] = df["file_name"]
    df = df.apply(pd.to_numeric, errors='ignore')