# add a datetime column; assigning a scalar datetime broadcasts it to every row
df['datetime'] = datetime.datetime(2016, 1, 1)
# rearrange the column names
df = df[['country', 'datetime', 'death_rate_per_100000','estimated_lower_bound',
         'estimated_number_of_road_traffic_deaths_data','estimated_upper_bound']]
# some rows have spaces between the digits in the
# 'estimated_number_of_road_traffic_deaths_data' column; remove those spaces
df['estimated_number_of_road_traffic_deaths_data'] = df['estimated_number_of_road_traffic_deaths_data'].str.replace(' ', '')
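# --- illustrative sketch (not part of the original script) ---
# shows what the str.replace(' ', '') cleaning above does: values such as
# '12 345' become '12345', which pd.to_numeric can then parse if numeric
# types are needed downstream
import pandas as pd
example = pd.Series(['12 345', '1 020', '87'])
cleaned = pd.to_numeric(example.str.replace(' ', ''))  # 12345, 1020, 87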
# save processed dataset to csv
processed_data_file = os.path.join(data_dir, dataset_name+'_edit.csv')
df.to_csv(processed_data_file, index=False)

'''
Upload processed data to Carto
'''
logger.info('Uploading processed data to Carto.')
util_carto.upload_to_carto(processed_data_file, 'LINK')
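# --- hypothetical sketch (not util_carto's actual implementation) ---
# util_carto.upload_to_carto is a Resource Watch helper; one way a file
# upload to CARTO can be done is through the Import API, roughly as below.
# the function name and the CARTO_USER / CARTO_API_KEY environment
# variables are assumptions made for this sketch
import os
import requests

def upload_file_to_carto_sketch(file_path):
    # post the file to the account's Import API endpoint
    url = 'https://{}.carto.com/api/v1/imports/'.format(os.environ['CARTO_USER'])
    with open(file_path, 'rb') as f:
        response = requests.post(url,
                                 params={'api_key': os.environ['CARTO_API_KEY']},
                                 files={'file': f})
    response.raise_for_status()
    return response.json()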

'''
Upload original data and processed data to Amazon S3 storage
'''
# initialize AWS variables
aws_bucket = 'wri-public-data'
s3_prefix = 'resourcewatch/'

logger.info('Uploading original data to S3.')
# Upload raw data file to S3

# Copy the raw data into a zipped file to upload to S3
raw_data_dir = os.path.join(data_dir, dataset_name+'.zip')
with ZipFile(raw_data_dir,'w') as zip:
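    # the second argument (arcname) stores the file at the root of the
    # archive instead of under its full local directory path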
    zip.write(raw_data_file, os.path.basename(raw_data_file))
Example #2
raw_data_dir = os.path.join(data_dir, dataset_name + '.zip')
with ZipFile(raw_data_dir, 'w') as zip:
    zip.write(raw_data_file, os.path.basename(raw_data_file))

logger.info('Uploading original data to S3.')
# Upload raw data file to S3
uploaded = util_cloud.aws_upload(raw_data_dir, aws_bucket,
                                 s3_prefix + os.path.basename(raw_data_dir))
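# --- hypothetical sketch (not util_cloud's actual implementation) ---
# util_cloud.aws_upload is a Resource Watch helper; a minimal equivalent
# using boto3 could look like this. credentials are assumed to come from
# the standard AWS environment variables or ~/.aws/credentials
import boto3

def aws_upload_sketch(local_path, bucket, key):
    # upload a single local file to s3://bucket/key
    s3 = boto3.client('s3')
    s3.upload_file(local_path, bucket, key)
    return 's3://{}/{}'.format(bucket, key)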

# Copy the processed data into a zipped file to upload to S3
processed_data_dir = os.path.join(data_dir, dataset_name + '_edit.zip')
# Find all the necessary components of the shapefile
processed_data_files = glob.glob(
    os.path.join(data_dir, dataset_name + '_edit.*'))
with ZipFile(processed_data_dir, 'w') as zip:
    for file in processed_data_files:
        zip.write(file, os.path.basename(file))

logger.info('Uploading processed data to S3.')
# Upload processed data file to S3
uploaded = util_cloud.aws_upload(
    processed_data_dir, aws_bucket,
    s3_prefix + os.path.basename(processed_data_dir))
'''
Upload processed data to Carto
'''

logger.info('Uploading data to Carto.')
# upload the shapefile to Carto
util_carto.upload_to_carto(processed_data_dir, 'LINK', tags=['ow'])

        out_filelist = glob.glob(out_str)
        
        zipfile_list = []
        
        # Create a zipfile for the shapefile components
        logger.info('Zipping files for ' + processed_data_file)
       
        out_zip_dir = os.path.join(processed_data_file[:-4] + '.zip')
        with ZipFile(out_zip_dir,'w') as zip:
            for file in out_filelist:
                zip.write(file, os.path.basename(file))
                zipfile_list.append(file)

        # Upload zipfile to carto
        logger.info('Uploading processed data for ' + processed_data_file +' to Carto.')
        util_carto.upload_to_carto(out_zip_dir, 'LINK', tags=['ow'])
        
'''
Upload original data and processed data to Amazon S3 storage
'''

# initialize AWS variables
aws_bucket = 'wri-public-data'
s3_prefix = 'resourcewatch/'

logger.info('Zipping original data files.')

# Copy the raw data into a zipped file to upload to S3
raw_zip_dir = os.path.join(data_dir, dataset_name+'.zip')
with ZipFile(raw_zip_dir,'w') as zip:
    for file in raw_namelist:
        zip.write(file, os.path.basename(file))

imports_processed_data_file = os.path.join(data_dir, 'foo_066a_rw0_food_product_import_shares_edit.csv')
exports_processed_data_file = os.path.join(data_dir, 'foo_066b_rw0_food_product_export_shares_edit.csv')

# We export both datasets to csv files
imports_df_edit.to_csv(imports_processed_data_file, index=False)
exports_df_edit.to_csv(exports_processed_data_file, index=False)

# collect both processed file paths in a list so they can be uploaded together
processed_data_file = [imports_processed_data_file, exports_processed_data_file]

'''
Upload processed data to Carto
'''
logger.info('Uploading processed data to Carto.')
for file in processed_data_file:
    util_carto.upload_to_carto(file, 'LINK', collision_strategy='overwrite')
'''
Upload original data and processed data to Amazon S3 storage
'''
# initialize AWS variables
aws_bucket = 'wri-public-data'
s3_prefix = 'resourcewatch/'

logger.info('Uploading original data to S3.')
# Copy the raw data into a zipped file to upload to S3
raw_data_dir = os.path.join(data_dir, dataset_name+'.zip')
with ZipFile(raw_data_dir,'w') as zipped:
    for file in raw_data_file:
        zipped.write(file, os.path.basename(file))
# Upload raw data file to S3
uploaded = util_cloud.aws_upload(raw_data_dir, aws_bucket, s3_prefix + os.path.basename(raw_data_dir))