def lambda_handler(event, context):
    """Build the asset list and upload its manifest file.

    Calls ``source_dataset()`` and, when the result is truthy, hands it to
    ``upload_manifest_file``. Nothing is uploaded for an empty/falsy result.
    ``event`` and ``context`` are accepted but not used.

    Raises:
        Exception: if ``upload_manifest_file`` raises; the original error is
            attached as the explicit cause.
    """
    asset_list = source_dataset()
    if not asset_list:
        return

    try:
        upload_manifest_file(asset_list)
    except Exception as e:
        # Chain explicitly so the root cause stays visible in the traceback.
        raise Exception(
            f"Something went wrong when uploading manifest file to manifest bucket: {e}"
        ) from e
def lambda_handler(event, context):
    """Create a revision, import assets in parallel, and publish it.

    Steps performed:
      1. ``source_dataset()`` supplies the asset list.
      2. The list is split into chunks of 100 and each chunk is handed to
         ``jobs_handler`` through a 10-worker ``Pool``.
      3. The revision is finalized and, if finalized, added to the product
         through ``start_change_set``.

    ``event`` and ``context`` are accepted but not used.

    Returns:
        dict: ``{'statusCode': int, 'body': str}`` where ``body`` is a
        JSON-encoded status message (200 on success or when there is
        nothing to publish, 500 on failure).

    Raises:
        Exception: if ``source_dataset()`` does not return a list.
    """
    asset_list = source_dataset()

    # Validate the raw result *before* chunking. The chunked value is always
    # a list, so checking it could never detect a failed source_dataset().
    if not isinstance(asset_list, list):
        raise Exception('Something went wrong when uploading files to s3')

    if not asset_list:
        print(
            'No need for a revision, all datasets included with this product are up to date')
        return {
            'statusCode': 200,
            'body': json.dumps('No need for a revision, all datasets included with this product are up to date')
        }

    # 100 assets per chunk -- presumably the per-job asset limit; confirm
    # against the service quota before changing.
    chunks = [asset_list[i:i + 100] for i in range(0, len(asset_list), 100)]

    create_revision_response = dataexchange.create_revision(
        DataSetId=data_set_id)
    revision_id = create_revision_response['Id']
    revision_arn = create_revision_response['Arn']

    # One payload per chunk; job numbers are 1-based strings.
    job_payloads = [
        {
            'asset_list': chunk,
            'revision_id': revision_id,
            'job_num': str(job_num),
            'total_jobs': str(len(chunks)),
        }
        for job_num, chunk in enumerate(chunks, start=1)
    ]

    with Pool(10) as p:
        p.map(jobs_handler, job_payloads)

    update_revision_response = dataexchange.update_revision(
        DataSetId=data_set_id,
        RevisionId=revision_id,
        Comment=revision_comment,
        Finalized=True
    )

    if not update_revision_response['Finalized']:
        print('Revision did not complete successfully')
        return {
            'statusCode': 500,
            'body': json.dumps('Revision did not complete successfully')
        }

    # Call AWSMarketplace Catalog's APIs to add revisions
    describe_entity_response = marketplace.describe_entity(
        Catalog='AWSMarketplace', EntityId=product_id)
    start_change_set_response = start_change_set(
        describe_entity_response, revision_arn)

    if start_change_set_response['ChangeSetId']:
        print('Revision updated successfully and added to the dataset')
        return {
            'statusCode': 200,
            'body': json.dumps('Revision updated successfully and added to the dataset')
        }

    print('Something went wrong with AWSMarketplace Catalog API')
    return {
        'statusCode': 500,
        'body': json.dumps('Something went wrong with AWSMarketplace Catalog API')
    }
def lambda_handler(event, context):
    """Import one S3 object into a new revision and publish the revision.

    Steps performed:
      1. ``source_dataset(new_filename, s3_bucket, new_s3_key)`` is called.
      2. A revision is created and a single ``IMPORT_ASSETS_FROM_S3`` job is
         started for ``s3_bucket`` / ``new_s3_key``.
      3. The job is polled until it reaches a terminal state.
      4. The revision is finalized and, if finalized, added to the product
         through ``start_change_set``.

    ``event`` and ``context`` are accepted but not used.

    Returns:
        dict: ``{'statusCode': int, 'body': str}`` where ``body`` is a
        JSON-encoded status message.

    Raises:
        Exception: if the import job ends in ``ERROR``, ``CANCELLED`` or
            ``TIMED_OUT``.
    """
    source_dataset(new_filename, s3_bucket, new_s3_key)

    create_revision_response = dataexchange.create_revision(
        DataSetId=data_set_id)
    revision_id = create_revision_response['Id']
    revision_arn = create_revision_response['Arn']

    import_job = dataexchange.create_job(
        Type='IMPORT_ASSETS_FROM_S3',
        Details={
            'ImportAssetsFromS3': {
                'DataSetId': data_set_id,
                'RevisionId': revision_id,
                'AssetSources': [{
                    'Bucket': s3_bucket,
                    'Key': new_s3_key
                }]
            }
        })

    # Start the Job and remember its JobId for polling.
    job_id = import_job['Id']
    dataexchange.start_job(JobId=job_id)

    # Poll until the job reaches a terminal state.
    while True:
        get_job_response = dataexchange.get_job(JobId=job_id)
        job_state = get_job_response['State']

        if job_state == 'COMPLETED':
            print("Job {} completed".format(job_id))
            break
        if job_state == 'ERROR':
            job_errors = get_job_response['Errors']
            raise Exception('JobId: {} failed with errors:\n{}'.format(
                job_id, job_errors))
        if job_state in ('CANCELLED', 'TIMED_OUT'):
            # These are terminal too; without this check the loop would
            # keep polling until the invocation itself is killed.
            raise Exception('JobId: {} ended in state {}:\n{}'.format(
                job_id, job_state, get_job_response.get('Errors')))

        # Sleep to ensure we don't get throttled by the GetJob API.
        time.sleep(0.2)

    update_revision_response = dataexchange.update_revision(
        DataSetId=data_set_id,
        RevisionId=revision_id,
        Comment=revision_comment,
        Finalized=True)

    if not update_revision_response['Finalized']:
        return {
            'statusCode': 500,
            'body': json.dumps('Revision did not complete successfully')
        }

    # Call AWSMarketplace Catalog's APIs to add revisions
    describe_entity_response = marketplace.describe_entity(
        Catalog='AWSMarketplace', EntityId=product_id)
    start_change_set_response = start_change_set(describe_entity_response,
                                                 revision_arn)

    if start_change_set_response['ChangeSetId']:
        return {
            'statusCode': 200,
            'body': json.dumps(
                'Revision updated successfully and added to the dataset')
        }

    return {
        'statusCode': 500,
        'body': json.dumps(
            'Something went wrong with AWSMarketplace Catalog API')
    }
if __name__ == '__main__':
    # Local entry point: read settings from a credentials file, then call
    # source_data.source_dataset() and print the type of what it returns.
    today = date.today().strftime('%Y-%m-%d')

    # NOTE(review): hard-coded, machine-specific path -- consider taking it
    # from an environment variable or a command-line argument.
    # The file is read inside `with` so the handle is always closed.
    with open('/Users/nfunke/iCoding/2021/rearc-data/credentials', 'r') as file1:
        lines = file1.readlines()

    # The first three lines of the file are used, in this order.
    REGION_NAME = lines[0].rstrip()
    AWS_SERVER_PUBLIC_KEY = lines[1].rstrip()
    AWS_SERVER_SECRET_KEY = lines[2].rstrip()
    RUN_LOCAL = True

    # Earlier experiments, kept commented out for reference:
    # source_dataset_url = "https://pasteur.epa.gov/uploads/10.23719/1517796/SupplyChainEmissionFactorsforUSIndustriesCommodities.xlsx"
    # df = pd.DataFrame.from_dict(pd.read_excel(source_dataset_url, sheet_name=None, engine='openpyxl'), orient='index', columns=['sheet','data'])
    # df = pd.read_excel(source_dataset_url, sheet_name=None, engine='openpyxl')
    # for row in df.rows:
    #     print(row)
    # df.columns.values[0] = 'Sheet'
    # for keys in df.keys():
    #     print (keys)
    #     df[keys].to_csv('/Users/nfunke/Temp/' %keys)
    # isempty = df.empty

    asset_list = source_data.source_dataset(
        # Previously tried argument sets (all currently disabled):
        # "world-bank-cpi.csv", "norbert-adx-test2", AWS_SERVER_SECRET_KEY
        # "world-bank-cpi.csv", "rearc-data-provider", AWS_SERVER_SECRET_KEY
        # source_dataset_url = "http://www.who.int/entity/immunization/monitoring_surveillance/data/incidence_series.xls"
    )
    print(type(asset_list))
def lambda_handler(event, context):
    """Import all assets into a new revision in batches and publish it.

    Steps performed:
      1. ``source_dataset()`` supplies the asset list.
      2. A revision is created and the assets are imported through
         ``IMPORT_ASSETS_FROM_S3`` jobs of at most 100 assets each, run one
         after another; each job is polled until it reaches a terminal
         state.
      3. The revision is finalized and, if finalized, added to the product
         through ``start_change_set``.

    ``event`` and ``context`` are accepted but not used.

    Returns:
        dict: ``{'statusCode': int, 'body': str}`` where ``body`` is a
        JSON-encoded status message (200 on success or when there is
        nothing to publish, 500 on failure).

    Raises:
        Exception: if ``source_dataset()`` does not return a list, or if an
            import job ends in ``ERROR``, ``CANCELLED`` or ``TIMED_OUT``.
    """
    asset_list = source_dataset()

    if not isinstance(asset_list, list):
        raise Exception('Something went wrong when uploading files to s3')

    if not asset_list:
        print(
            'No need for a revision, all datasets included with this product are current'
        )
        return {
            'statusCode': 200,
            'body': json.dumps(
                'No need for a revision, all datasets included with this product are current'
            )
        }

    create_revision_response = dataexchange.create_revision(
        DataSetId=data_set_id)
    revision_id = create_revision_response['Id']
    revision_arn = create_revision_response['Arn']

    print('Total assets to be uploaded', len(asset_list))

    # ceil, not floor + 1: an exact multiple of 100 must not report an
    # extra job that never runs.
    total_jobs = math.ceil(len(asset_list) / 100)

    for start_index in range(0, len(asset_list), 100):
        job_number = start_index // 100 + 1
        # Slicing clamps at the end of the list, so the last batch may be
        # shorter than 100.
        batch = asset_list[start_index:start_index + 100]
        print('asset_list {}:'.format(job_number), batch)

        import_job = dataexchange.create_job(
            Type='IMPORT_ASSETS_FROM_S3',
            Details={
                'ImportAssetsFromS3': {
                    'DataSetId': data_set_id,
                    'RevisionId': revision_id,
                    'AssetSources': batch
                }
            })

        # Start the Job and remember its JobId for polling.
        job_id = import_job['Id']
        dataexchange.start_job(JobId=job_id)

        # Poll until this batch's job reaches a terminal state.
        while True:
            get_job_response = dataexchange.get_job(JobId=job_id)
            job_state = get_job_response['State']

            if job_state == 'COMPLETED':
                print('JobId: {}, Job {} of {} completed'.format(
                    job_id, job_number, total_jobs))
                break
            if job_state == 'ERROR':
                job_errors = get_job_response['Errors']
                raise Exception(
                    'JobId: {} failed with errors:\n{}'.format(
                        job_id, job_errors))
            if job_state in ('CANCELLED', 'TIMED_OUT'):
                # These are terminal too; without this check the loop would
                # keep polling until the invocation itself is killed.
                raise Exception(
                    'JobId: {} ended in state {}:\n{}'.format(
                        job_id, job_state, get_job_response.get('Errors')))

            # Sleep to ensure we don't get throttled by the GetJob API.
            time.sleep(0.2)

    update_revision_response = dataexchange.update_revision(
        DataSetId=data_set_id,
        RevisionId=revision_id,
        Comment=revision_comment,
        Finalized=True)

    if not update_revision_response['Finalized']:
        print('Revision did not complete successfully')
        return {
            'statusCode': 500,
            'body': json.dumps('Revision did not complete successfully')
        }

    # Call AWSMarketplace Catalog's APIs to add revisions
    describe_entity_response = marketplace.describe_entity(
        Catalog='AWSMarketplace', EntityId=product_id)
    start_change_set_response = start_change_set(
        describe_entity_response, revision_arn)

    if start_change_set_response['ChangeSetId']:
        print('Revision updated successfully and added to the dataset')
        return {
            'statusCode': 200,
            'body': json.dumps(
                'Revision updated successfully and added to the dataset'
            )
        }

    print('Something went wrong with AWSMarketplace Catalog API')
    return {
        'statusCode': 500,
        'body': json.dumps(
            'Something went wrong with AWSMarketplace Catalog API')
    }