def lambda_handler(event, context):
    """Build the asset manifest and upload it to the manifest bucket.

    Parameters
    ----------
    event, context:
        Standard AWS Lambda invocation arguments (unused here).

    Raises
    ------
    Exception
        If uploading the manifest file fails; the original error is
        chained so the full traceback is preserved.
    """
    asset_list = source_dataset()

    # Only upload when source_dataset() produced assets; an empty/None
    # result means there is nothing to publish.
    if asset_list:
        try:
            upload_manifest_file(asset_list)
        except Exception as e:
            # Chain the cause (`from e`) instead of swallowing it --
            # the original re-raise discarded the underlying traceback.
            raise Exception(
                f"Something went wrong when uploading manifest file to manifest bucket: {e}"
            ) from e
def lambda_handler(event, context):
    """Publish new assets to AWS Data Exchange using parallel import jobs.

    Splits the asset list into 100-asset chunks (the per-job limit of the
    IMPORT_ASSETS_FROM_S3 job type), imports them concurrently via
    ``jobs_handler`` in a process pool, finalizes the revision, and
    publishes it through the AWS Marketplace Catalog API.

    Returns
    -------
    dict
        ``{'statusCode': ..., 'body': ...}`` describing the outcome.

    Raises
    ------
    Exception
        If ``source_dataset()`` did not return a list (upload failure).
    """
    asset_list = source_dataset()

    # BUG FIX: validate the result *before* chunking it. The original code
    # checked ``type(asset_lists) == list`` on the chunked list, which is
    # always a list, so the error branch below was unreachable and a
    # non-list result crashed on the slice instead.
    if isinstance(asset_list, list):

        if not asset_list:
            print(
                'No need for a revision, all datasets included with this product are up to date')
            return {
                'statusCode': 200,
                'body': json.dumps('No need for a revision, all datasets included with this product are up to date')
            }

        # IMPORT_ASSETS_FROM_S3 jobs accept at most 100 assets each.
        chunks = [asset_list[i:i + 100]
                  for i in range(0, len(asset_list), 100)]

        create_revision_response = dataexchange.create_revision(
            DataSetId=data_set_id)
        revision_id = create_revision_response['Id']
        revision_arn = create_revision_response['Arn']

        # Attach per-job bookkeeping so jobs_handler can report progress.
        total_jobs = str(len(chunks))
        asset_lists = [{
            'asset_list': chunk,
            'revision_id': revision_id,
            'job_num': str(idx + 1),
            'total_jobs': total_jobs
        } for idx, chunk in enumerate(chunks)]

        # Import up to 10 chunks concurrently.
        with Pool(10) as p:
            p.map(jobs_handler, asset_lists)

        update_revision_response = dataexchange.update_revision(
            DataSetId=data_set_id,
            RevisionId=revision_id,
            Comment=revision_comment,
            Finalized=True
        )

        revision_state = update_revision_response['Finalized']

        if revision_state:
            # Call AWSMarketplace Catalog's APIs to add revisions
            describe_entity_response = marketplace.describe_entity(
                Catalog='AWSMarketplace', EntityId=product_id)
            start_change_set_response = start_change_set(
                describe_entity_response, revision_arn)
            if start_change_set_response['ChangeSetId']:
                print('Revision updated successfully and added to the dataset')
                return {
                    'statusCode': 200,
                    'body': json.dumps('Revision updated successfully and added to the dataset')
                }
            else:
                print('Something went wrong with AWSMarketplace Catalog API')
                return {
                    'statusCode': 500,
                    'body': json.dumps('Something went wrong with AWSMarketplace Catalog API')
                }
        else:
            print('Revision did not complete successfully')
            return {
                'statusCode': 500,
                'body': json.dumps('Revision did not complete successfully')
            }
    else:
        raise Exception('Something went wrong when uploading files to s3')
def lambda_handler(event, context):
    """Import a single new S3 object into an AWS Data Exchange revision.

    Uploads the source file, creates a revision, runs one
    IMPORT_ASSETS_FROM_S3 job for it, polls the job until it reaches a
    terminal state, finalizes the revision, and publishes it via the AWS
    Marketplace Catalog API.

    Returns
    -------
    dict
        ``{'statusCode': ..., 'body': ...}`` describing the outcome.

    Raises
    ------
    Exception
        If the import job ends in the ERROR state.
    """
    source_dataset(new_filename, s3_bucket, new_s3_key)

    create_revision_response = dataexchange.create_revision(
        DataSetId=data_set_id)
    revision_id = create_revision_response['Id']
    revision_arn = create_revision_response['Arn']

    # Used to store the Ids of the Jobs importing the assets to S3.
    job_ids = set()

    import_job = dataexchange.create_job(
        Type='IMPORT_ASSETS_FROM_S3',
        Details={
            'ImportAssetsFromS3': {
                'DataSetId': data_set_id,
                'RevisionId': revision_id,
                'AssetSources': [{
                    'Bucket': s3_bucket,
                    'Key': new_s3_key
                }]
            }
        })

    # Start the Job and save the JobId.
    dataexchange.start_job(JobId=import_job['Id'])
    job_ids.add(import_job['Id'])

    # Iterate until all remaining jobs have reached a terminal state, or an error is found.
    completed_jobs = set()

    while job_ids != completed_jobs:
        for job_id in job_ids:
            if job_id in completed_jobs:
                continue
            get_job_response = dataexchange.get_job(JobId=job_id)
            if get_job_response['State'] == 'COMPLETED':
                print("Job {} completed".format(job_id))
                completed_jobs.add(job_id)
            if get_job_response['State'] == 'ERROR':
                job_errors = get_job_response['Errors']
                raise Exception('JobId: {} failed with errors:\n{}'.format(
                    job_id, job_errors))
            # Sleep to ensure we don't get throttled by the GetJob API.
            time.sleep(0.2)

    # Finalizing the revision makes its assets available to subscribers.
    update_revision_response = dataexchange.update_revision(
        DataSetId=data_set_id,
        RevisionId=revision_id,
        Comment=revision_comment,
        Finalized=True)

    revision_state = update_revision_response['Finalized']

    # Idiom fix: truthiness test instead of ``== True``.
    if revision_state:
        # Call AWSMarketplace Catalog's APIs to add revisions
        describe_entity_response = marketplace.describe_entity(
            Catalog='AWSMarketplace', EntityId=product_id)
        start_change_set_response = start_change_set(describe_entity_response,
                                                     revision_arn)
        if start_change_set_response['ChangeSetId']:
            return {
                'statusCode': 200,
                'body': json.dumps(
                    'Revision updated successfully and added to the dataset')
            }
        else:
            return {
                'statusCode': 500,
                'body': json.dumps(
                    'Something went wrong with AWSMarketplace Catalog API')
            }
    else:
        return {
            'statusCode': 500,
            'body': json.dumps('Revision did not complete successfully')
        }
# 예제 #4 (Example 4)
# 0  -- stray scrape artifact, commented out so the file stays parseable
if __name__ == '__main__':
    # Local driver: load AWS credentials from a local file and fetch the
    # source dataset to inspect its result type.

    today = date.today().strftime('%Y-%m-%d')

    # NOTE(review): credentials live at a hard-coded personal path --
    # consider the standard AWS credential chain or environment variables.
    # BUG FIX: use a context manager so the file handle is closed
    # deterministically (the original open() was never closed).
    with open('/Users/nfunke/iCoding/2021/rearc-data/credentials', 'r') as cred_file:
        lines = cred_file.readlines()

    # Credentials file layout: region on line 1, public key on line 2,
    # secret key on line 3 (each followed by a newline).
    REGION_NAME = lines[0].rstrip()
    AWS_SERVER_PUBLIC_KEY = lines[1].rstrip()
    AWS_SERVER_SECRET_KEY = lines[2].rstrip()
    RUN_LOCAL = True

    # source_dataset() is called with no arguments; earlier experiments
    # with explicit bucket/key arguments were removed as dead code.
    asset_list = source_data.source_dataset()
    print(type(asset_list))
# 예제 #5 (Example 5)
# 0  -- stray scrape artifact, commented out so the file stays parseable
def lambda_handler(event, context):
    """Import assets to AWS Data Exchange in sequential 100-asset batches.

    Creates a revision, runs one IMPORT_ASSETS_FROM_S3 job per batch of
    100 assets (the per-job limit), polls each job to completion,
    finalizes the revision, and publishes it through the AWS Marketplace
    Catalog API.

    Returns
    -------
    dict
        ``{'statusCode': ..., 'body': ...}`` describing the outcome.

    Raises
    ------
    Exception
        If ``source_dataset()`` did not return a list, or an import job
        ends in the ERROR state.
    """
    asset_list = source_dataset()

    # Idiom fix: isinstance instead of ``type(x) == list``.
    if isinstance(asset_list, list):

        if not asset_list:
            print(
                'No need for a revision, all datasets included with this product are current'
            )
            return {
                'statusCode': 200,
                'body': json.dumps(
                    'No need for a revision, all datasets included with this product are current'
                )
            }

        create_revision_response = dataexchange.create_revision(
            DataSetId=data_set_id)
        revision_id = create_revision_response['Id']
        revision_arn = create_revision_response['Arn']

        print('Total assets to be uploaded', len(asset_list))

        # Each IMPORT_ASSETS_FROM_S3 job accepts at most 100 assets.
        # BUG FIX: use ceil for the job count -- the original
        # ``floor(n / 100) + 1`` over-counted by one whenever the asset
        # count was an exact multiple of 100 (e.g. 200 assets -> "of 3").
        batch_size = 100
        total_jobs = math.ceil(len(asset_list) / batch_size)

        for job_num, start_index in enumerate(
                range(0, len(asset_list), batch_size), start=1):

            end_index = min(start_index + batch_size, len(asset_list))

            print('asset_list {}:'.format(job_num),
                  asset_list[start_index:end_index])

            import_job = dataexchange.create_job(
                Type='IMPORT_ASSETS_FROM_S3',
                Details={
                    'ImportAssetsFromS3': {
                        'DataSetId': data_set_id,
                        'RevisionId': revision_id,
                        'AssetSources': asset_list[start_index:end_index]
                    }
                })

            # Start the Job and save the JobId.
            dataexchange.start_job(JobId=import_job['Id'])
            job_ids = {import_job['Id']}

            # Iterate until all remaining jobs have reached a terminal
            # state, or an error is found.
            completed_jobs = set()

            while job_ids != completed_jobs:
                for job_id in job_ids:
                    if job_id in completed_jobs:
                        continue
                    get_job_response = dataexchange.get_job(JobId=job_id)
                    if get_job_response['State'] == 'COMPLETED':
                        print('JobId: {}, Job {} of {} completed'.format(
                            job_id, job_num, total_jobs))
                        completed_jobs.add(job_id)
                    if get_job_response['State'] == 'ERROR':
                        job_errors = get_job_response['Errors']
                        raise Exception(
                            'JobId: {} failed with errors:\n{}'.format(
                                job_id, job_errors))
                    # Sleep to ensure we don't get throttled by the GetJob API.
                    time.sleep(0.2)

        # Finalizing the revision makes its assets available.
        update_revision_response = dataexchange.update_revision(
            DataSetId=data_set_id,
            RevisionId=revision_id,
            Comment=revision_comment,
            Finalized=True)

        revision_state = update_revision_response['Finalized']

        if revision_state:
            # Call AWSMarketplace Catalog's APIs to add revisions
            describe_entity_response = marketplace.describe_entity(
                Catalog='AWSMarketplace', EntityId=product_id)
            start_change_set_response = start_change_set(
                describe_entity_response, revision_arn)
            if start_change_set_response['ChangeSetId']:
                print('Revision updated successfully and added to the dataset')
                return {
                    'statusCode': 200,
                    'body': json.dumps(
                        'Revision updated successfully and added to the dataset'
                    )
                }
            else:
                print('Something went wrong with AWSMarketplace Catalog API')
                return {
                    'statusCode': 500,
                    'body': json.dumps(
                        'Something went wrong with AWSMarketplace Catalog API')
                }
        else:
            print('Revision did not complete successfully')
            return {
                'statusCode': 500,
                'body': json.dumps('Revision did not complete successfully')
            }
    else:
        raise Exception('Something went wrong when uploading files to s3')