Exemple #1
0
def delete_glue_database(database_name):
    """Delete the Glue database named ``database_name``.

    The outcome is reported through ``ul.log``: an ``info`` entry when the
    delete call returns, or an ``error`` entry carrying the exception when
    anything inside the ``try`` body raises. The exception is not re-raised.
    """
    try:
        glue_client.delete_database(Name=database_name)
        success_msg = (
            f'Glue database {database_name} has been successfully deleted')
        ul.log(info=success_msg)
    except Exception as err:
        ul.log(error=err)
Exemple #2
0
def create_glue_database(database_definition):
    """Create a Glue database from ``database_definition``.

    ``database_definition`` is handed to ``glue_client.create_database`` as
    ``DatabaseInput``; its ``"Name"`` entry is read for the success message.
    Any exception raised inside the ``try`` body is logged through
    ``ul.log(error=...)`` and not re-raised.
    """
    try:
        glue_client.create_database(DatabaseInput=database_definition)
        created_name = database_definition["Name"]
        success_msg = (
            f'Glue database {created_name} has been successfully created')
        ul.log(info=success_msg)
    except Exception as err:
        ul.log(error=err)
Exemple #3
0
def create_glue_table(database_name, table_definition):
    """Create a Glue table in ``database_name`` from ``table_definition``.

    ``table_definition`` is handed to ``glue_client.create_table`` as
    ``TableInput``; its ``"Name"`` entry is read for the success message.
    Any exception raised inside the ``try`` body is logged through
    ``ul.log(error=...)`` and not re-raised.
    """
    try:
        glue_client.create_table(
            DatabaseName=database_name,
            TableInput=table_definition,
        )
        created_name = table_definition["Name"]
        success_msg = (
            f'Glue table {created_name} has been successfully created')
        ul.log(info=success_msg)
    except Exception as err:
        ul.log(error=err)
Exemple #4
0
def create_error_vault_database_table_from_vault_database_table(
        vault_bucket, vault_database_name, vault_table_name):
    """Create the error-vault counterpart of a vault table in Glue.

    The definition of ``vault_table_name`` in ``vault_database_name`` is
    fetched, renamed, pointed at the error-table S3 location derived from
    ``vault_bucket`` and extended with the error-tracking columns below.
    It is then created in the error-vault database, which is itself created
    first when ``get_glue_database_definition`` returns ``None`` for it.

    Any exception raised along the way is logged through
    ``ul.log(error=...)`` and not re-raised; nothing is returned.
    """
    error_vault_database_name = get_error_vault_database_name(
        vault_database_name)
    error_vault_table_name = get_error_table_name(vault_table_name)

    # Extra columns appended after the columns copied from the vault table.
    error_columns = [{
        'Name': 'error_load_ts',
        'Type': 'timestamp',
        'Comment': 'loading timestamp of error rows'
    }, {
        'Name': 'error_column',
        'Type': 'string',
        'Comment': 'column name which has error value '
    }, {
        'Name': 'error_value',
        'Type': 'string',
        'Comment': 'error value '
    }, {
        'Name': 'error_description',
        'Type': 'string',
        'Comment': 'error description for rejected rows'
    }, {
        'Name': 'stage_path',
        'Type': 'string',
        'Comment': 'data source'
    }]
    # Keys stripped from the fetched definition before it is reused as the
    # input of create_glue_table (presumably output-only Glue fields that
    # TableInput does not accept -- confirm against the Glue API).
    keys_to_remove = (
        'DatabaseName', 'CreateTime', 'UpdateTime', 'CreatedBy',
        'IsRegisteredWithLakeFormation', 'CatalogId',
    )

    try:
        if get_glue_database_definition(error_vault_database_name) is None:
            create_error_vault_database_from_vault_database(
                vault_database_name)

        response = get_glue_table_definition(vault_database_name,
                                             vault_table_name)
        table_definition = response['Table']

        table_definition['Name'] = error_vault_table_name
        storage_descriptor = table_definition['StorageDescriptor']
        storage_descriptor['Location'] = get_error_table_s3_uri(
            vault_bucket, vault_table_name)
        storage_descriptor['Columns'].extend(error_columns)

        # pop() with a default: some of these keys may be missing from the
        # fetched definition, and a missing one must not abort the creation.
        for key in keys_to_remove:
            table_definition.pop(key, None)

        create_glue_table(error_vault_database_name, table_definition)
    except Exception as exc:
        ul.log(error=exc)
Exemple #5
0
def get_s3_event_bucket_and_key(event):
    """Return a ``(bucket, key)`` tuple taken from the first S3 record.

    ``event`` is either an S3 notification or an SNS notification whose
    ``Message`` is the JSON-encoded S3 notification; in the SNS case the
    inner message is decoded first. The incoming event (and the decoded
    inner event, if any) is logged through ``ul.log``.

    The key is returned exactly as it appears in the event; no decoding is
    applied here.
    """
    ul.log(event=event)
    first_record = event['Records'][0]
    # SNS records carry 'EventSource', while S3 records spell the field
    # 'eventSource'. Indexing 'EventSource' directly would raise KeyError
    # for a plain S3 event, so look it up with .get().
    if first_record.get('EventSource') == 'aws:sns':
        event = json.loads(first_record['Sns']['Message'])
        ul.log(event=event)
    result = jmespath.search(
        'Records[*].s3.{ Bucket : bucket.name, Key : object.key }|[0]', event)
    return (result["Bucket"], result['Key'])
Exemple #6
0
def send_msg_list_to_sqs(queue_url, msg_list):
    """Send ``msg_list`` to the SQS queue at ``queue_url`` in batches.

    An average items-per-batch figure is computed from the number of
    messages, the batch byte limit returned by ``get_batch_bytes()`` and
    the JSON size of the whole list. That figure is logged, then each batch
    produced by ``generate_json_item_batches`` is sent with
    ``send_msg_obj_to_sqs``.
    """
    assert isinstance(msg_list, Iterable)

    message_count = len(msg_list)
    batch_bytes = get_batch_bytes()
    payload_size = size_in_json(msg_list)
    items_per_batch = calc_avg_items_per_batch(
        item_count=message_count,
        batch_size=batch_bytes,
        total_size=payload_size,
    )
    ul.log(avg_items_per_batch=items_per_batch, queue_url=queue_url)

    batches = generate_json_item_batches(
        msg_list, items_per_batch, get_batch_bytes())
    for batch in batches:
        send_msg_obj_to_sqs(queue_url, batch)
Exemple #7
0
def create_error_vault_database_from_vault_database(vault_database_name):
    """Create the error-vault counterpart of a vault database in Glue.

    When ``get_glue_database_definition`` already returns a definition for
    the error-vault database, an "already exists" error is logged and
    nothing else happens. Otherwise the definition of
    ``vault_database_name`` is fetched, renamed, given the description
    ``'Data Lake Errors'`` and passed to ``create_glue_database``.

    Exceptions raised while copying or creating are logged through
    ``ul.log(error=...)`` and not re-raised; nothing is returned.
    """
    error_vault_database_name = get_error_vault_database_name(
        vault_database_name)

    if get_glue_database_definition(error_vault_database_name) is not None:
        ul.log(error=f'{error_vault_database_name} already exists')
        return

    try:
        response = get_glue_database_definition(vault_database_name)
        database_definition = response['Database']

        database_definition['Name'] = error_vault_database_name
        database_definition['Description'] = 'Data Lake Errors'
        # Strip keys that must not be sent back on creation. pop() with a
        # default so a definition lacking one of them does not abort the
        # whole operation with a KeyError.
        for key in ('CreateTime', 'CatalogId'):
            database_definition.pop(key, None)

        create_glue_database(database_definition)
    except Exception as exc:
        ul.log(error=exc)
Exemple #8
0
def send_batches_to_sqs(queue_url, batches):
    """Send every batch in ``batches`` to the SQS queue at ``queue_url``.

    Batches are sent as-is with ``send_msg_obj_to_sqs``, so they must
    already be of a size suitable for SQS. A non-``None`` return value from
    that call is treated as a failure and logged together with its HTTP
    status code, the batch index and the batch itself.
    """
    ul.log(status='starting send batches', queue_url=queue_url)
    for batch_id, batch in enumerate(batches):
        ul.log(batch=batch_id)
        response = send_msg_obj_to_sqs(queue_url=queue_url, msg_obj=batch)
        if response is None:
            continue

        status_code = response["ResponseMetadata"]["HTTPStatusCode"]
        ul.log(
            Error='Failed to send batch to queue',
            batch_id=batch_id,
            httpstatuscode=status_code,
            queue_url=queue_url,
            response=response,
            message=batch,
        )
Exemple #9
0
def delete_glue_table(database_name, table_name):
    """Delete table ``table_name`` from the Glue database ``database_name``.

    The outcome is reported through ``ul.log``: an ``info`` entry when the
    delete call returns, or an ``error`` entry carrying the exception when
    anything inside the ``try`` body raises. The exception is not re-raised.
    """
    try:
        glue_client.delete_table(
            DatabaseName=database_name,
            Name=table_name,
        )
        success_msg = f'Glue table {table_name} has been successfully deleted'
        ul.log(info=success_msg)
    except Exception as err:
        ul.log(error=err)