Beispiel #1
0
def handler(event, context):
    """Create, update, or delete the Glue database and crawler for a stack.

    Reads the stack id and ``RequestType`` from ``event``, looks up the
    service role and S3 storage resources of that stack, and then:

    * ``delete`` -- stops and deletes the crawler if it exists, and deletes
      the database if it exists;
    * ``create`` -- creates the database and the crawler when they are
      missing;
    * any other request type -- creates the crawler when it is missing,
      otherwise stops and updates it.

    Args:
        event: Mapping holding ``c.ENV_STACK_ID`` and ``'RequestType'``.
        context: Not used by this function.

    Returns:
        The result of ``custom_resource_response.success_response({}, "*")``.

    Raises:
        errors.ClientError: If the service role or the S3 storage resource is
            not among the stack's resources.
    """
    print("Start Glue")
    stack_id = event[c.ENV_STACK_ID]
    resources = util.get_stack_resources(stack_id)
    request_type = event['RequestType']
    db_name = athena.get_database_name(stack_id, False)
    glue = Glue()

    # Start from None so that a resource missing from the stack reaches the
    # explicit ClientError checks below instead of failing with
    # UnboundLocalError on the first read of an unassigned local.
    role_name = None
    storage_physical_id = None
    for resource in resources:
        if resource.logical_id == c.RES_SERVICE_ROLE:
            role_name = resource.physical_id
        if resource.logical_id == c.RES_S3_STORAGE:
            storage_physical_id = resource.physical_id

    if role_name is None:
        raise errors.ClientError("The logical resource '{}' was not found.  Is the resource in the cloud formation stack?".format(c.RES_SERVICE_ROLE))

    if storage_physical_id is None:
        raise errors.ClientError("The logical resource '{}' was not found.  Is the resource in the cloud formation stack?".format(c.RES_S3_STORAGE))

    crawler_id_1 = glue.get_crawler_name(stack_id)
    # One S3 target per default event table the crawler is created with.
    srcs = [
        {
            'Path': "{}/{}{}".format(storage_physical_id, "table=", event_name),
            'Exclusions': [],
        }
        for event_name in (DEFAULT_EVENTS.CLIENTINITCOMPLETE, DEFAULT_EVENTS.SESSIONSTART)
    ]

    # Single-argument print keeps the same output text under the Python 2
    # print statement and the Python 3 print function.
    print("{} {} {} role:  {} s3:  {}".format(
        request_type, db_name, crawler_id_1, role_name, storage_physical_id))

    action = request_type.lower()
    if action == 'delete':
        if glue.get_crawler(crawler_id_1) is not None:
            # Stop before deleting, as the original flow does.
            glue.stop_crawler(crawler_id_1)
            glue.delete_crawler(crawler_id_1)

        if glue.database_exists(db_name):
            glue.delete_database(db_name)
    elif action == 'create':
        if not glue.database_exists(db_name):
            glue.create_database(db_name)

        if glue.get_crawler(crawler_id_1) is None:
            glue.create_crawler(crawler_id_1, role_name, db_name, athena.get_table_prefix(stack_id), srcs=srcs)
    else:
        # Any other request type is handled as an update.
        if glue.get_crawler(crawler_id_1) is None:
            glue.create_crawler(crawler_id_1, role_name, db_name, athena.get_table_prefix(stack_id), srcs=srcs)
        else:
            glue.stop_crawler(crawler_id_1)
            glue.update_crawler(crawler_id_1, role_name, db_name, athena.get_table_prefix(stack_id))

    return custom_resource_response.success_response({}, "*")
Beispiel #2
0
def update_glue_crawler_datastores(context, datastores):
    """Add crawler S3 targets for datastores the crawler does not cover yet.

    The crawler description is fetched through ``Glue.get_crawler`` only when
    the module-level ``glue_crawler_response`` is falsy, and is stored back
    into that global. Every entry of ``datastores`` whose name matches an
    existing S3 target path (bucket prefix stripped, lower-cased) is removed
    from ``datastores`` in place. If any entries remain, the crawler is
    updated with its existing targets plus one new target per remaining entry.

    Args:
        context: Mapping providing ``c.KEY_LAMBDA_FUNCTION``, used to derive
            the crawler name.
        datastores: Mapping keyed by table name; mutated in place.

    Returns:
        None.
    """
    global glue_crawler_response
    glue = Glue()
    crawler_name = glue.get_crawler_name(context[c.KEY_LAMBDA_FUNCTION])
    if not glue_crawler_response:
        glue_crawler_response = glue.get_crawler(crawler_name)
    if glue_crawler_response is None:
        # No crawler description available: nothing to update.
        return

    storage_bucket = os.environ[c.RES_S3_STORAGE]
    bucket_prefix = "s3://{}/".format(storage_bucket)
    path_format = "s3://{}/{}".format(storage_bucket, "{}")

    # Keep every existing target and drop the datastores they already cover.
    srcs = []
    for existing_target in glue_crawler_response['Crawler']['Targets']['S3Targets']:
        covered_table = existing_target['Path'].replace(bucket_prefix, '').lower()
        datastores.pop(covered_table, None)
        srcs.append(existing_target)

    if not datastores:
        return

    srcs.extend(
        {'Path': path_format.format(new_table), 'Exclusions': []}
        for new_table in datastores
    )
    print("Defining GLUE datastores")
    storage_env = os.environ[c.ENV_S3_STORAGE]
    db_name = athena.get_database_name(storage_env)
    table_prefix = athena.get_table_prefix(storage_env)
    service_role = os.environ[c.ENV_SERVICE_ROLE]
    glue.update_crawler(crawler_name, service_role, db_name, table_prefix, srcs=srcs)
Beispiel #3
0
def get_crawler_status(request, name):
    """Return the state of the Glue crawler derived from ``name``.

    Hyphens in ``name`` are replaced with underscores before the lookup.

    Args:
        request: Not used by this function.
        name: Crawler name; may contain hyphens.

    Returns:
        The result of ``custom_resource_response.success_response`` called
        with ``{"State": <crawler state>}`` and ``"*"``.
    """
    glue_client = Glue()
    crawler_name = name.replace('-', '_')
    crawler = glue_client.get_crawler(crawler_name)
    # NOTE(review): other code in this file compares the result of
    # get_crawler against None; if None can be returned here, the lookup
    # below raises TypeError -- confirm whether that case needs handling.
    state = crawler['Crawler']['State']
    return custom_resource_response.success_response({"State": state}, "*")