import json
import sys
import urllib.parse

import boto3
from botocore.exceptions import ClientError

# Boss-specific imports; the module paths below assume the ndingest package
# layout used by the Boss ingest lambdas.
from ndingest.ndbucket.tilebucket import TileBucket
from ndingest.nddynamo.boss_tileindexdb import BossTileIndexDB
from ndingest.ndingestproj.bossingestproj import BossIngestProj
from ndingest.ndqueue.ingestqueue import IngestQueue
from ndingest.ndqueue.uploadqueue import UploadQueue
from ndingest.settings.bosssettings import BossSettings  # assumed path

# AWSNames and parse_line() are provided elsewhere in the lambda deployment
# package; their import paths vary by Boss release, so they are not shown here.

# Assumed constant: SQS caps SendMessageBatch at 10 messages per call.
MAX_BATCH_MSGS = 10


def enqueue_msgs(fp):
    """Parse the given messages and send them to the SQS upload queue.

    Args:
        fp (file-like object): File-like object containing a JSON header line
            followed by one message per line.
    """
    read_header = False
    msgs = []
    upload_queue = None
    lineNum = 0

    for line in fp:
        lineNum += 1
        if not read_header:
            header = json.loads(line)
            if 'upload_queue_url' not in header:
                raise KeyError('Expected upload_queue_url in header')
            if 'ingest_queue_url' not in header:
                raise KeyError('Expected ingest_queue_url in header')
            if 'job_id' not in header:
                raise KeyError('Expected job_id in header')
            read_header = True
            continue

        try:
            msgs.append(parse_line(header, line))
        except Exception:
            print('Error parsing line {}: {}'.format(lineNum, line))
            continue

        if len(msgs) == 1 and upload_queue is None:
            # Instantiate the upload queue object.
            asDict = json.loads(msgs[0])
            boss_ingest_proj = BossIngestProj.fromTileKey(asDict['tile_key'])
            boss_ingest_proj.job_id = header['job_id']
            upload_queue = UploadQueue(boss_ingest_proj)
        if len(msgs) >= MAX_BATCH_MSGS:
            # Enqueue messages.
            upload_queue.sendBatchMessages(msgs)
            msgs = []

    if len(msgs) > 0:
        # Enqueue any remaining messages.
        upload_queue.sendBatchMessages(msgs)
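
# Example usage (a sketch; the file name is hypothetical). The input file is
# expected to contain a JSON header line followed by one message per line,
# matching what enqueue_msgs() parses above:
#
#   with open('upload_messages.txt') as fp:
#       enqueue_msgs(fp)
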
SETTINGS = BossSettings.load()

# Parse input args passed as a JSON string from the lambda loader
json_event = sys.argv[1]
event = json.loads(json_event)
print(event)

# extract bucket name and tile key from the event
bucket = event['Records'][0]['s3']['bucket']['name']
tile_key = urllib.parse.unquote_plus(
    event['Records'][0]['s3']['object']['key'])
print("Bucket: {}".format(bucket))
print("Tile key: {}".format(tile_key))

# fetch metadata from the s3 object
proj_info = BossIngestProj.fromTileKey(tile_key)
tile_bucket = TileBucket(proj_info.project_name)
message_id, receipt_handle, metadata = tile_bucket.getMetadata(tile_key)
print("Metadata: {}".format(metadata))

# Currently this is what is sent from the client for the "metadata"
#  metadata = {'chunk_key': 'chunk_key',
#              'ingest_job': self.ingest_job_id,
#              'parameters': {"upload_queue": XX,
#                             "ingest_queue": XX,
#                             "ingest_lambda": XX,
#                             "KVIO_SETTINGS": XX,
#                             "STATEIO_CONFIG": XX,
#                             "OBJECTIO_CONFIG": XX
#                             },
#              'tile_size_x': "{}".format(self.config.config_data["ingest_job"]["tile_size"]["x"]),
def process(msg, context, region):
    """
    Process a single message.

    Args:
        msg (dict): Contents described at the top of the file.
        context (Context): Lambda context object.
        region (str): Lambda execution region.
    """

    job_id = int(msg['ingest_job'])
    chunk_key = msg['chunk_key']
    tile_key = msg['tile_key']
    print("Tile key: {}".format(tile_key))

    proj_info = BossIngestProj.fromTileKey(tile_key)

    # Set the job id
    proj_info.job_id = msg['ingest_job']

    print("Data: {}".format(msg))

    # Update the chunk's entry in the DynamoDB tile index.
    tile_index_db = BossTileIndexDB(proj_info.project_name)
    chunk = tile_index_db.getCuboid(chunk_key, job_id)
    if chunk:
        if tile_index_db.cuboidReady(chunk_key, chunk["tile_uploaded_map"]):
            print("Chunk already has all its tiles: {}".format(chunk_key))
            # Go ahead and set up to fire another ingest lambda so this tile
            # entry is deleted when the ingest lambda completes successfully.
            chunk_ready = True
        else:
            print("Updating tile index for chunk_key: {}".format(chunk_key))
            chunk_ready = tile_index_db.markTileAsUploaded(chunk_key, tile_key, job_id)
    else:
        # First tile in the chunk
        print("Creating first entry for chunk_key: {}".format(chunk_key))
        try:
            tile_index_db.createCuboidEntry(chunk_key, job_id)
        except ClientError as err:
            # Under _exceptional_ circumstances, it's possible for another lambda
            # to beat the current instance to creating the initial cuboid entry
            # in the index.
            error_code = err.response['Error'].get('Code', 'Unknown')
            if error_code == 'ConditionalCheckFailedException':
                print('Chunk key entry already created - proceeding.')
            else:
                raise
        chunk_ready = tile_index_db.markTileAsUploaded(chunk_key, tile_key, job_id)

    # ingest the chunk if we have all the tiles
    if chunk_ready:
        print("CHUNK READY SENDING MESSAGE: {}".format(chunk_key))
        # Enqueue a new job on the ingest queue now that all tiles are present.
        ingest_queue = IngestQueue(proj_info)
        ingest_queue.sendMessage(json.dumps(msg))

        # Invoke Ingest lambda function
        names = AWSNames.create_from_lambda_name(context.function_name)
        lambda_client = boto3.client('lambda', region_name=region)
        lambda_client.invoke(
            FunctionName=names.tile_ingest_lambda,
            InvocationType='Event',
            Payload=json.dumps(msg).encode())
    else:
        print("Chunk not ready for ingest yet: {}".format(chunk_key))

    print("DONE!")