def generate_raw_uploads_for_processing(attempt_reprocessing, limit=None):
    count = 0
    for object in aws.list_all_objects_in(settings.S3_RAW_LOG_UPLOAD_BUCKET, prefix="raw"):
        key = object["Key"]
        if key.endswith(".log"):
            # Don't queue the descriptor files, just the .logs
            raw_upload = RawUpload(settings.S3_RAW_LOG_UPLOAD_BUCKET, key)
            raw_upload.attempt_reprocessing = attempt_reprocessing
            yield raw_upload
            count += 1
            if limit and count >= limit:
                return

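# Illustrative usage only (not part of the original module): a minimal sketch of how
# the generator above might be consumed, re-publishing each matching raw upload to the
# processing stream via aws.publish_raw_upload_to_processing_stream (the same call used
# by queue_upload_event_for_reprocessing below). The helper name is hypothetical.
def requeue_all_raw_uploads(attempt_reprocessing=True, limit=None):
    queued = 0
    for raw_upload in generate_raw_uploads_for_processing(attempt_reprocessing, limit):
        aws.publish_raw_upload_to_processing_stream(raw_upload)
        queued += 1
    return queued
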
def get_tracing_id(event):
    """
    Returns a unique identifier for the event.
    Used in the Lambda logging system to trace sessions.
    """
    UNKNOWN_ID = "unknown-id"
    records = event["Records"]

    if len(records) > 1:
        # This is a kinesis batch invocation
        return ":".join(r["kinesis"]["partitionKey"] for r in records)

    event_data = records[0]

    if "s3" in event_data:
        # We are in the process_s3_object Lambda
        s3_event = event_data["s3"]
        raw_upload = RawUpload.from_s3_event(s3_event)
        return raw_upload.shortid
    elif "kinesis" in event_data:
        kinesis_event = event_data["kinesis"]
        # We always use the shortid as the partitionKey in kinesis streams
        return kinesis_event["partitionKey"]

    return UNKNOWN_ID

def get_tracing_id(event):
    """
    Returns a unique identifier for the event.
    Used in the Lambda logging system to trace sessions.
    """
    UNKNOWN_ID = "unknown-id"

    if "Records" in event:
        records = event["Records"]

        if len(records) > 1:
            # This is a kinesis batch invocation
            return ":".join(r["kinesis"]["partitionKey"] for r in records)

        event_data = records[0]

        if "s3" in event_data:
            # We are in the process_s3_object Lambda
            s3_event = event_data["s3"]
            raw_upload = RawUpload.from_s3_event(s3_event)
            return raw_upload.shortid
        elif "kinesis" in event_data:
            kinesis_event = event_data["kinesis"]
            # We always use the shortid as the partitionKey in kinesis streams
            return kinesis_event["partitionKey"]

    return UNKNOWN_ID

def get_tracing_id(event):
    """
    Returns a unique identifier for the event.
    Used in the Lambda logging system to trace sessions.
    """
    UNKNOWN_ID = "unknown-id"
    event_data = event["Records"][0]

    if "Sns" in event_data:
        # We are in a lambda triggered via SNS
        message = json.loads(event_data["Sns"]["Message"])

        if "shortid" in message:
            # We are in a lambda to process a raw s3 upload
            return message["shortid"]
        elif "token" in message:
            # We are in a lambda for processing an upload event
            return message["token"]
        else:
            return UNKNOWN_ID
    elif "s3" in event_data:
        # We are in the process_s3_object Lambda
        s3_event = event_data["s3"]
        raw_upload = RawUpload.from_s3_event(s3_event)
        return raw_upload.shortid
    else:
        return UNKNOWN_ID

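# Minimal sketches of the event shapes the get_tracing_id variants above expect.
# Values are placeholders; only the keys the code actually reads are shown, and json
# is assumed to be imported as in the surrounding functions. An S3 record additionally
# carries an "s3" key with bucket/object details, which RawUpload.from_s3_event parses.
SNS_EVENT_EXAMPLE = {
    "Records": [{"Sns": {"Message": json.dumps({"shortid": "example-shortid"})}}]
}
KINESIS_EVENT_EXAMPLE = {
    "Records": [{"kinesis": {"partitionKey": "example-shortid"}}]
}
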
def queue_upload_event_for_reprocessing(event):
    if settings.ENV_AWS:
        raw_upload = RawUpload.from_upload_event(event)
        raw_upload.attempt_reprocessing = True
        aws.publish_raw_upload_to_processing_stream(raw_upload)
    else:
        logger.info("Processing UploadEvent %r locally", event)
        event.process()

def process_raw_upload_sns_handler(event, context):
    """
    A handler that subscribes to an SNS topic to support processing of raw log uploads.
    """
    logger = logging.getLogger("hsreplaynet.lambdas.process_raw_upload_sns_handler")
    message = json.loads(event["Records"][0]["Sns"]["Message"])
    raw_upload = RawUpload.from_sns_message(message)
    logger.info("Processing a RawUpload from an SNS message: %s", str(raw_upload))
    process_raw_upload(raw_upload)

def process_s3_create_handler(event, context):
    """
    A handler that is triggered whenever a "..power.log" suffixed object is created in S3.
    """
    logger = logging.getLogger("hsreplaynet.lambdas.process_s3_create_handler")
    s3_event = event["Records"][0]["s3"]
    raw_upload = RawUpload.from_s3_event(s3_event)
    logger.info("Processing a RawUpload from an S3 event: %s", str(raw_upload))
    process_raw_upload(raw_upload)

def process_s3_create_handler(event, context):
    """
    A handler that is triggered whenever a "..power.log" suffixed object is created in S3.
    """
    logger = logging.getLogger("hsreplaynet.lambdas.process_s3_create_handler")
    log_group_name = context.log_group_name
    log_stream_name = context.log_stream_name

    s3_event = event["Records"][0]["s3"]
    raw_upload = RawUpload.from_s3_event(s3_event)

    # This handler entry point should only fire for new raw log uploads
    reprocessing = False

    logger.info("S3 RawUpload: %r (reprocessing=%r)", raw_upload, reprocessing)
    process_raw_upload(raw_upload, reprocessing, log_group_name, log_stream_name)

def process_single_replay_upload_stream_handler(event, context):
    """
    A handler that consumes single records from an AWS Kinesis stream.
    """
    logger = logging.getLogger(
        "hsreplaynet.lambdas.process_single_replay_upload_stream_handler"
    )
    log_group_name = context.log_group_name
    log_stream_name = context.log_stream_name

    kinesis_event = event["Records"][0]["kinesis"]
    raw_upload = RawUpload.from_kinesis_event(kinesis_event)

    # Reprocessing will only be True when the UploadEvent was scheduled via the Admin
    reprocessing = raw_upload.attempt_reprocessing

    logger.info("Kinesis RawUpload: %r (reprocessing=%r)", raw_upload, reprocessing)
    process_raw_upload(raw_upload, reprocessing, log_group_name, log_stream_name)

def get_shortid(event) -> str:
    """
    Returns the shortid associated with the event (or an empty string).
    Used in the Lambda logging system to trace sessions.
    """
    if "Records" not in event:
        return ""

    event_data = event["Records"][0]

    if "s3" in event_data:
        from hsreplaynet.uploads.models import RawUpload

        s3_event = event_data["s3"]
        raw_upload = RawUpload.from_s3_event(s3_event)
        return raw_upload.shortid
    elif "kinesis" in event_data:
        kinesis_event = event_data["kinesis"]
        # We always use the shortid as the partitionKey in kinesis streams
        return kinesis_event["partitionKey"]

    return ""

def _generate_raw_uploads_from_events(events):
    for event in events:
        raw_upload = RawUpload.from_upload_event(event)
        raw_upload.attempt_reprocessing = True
        yield raw_upload

def _list_raw_uploads_by_prefix(prefix):
    for object in aws.list_all_objects_in(settings.S3_RAW_LOG_UPLOAD_BUCKET, prefix=prefix):
        key = object["Key"]
        if key.endswith(".log"):
            # Just emit one message per power.log / canary.log
            yield RawUpload(settings.S3_RAW_LOG_UPLOAD_BUCKET, key)
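
# Illustrative only: a hypothetical helper showing how _list_raw_uploads_by_prefix could
# be combined with the publishing call used in queue_upload_event_for_reprocessing to
# requeue every raw upload stored under a given S3 prefix. The name and wiring are
# assumptions, not part of the original module.
def _requeue_raw_uploads_by_prefix(prefix):
    for raw_upload in _list_raw_uploads_by_prefix(prefix):
        raw_upload.attempt_reprocessing = True
        aws.publish_raw_upload_to_processing_stream(raw_upload)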