Example no. 1
def generate_raw_uploads_for_processing(attempt_reprocessing, limit=None):
	count = 0
	for obj in aws.list_all_objects_in(settings.S3_RAW_LOG_UPLOAD_BUCKET, prefix="raw"):
		key = obj["Key"]
		if key.endswith(".log"):  # Don't queue the descriptor files, just the .logs
			raw_upload = RawUpload(settings.S3_RAW_LOG_UPLOAD_BUCKET, key)
			raw_upload.attempt_reprocessing = attempt_reprocessing
			yield raw_upload
			count += 1
			if limit and count >= limit:
				return
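A sketch of how this generator might be consumed; publish_raw_upload_to_processing_stream is the aws helper used in Example no. 6 below, and the limit of 100 is illustrative.

def queue_bounded_batch_for_reprocessing():
	# Hypothetical driver: push at most 100 raw uploads onto the
	# processing stream for another processing attempt.
	for raw_upload in generate_raw_uploads_for_processing(attempt_reprocessing=True, limit=100):
		aws.publish_raw_upload_to_processing_stream(raw_upload)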
Example no. 3
def get_tracing_id(event):
	"""
	Returns the upload shortid (or Kinesis partition key) as a unique identifier.
	Used in the Lambda logging system to trace sessions.
	"""
	UNKNOWN_ID = "unknown-id"
	records = event["Records"]

	if len(records) > 1:
		# This is a kinesis batch invocation
		return ":".join(r["kinesis"]["partitionKey"] for r in records)

	event_data = records[0]

	if "s3" in event_data:
		# We are in the process_s3_object Lambda
		s3_event = event_data["s3"]
		raw_upload = RawUpload.from_s3_event(s3_event)
		return raw_upload.shortid
	elif "kinesis" in event_data:
		kinesis_event = event_data["kinesis"]
		# We always use the shortid as the partitionKey in kinesis streams
		return kinesis_event["partitionKey"]

	return UNKNOWN_ID
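A minimal smoke test for the Kinesis branch above, assuming the standard shape AWS delivers to Kinesis-triggered Lambdas; the partition key value here is a hypothetical shortid.

fake_event = {
	"Records": [
		{"kinesis": {"partitionKey": "hUmSqRfiW8"}}  # hypothetical shortid
	]
}
assert get_tracing_id(fake_event) == "hUmSqRfiW8"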
Example no. 4
def get_tracing_id(event):
    """
	Returns the upload shortid (or Kinesis partition key) as a unique identifier.
	Used in the Lambda logging system to trace sessions.
	"""
    UNKNOWN_ID = "unknown-id"
    if "Records" in event:
        records = event["Records"]

        if len(records) > 1:
            # This is a kinesis batch invocation
            return ":".join(r["kinesis"]["partitionKey"] for r in records)

        event_data = records[0]

        if "s3" in event_data:
            # We are in the process_s3_object Lambda
            s3_event = event_data["s3"]
            raw_upload = RawUpload.from_s3_event(s3_event)
            return raw_upload.shortid
        elif "kinesis" in event_data:
            kinesis_event = event_data["kinesis"]
            # We always use the shortid as the partitionKey in kinesis streams
            return kinesis_event["partitionKey"]

    return UNKNOWN_ID
Example no. 5
def get_tracing_id(event):
	"""
	Returns the upload shortid (or upload token) as a unique identifier.
	Used in the Lambda logging system to trace sessions.
	"""
	UNKNOWN_ID = "unknown-id"
	event_data = event["Records"][0]

	if "Sns" in event_data:
		# We are in a lambda triggered via SNS
		message = json.loads(event_data["Sns"]["Message"])

		if "shortid" in message:
			# We are in a lambda to process a raw s3 upload
			return message["shortid"]
		elif "token" in message:
			# We are in a lambda for processing an upload event
			return message["token"]
		else:
			return UNKNOWN_ID

	elif "s3" in event_data:
		# We are in the process_s3_object Lambda
		s3_event = event_data["s3"]
		raw_upload = RawUpload.from_s3_event(s3_event)
		return raw_upload.shortid
	else:
		return UNKNOWN_ID
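For the SNS branch, AWS delivers the published message as a JSON string under Records[0]["Sns"]["Message"]; a minimal sketch follows (the shortid value is hypothetical, and json is assumed to be imported at module level, as the function itself requires).

sns_event = {
	"Records": [
		{"Sns": {"Message": json.dumps({"shortid": "hUmSqRfiW8"})}}
	]
}
assert get_tracing_id(sns_event) == "hUmSqRfiW8"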
Example no. 6
def queue_upload_event_for_reprocessing(event):
	if settings.ENV_AWS:
		raw_upload = RawUpload.from_upload_event(event)
		raw_upload.attempt_reprocessing = True
		aws.publish_raw_upload_to_processing_stream(raw_upload)
	else:
		logger.info("Processing UploadEvent %r locally", event)
		event.process()
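The comment in Example no. 12 below notes that reprocessing is scheduled "via the Admin"; a hypothetical Django admin action wiring into this function could look like:

def reprocess_selected_upload_events(modeladmin, request, queryset):
	# Hypothetical admin action: requeue each selected UploadEvent.
	for event in queryset:
		queue_upload_event_for_reprocessing(event)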
Example no. 8
def process_raw_upload_sns_handler(event, context):
	"""
	A handler subscribed to an SNS topic to support processing of raw log uploads.
	"""
	logger = logging.getLogger("hsreplaynet.lambdas.process_raw_upload_sns_handler")

	message = json.loads(event["Records"][0]["Sns"]["Message"])
	raw_upload = RawUpload.from_sns_message(message)
	logger.info("Processing a RawUpload from an SNS message: %s", str(raw_upload))
	process_raw_upload(raw_upload)
Example no. 9
def process_s3_create_handler(event, context):
	"""
	A handler that is triggered whenever a "..power.log" suffixed object is created in S3.
	"""
	logger = logging.getLogger("hsreplaynet.lambdas.process_s3_create_handler")

	s3_event = event["Records"][0]["s3"]
	raw_upload = RawUpload.from_s3_event(s3_event)
	logger.info("Processing a RawUpload from an S3 event: %s", str(raw_upload))
	process_raw_upload(raw_upload)
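The handler relies on the standard S3 create-notification layout; a trimmed, illustrative event follows (bucket name and key are hypothetical, and real notifications carry many more fields).

s3_create_event = {
	"Records": [{
		"s3": {
			"bucket": {"name": "hsreplaynet-raw-log-uploads"},  # hypothetical
			"object": {"key": "raw/2016/09/01/hUmSqRfiW8/power.log"},  # hypothetical
		}
	}]
}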
Example no. 10
def process_s3_create_handler(event, context):
    """
	A handler that is triggered whenever a "..power.log" suffixed object is created in S3.
	"""
    logger = logging.getLogger("hsreplaynet.lambdas.process_s3_create_handler")
    log_group_name = context.log_group_name
    log_stream_name = context.log_stream_name

    s3_event = event["Records"][0]["s3"]
    raw_upload = RawUpload.from_s3_event(s3_event)

    # This handler entry point should only fire for new raw log uploads
    reprocessing = False

    logger.info("S3 RawUpload: %r (reprocessing=%r)", raw_upload, reprocessing)
    process_raw_upload(raw_upload, reprocessing, log_group_name,
                       log_stream_name)
Example no. 11
def process_s3_create_handler(event, context):
	"""
	A handler that is triggered whenever a "..power.log" suffixed object is created in S3.
	"""
	logger = logging.getLogger("hsreplaynet.lambdas.process_s3_create_handler")
	log_group_name = context.log_group_name
	log_stream_name = context.log_stream_name

	s3_event = event["Records"][0]["s3"]
	raw_upload = RawUpload.from_s3_event(s3_event)

	# This handler entry point should only fire for new raw log uploads
	reprocessing = False

	logger.info(
		"S3 RawUpload: %r (reprocessing=%r)", raw_upload, reprocessing
	)
	process_raw_upload(raw_upload, reprocessing, log_group_name, log_stream_name)
Example no. 12
def process_single_replay_upload_stream_handler(event, context):
    """
	A handler that consumes single records from an AWS Kinesis stream.
	"""
    logger = logging.getLogger(
        "hsreplaynet.lambdas.process_single_replay_upload_stream_handler")
    log_group_name = context.log_group_name
    log_stream_name = context.log_stream_name

    kinesis_event = event["Records"][0]["kinesis"]
    raw_upload = RawUpload.from_kinesis_event(kinesis_event)

    # Reprocessing will only be True when the UploadEvent was scheduled via the Admin
    reprocessing = raw_upload.attempt_reprocessing

    logger.info("Kinesis RawUpload: %r (reprocessing=%r)", raw_upload,
                reprocessing)
    process_raw_upload(raw_upload, reprocessing, log_group_name,
                       log_stream_name)
Example no. 13
def process_single_replay_upload_stream_handler(event, context):
	"""
	A handler that consumes single records from an AWS Kinesis stream.
	"""
	logger = logging.getLogger(
		"hsreplaynet.lambdas.process_single_replay_upload_stream_handler"
	)
	log_group_name = context.log_group_name
	log_stream_name = context.log_stream_name

	kinesis_event = event["Records"][0]["kinesis"]
	raw_upload = RawUpload.from_kinesis_event(kinesis_event)

	# Reprocessing will only be True when the UploadEvent was scheduled via the Admin
	reprocessing = raw_upload.attempt_reprocessing

	logger.info(
		"Kinesis RawUpload: %r (reprocessing=%r)", raw_upload, reprocessing
	)
	process_raw_upload(raw_upload, reprocessing, log_group_name, log_stream_name)
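The only context attributes these handlers read, log_group_name and log_stream_name, are standard on the Lambda context object, so a simple stand-in suffices for local testing; the values below are hypothetical.

class FakeLambdaContext:
	# Mirrors only the two attributes the handlers above actually use.
	log_group_name = "/aws/lambda/process_single_replay_upload_stream_handler"
	log_stream_name = "2016/09/01/[$LATEST]0123456789abcdef"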
Example no. 14
def get_shortid(event) -> str:
    """
	Returns the upload shortid (or Kinesis partition key) as a unique identifier.
	Used in the Lambda logging system to trace sessions.
	"""
    if "Records" not in event:
        return ""

    event_data = event["Records"][0]

    if "s3" in event_data:
        from hsreplaynet.uploads.models import RawUpload
        s3_event = event_data["s3"]
        raw_upload = RawUpload.from_s3_event(s3_event)
        return raw_upload.shortid

    elif "kinesis" in event_data:
        kinesis_event = event_data["kinesis"]
        # We always use the shortid as the partitionKey in kinesis streams
        return kinesis_event["partitionKey"]

    return ""
Example no. 15
def _generate_raw_uploads_from_events(events):
	for event in events:
		raw_upload = RawUpload.from_upload_event(event)
		raw_upload.attempt_reprocessing = True
		yield raw_upload
Example no. 16
def _list_raw_uploads_by_prefix(prefix):
	for obj in aws.list_all_objects_in(settings.S3_RAW_LOG_UPLOAD_BUCKET, prefix=prefix):
		key = obj["Key"]
		if key.endswith(".log"):  # Just emit one message per power.log / canary.log
			yield RawUpload(settings.S3_RAW_LOG_UPLOAD_BUCKET, key)
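A hypothetical way to drive this lister: choose a date-based prefix and queue each upload via the aws helper from Example no. 6 (the prefix format is illustrative).

for raw_upload in _list_raw_uploads_by_prefix("raw/2016/09/01/"):
	raw_upload.attempt_reprocessing = True
	aws.publish_raw_upload_to_processing_stream(raw_upload)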