import re
from collections import defaultdict
from datetime import datetime

# The aws helper module, the settings object, the RawUpload model and the
# module-level logger are assumed to be provided by the surrounding project
# (e.g. "from django.conf import settings" in a Django codebase).


def get_reaping_inventory_for_date(date):
    # Maps hour -> minute -> shortid -> {"log": key, "descriptor": key} for
    # every raw upload object found under the given date's prefix.
    descriptors = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
    key_prefix = date.strftime("raw/%Y/%m/%d")

    for object in aws.list_all_objects_in(
        settings.S3_RAW_LOG_UPLOAD_BUCKET, prefix=key_prefix
    ):
        key = object["Key"]

        if key.endswith("descriptor.json"):
            match = re.match(RawUpload.DESCRIPTOR_KEY_PATTERN, key)
            fields = match.groupdict()
            shortid = fields["shortid"]
            timestamp = datetime.strptime(fields["ts"], RawUpload.TIMESTAMP_FORMAT)
            descriptors[timestamp.hour][timestamp.minute][shortid]["descriptor"] = key
        else:
            match = re.match(RawUpload.RAW_LOG_KEY_PATTERN, key)
            fields = match.groupdict()
            shortid = fields["shortid"]
            timestamp = datetime.strptime(fields["ts"], RawUpload.TIMESTAMP_FORMAT)
            descriptors[timestamp.hour][timestamp.minute][shortid]["log"] = key

    return descriptors
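# Usage sketch: one way the inventory above might be consumed is to walk the
# nested hour/minute/shortid mapping and flag uploads that are missing either
# their log or their descriptor. "find_incomplete_uploads" is an illustrative
# name, not part of the original module.
def find_incomplete_uploads(date):
    incomplete = []
    inventory = get_reaping_inventory_for_date(date)
    for hour, minutes in inventory.items():
        for minute, shortids in minutes.items():
            for shortid, keys in shortids.items():
                # A complete upload has both a "log" and a "descriptor" entry.
                if "log" not in keys or "descriptor" not in keys:
                    incomplete.append((shortid, dict(keys)))
    return incomplete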
def queue_raw_uploads_for_processing():
    """
    Queue all raw logs to attempt processing them into UploadEvents.

    The primary use for this is for when we deploy code. The intended deploy process is:
        - Notify S3 to suspend triggering lambda upon log upload
        - Perform the deploy
        - Notify S3 to resume triggering lambda upon log upload
        - Invoke this function to queue for processing any logs uploaded during the deploy

    This method is not intended to requeue uploads that have previously failed.
    For that, see the requeue_failed_* family of methods.
    """
    logger.info("Starting - Queue all raw uploads for processing")
    topic_arn = aws.get_sns_topic_arn_from_name(settings.SNS_PROCESS_RAW_LOG_UPOAD_TOPIC)

    if topic_arn is None:
        raise Exception("A Topic for queueing raw uploads is not configured.")

    for object in aws.list_all_objects_in(settings.S3_RAW_LOG_UPLOAD_BUCKET, prefix="raw"):
        key = object["Key"]

        if key.endswith("power.log"):
            # Don't queue the descriptor files, just the logs.
            raw_upload = RawUpload(settings.S3_RAW_LOG_UPLOAD_BUCKET, key)
            logger.info("About to queue: %s" % str(raw_upload))
            aws.publish_sns_message(topic_arn, raw_upload.sns_message)
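# Deploy-time usage sketch: the docstring above describes the intended
# sequence; a deploy script might express it roughly as follows. The
# suspend/resume/deploy helpers are hypothetical names for the surrounding
# tooling, not functions defined in this module.
#
#     suspend_s3_lambda_trigger()        # hypothetical helper
#     perform_deploy()                   # hypothetical helper
#     resume_s3_lambda_trigger()         # hypothetical helper
#     queue_raw_uploads_for_processing()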
def generate_raw_uploads_for_processing(attempt_reprocessing, limit=None):
    count = 0

    for object in aws.list_all_objects_in(settings.S3_RAW_LOG_UPLOAD_BUCKET, prefix="raw"):
        key = object["Key"]

        if key.endswith(".log"):
            # Don't queue the descriptor files, just the .logs
            raw_upload = RawUpload(settings.S3_RAW_LOG_UPLOAD_BUCKET, key)
            raw_upload.attempt_reprocessing = attempt_reprocessing
            yield raw_upload
            count += 1

            # Stop once the requested number of uploads has been yielded.
            if limit and count >= limit:
                return
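# Usage sketch: one way to drive the generator above is to publish each
# yielded RawUpload to the same SNS topic used by
# queue_raw_uploads_for_processing. "requeue_all_raw_uploads" is an
# illustrative name, not part of the original module.
def requeue_all_raw_uploads(attempt_reprocessing=True, limit=None):
    topic_arn = aws.get_sns_topic_arn_from_name(settings.SNS_PROCESS_RAW_LOG_UPOAD_TOPIC)
    if topic_arn is None:
        raise Exception("A Topic for queueing raw uploads is not configured.")

    for raw_upload in generate_raw_uploads_for_processing(attempt_reprocessing, limit):
        aws.publish_sns_message(topic_arn, raw_upload.sns_message)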
def _list_raw_uploads_by_prefix(prefix):
    for object in aws.list_all_objects_in(settings.S3_RAW_LOG_UPLOAD_BUCKET, prefix=prefix):
        key = object["Key"]

        if key.endswith(".log"):
            # Just emit one message per power.log / canary.log
            yield RawUpload(settings.S3_RAW_LOG_UPLOAD_BUCKET, key)
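# Usage sketch: the prefix argument follows the same "raw/%Y/%m/%d" layout
# used by get_reaping_inventory_for_date, so a single day's uploads can be
# listed as below. "list_raw_uploads_for_date" is an illustrative name, not
# part of the original module.
def list_raw_uploads_for_date(date):
    prefix = date.strftime("raw/%Y/%m/%d")
    return list(_list_raw_uploads_by_prefix(prefix))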