Example #1
0
def parse_log_files():
    """Parse S3 log files that reside in an S3 bucket.

    Connection settings are read from the environment variables
    S3_LOG_ACCESS_KEY, S3_LOG_SECRET_KEY, S3_LOG_BUCKET and S3_LOG_PREFIX.

    Every key listed under the prefix is tracked by a LogFile row. A key is
    skipped when its row is already marked as parsed or is currently locked.
    Otherwise the row is locked, the contents of the key are handed to
    parse_str, and the row is marked as parsed.

    The lock is always released, even when downloading or parsing fails, so
    a failed file is retried on the next run instead of staying locked
    forever. The original exception still propagates to the caller.
    """
    aws_access_key = os.environ.get('S3_LOG_ACCESS_KEY')
    aws_secret_key = os.environ.get('S3_LOG_SECRET_KEY')
    bucket_name = os.environ.get('S3_LOG_BUCKET')
    prefix = os.environ.get('S3_LOG_PREFIX')

    conn = S3Connection(aws_access_key, aws_secret_key)
    bucket = conn.get_bucket(bucket_name)
    for key in bucket.list(prefix=prefix):
        try:
            log_file = LogFile.objects.get(key=key.key)
        except ObjectDoesNotExist:
            # First time this key is seen: create the row already locked.
            log_file = LogFile(key=key.key, parsed=False, lock=True)
        else:
            # Only unparsed, unlocked files are picked up.
            if log_file.parsed is not False or log_file.lock is not False:
                continue
            log_file.lock = True
        log_file.save()

        try:
            # NOTE(review): on Python 3, str() of a bytes object yields its
            # "b'...'" repr -- confirm get_contents_as_string returns text.
            contents = str(key.get_contents_as_string())
            parse_str(contents)
            log_file.parsed = True
        finally:
            # Release the lock whether or not parsing succeeded; "parsed"
            # is only set on success, so failures remain eligible for retry.
            log_file.lock = False
            log_file.save()
Example #2
0
def parse_log_files_locally(path_to_logs):
    """Parse S3 log files that are stored in a local directory.

    Intended to be run manually; has no provisions for the locking that the
    normal task has.

    Each entry of path_to_logs is mapped to the key "logs/<entry name>".
    Entries that already have a LogFile row are skipped. The remaining ones
    are read, their key is printed, their contents are handed to parse_str,
    and a LogFile row marked as parsed is saved for them.

    path_to_logs is the directory holding the log files; a trailing path
    separator is optional.
    """
    for log in os.listdir(path_to_logs):
        key = "logs/" + log
        try:
            LogFile.objects.get(key=key)
        except ObjectDoesNotExist:
            # os.path.join keeps the path valid with or without a trailing
            # separator on path_to_logs; "with" closes the file promptly.
            with open(os.path.join(path_to_logs, log), 'r') as handle:
                contents = handle.read()
            print(key)
            parse_str(contents)
            LogFile(key=key, parsed=True, lock=False).save()