def parse_log_files():
    """Parse the log files stored under a prefix of an S3 bucket.

    Connection settings are read from the environment:

    * ``S3_LOG_ACCESS_KEY`` / ``S3_LOG_SECRET_KEY`` -- S3 credentials
    * ``S3_LOG_BUCKET`` -- name of the bucket to list
    * ``S3_LOG_PREFIX`` -- key prefix used to filter the listing

    Every listed key is tracked by a ``LogFile`` record carrying ``parsed``
    and ``lock`` flags. A key is processed only when its record is new, or
    when both flags are ``False``; all other keys are skipped. While a key
    is being processed its record is saved with ``lock=True``.

    The lock is always released again, even when downloading or parsing
    fails. In that case ``parsed`` stays ``False`` so the key is picked up
    again on a later run, and the original exception propagates to the
    caller.
    """
    aws_access_key = os.environ.get('S3_LOG_ACCESS_KEY')
    aws_secret_key = os.environ.get('S3_LOG_SECRET_KEY')
    bucket_name = os.environ.get('S3_LOG_BUCKET')
    prefix = os.environ.get('S3_LOG_PREFIX')

    conn = S3Connection(aws_access_key, aws_secret_key)
    bucket = conn.get_bucket(bucket_name)

    for key in bucket.list(prefix=prefix):
        # Claim the key: create a locked record for unseen keys, or lock an
        # existing record that is neither parsed nor held by someone else.
        try:
            log_file = LogFile.objects.get(key=key.key)
        except ObjectDoesNotExist:
            log_file = LogFile(key=key.key, parsed=False, lock=True)
            log_file.save()
        else:
            if not (log_file.parsed is False and log_file.lock is False):
                continue
            log_file.lock = True
            log_file.save()

        try:
            # NOTE(review): if get_contents_as_string() returns bytes on
            # this runtime, str() produces the "b'...'" repr rather than
            # decoded text -- confirm against the interpreter in use.
            contents = str(key.get_contents_as_string())
            parse_str(contents)
            log_file.parsed = True
        finally:
            # Release the lock on success and on failure alike; without
            # this a single error would leave the record locked and the
            # key would be skipped by every subsequent run.
            log_file.lock = False
            log_file.save()