import boto3
from moto import mock_s3
from nose.tools import assert_equal

# StreamPreParsers is provided by the package under test


@mock_s3  # moto's S3 mock keeps the test from touching real AWS
def test_pre_parse_s3():
    """Pre-Parse S3 Test"""
    region = 'us-east-1'
    bucket_name = 'test_bucket'
    key_name = 'test_key'
    body_value = 'this is a value for the object'  # 30 bytes, matching the size below
    raw_record = {
        'awsRegion': region,
        's3': {
            'bucket': {
                'name': bucket_name
            },
            'object': {
                'key': key_name,
                'size': 30
            }
        }
    }

    # Stage the bucket and object that pre_parse_s3 will fetch
    s3_resource = boto3.resource('s3', region_name=region)
    s3_resource.create_bucket(Bucket=bucket_name)
    obj = s3_resource.Object(bucket_name, key_name)
    obj.put(Body=body_value)

    # Download the object, then read back the first (and only) line
    s3_file, size = StreamPreParsers.pre_parse_s3(raw_record)
    data = next(StreamPreParsers.read_s3_file(s3_file))

    assert_equal(body_value, data)
    assert_equal(size, 30)
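# For context, a minimal sketch of the two helpers the test exercises,
# assuming pre_parse_s3 downloads the object to a local temp file and
# read_s3_file streams it back line by line. The class and method names
# come from the test; everything inside the bodies is an assumption, not
# the project's actual implementation.
import os
import tempfile

import boto3


class StreamPreParsers(object):
    """Sketch only; the real class lives in the package under test"""

    @classmethod
    def pre_parse_s3(cls, raw_record):
        """Download the S3 object named in the record, return (path, size)"""
        region = raw_record['awsRegion']
        bucket = raw_record['s3']['bucket']['name']
        key = raw_record['s3']['object']['key']
        size = raw_record['s3']['object']['size']

        # Download to a temp file so large objects are not held in memory
        s3_file = os.path.join(tempfile.gettempdir(), key.replace('/', '-'))
        boto3.resource('s3', region_name=region).Bucket(bucket).download_file(key, s3_file)
        return s3_file, size

    @classmethod
    def read_s3_file(cls, s3_file):
        """Yield the downloaded file one newline-stripped line at a time"""
        with open(s3_file, 'r') as data:
            for line in data:
                yield line.rstrip('\n')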
def s3_process(self, payload, classifier):
    """Process S3 data for alerts"""
    s3_file_lines = StreamPreParsers.pre_parse_s3(payload.raw_record)
    for line in s3_file_lines:
        data = line.rstrip()
        payload.refresh_record(data)
        classifier.classify_record(payload, data)
        self.process_alerts(payload)
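# Both process methods assume a payload object whose active record can be
# swapped out per line, so the classifier sees one logical record at a
# time. A minimal sketch of that contract: only refresh_record and
# raw_record appear in the code above; the pre_parsed_record attribute
# and the rest of the class are illustrative assumptions.
class S3Payload(object):
    """Sketch of the payload contract assumed by s3_process"""

    def __init__(self, raw_record):
        self.raw_record = raw_record
        self.pre_parsed_record = None

    def refresh_record(self, new_record):
        # Point the payload at the next line pulled from the S3 object
        self.pre_parsed_record = new_record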
def _s3_process(self, payload, classifier):
    """Process S3 data for alerts"""
    s3_file, s3_object_size = StreamPreParsers.pre_parse_s3(payload.raw_record)
    count, processed_size = 0, 0
    for data in StreamPreParsers.read_s3_file(s3_file):
        payload.refresh_record(data)
        self._process_alerts(classifier, payload, data)
        # Add the current data to the total processed size, +1 to account for line feed
        processed_size += (len(data) + 1)
        count += 1
        # Log an info message on every 100 lines processed
        if count % 100 == 0:
            avg_record_size = ((processed_size - 1) / count)
            approx_record_count = s3_object_size / avg_record_size
            LOGGER.info('Processed %s records out of an approximate total of %s '
                        '(average record size: %s bytes, total size: %s bytes)',
                        count, approx_record_count,
                        avg_record_size, s3_object_size)
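# To make the progress arithmetic concrete, a worked example with
# hypothetical numbers: 100 records averaging 50 bytes (plus one line
# feed each) inside a 1 MB object. Under Python 2 the plain / above is
# integer division for ints, written here as // for clarity.
count = 100
processed_size = 5100                                    # 100 * (50 bytes + 1 LF)
s3_object_size = 1048576                                 # from the S3 event record

avg_record_size = (processed_size - 1) // count          # 5099 // 100 = 50 bytes
approx_record_count = s3_object_size // avg_record_size  # 1048576 // 50 = 20971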