import copy
import json
import os

import boto3

# logger, utils, siem and the other module-level helpers and config objects
# referenced below are assumed to be defined elsewhere in this module.


def extract_logfile_from_s3(record):
    if 's3' in record:
        s3key = record['s3']['object']['key']
        logger.structure_logs(append=True, s3_key=s3key)
        logtype = utils.get_logtype_from_s3key(s3key, logtype_s3key_dict)
        logconfig = create_logconfig(logtype)
        logfile = siem.LogS3(record, logtype, logconfig, s3_client, sqs_queue)
    else:
        logger.error('invalid input data. exit')
        raise Exception('invalid input data. exit')
    return logfile
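

# For illustration only (not part of the source): a minimal record in the
# shape extract_logfile_from_s3 expects, following the standard S3 event
# notification layout. The bucket name and object key are hypothetical.
def _example_s3_record():
    return {
        's3': {
            'bucket': {'name': 'example-log-bucket'},
            'object': {'key': 'AWSLogs/123456789012/example-log.json.gz'},
        }
    }
# e.g. logfile = extract_logfile_from_s3(_example_s3_record())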


def lambda_handler(event, context):
    for record in event['Records']:
        if 'body' in record:
            # this record was re-queued via SQS after log splitting
            record = json.loads(record['body'])
        if 'kinesis' in record:
            logfile = siem.LogKinesis(record, etl_config)
        elif 's3' in record:
            s3 = boto3.client('s3', config=s3_session_config)
            logfile = siem.LogS3(record, etl_config, s3)
        else:
            raise Exception('ERROR[{0}]: invalid input data. exit'.format(
                os.getpid()))
        if logfile.ignore:
            print('WARN[{0}]: skipped because {1}'.format(
                os.getpid(), logfile.ignore))
            continue
        print('INFO[{0}]: {1}'.format(os.getpid(), logfile.startmsg))
        # restrict the config to the log type that is being processed
        logconfig = copy.copy(etl_config[logfile.logtype])
        # PUT the entries into Elasticsearch
        size = 0
        results = False
        putdata_list = []
        for data in get_es_entry(logfile, logconfig, exclude_log_patterns):
            putdata_list.append(data)
            size += len(str(data))
            # http.max_content_length in ES is 10 MB on t2 instances, so
            # flush the buffered entries to ES before reaching that limit
            if isinstance(data, str) and size > 6000000:
                results = es_conn.bulk(putdata_list)
                check_es_results(results)
                size = 0
                putdata_list = []
        if size > 0:
            results = es_conn.bulk(putdata_list)
            check_es_results(results)
        elif not results:
            print('INFO[{0}]: No entries were successfully loaded'.format(
                os.getpid()))
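

# The source does not show check_es_results; this is a minimal sketch,
# assuming es_conn.bulk() returns the standard Elasticsearch _bulk response
# body ({'errors': bool, 'items': [...]}). The real helper may differ.
def check_es_results(results):
    if not results or not results.get('errors'):
        return  # every action in the batch succeeded
    for item in results.get('items', []):
        # each item maps the action type (e.g. 'index') to its result detail
        for detail in item.values():
            if detail.get('status', 0) >= 300:
                print('ERROR[{0}]: failed to load entry: {1}'.format(
                    os.getpid(), detail.get('error')))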