def extract_logfile_from_s3(record):
    """Build the ``siem.LogS3`` object for a single S3 event record.

    The object key found at ``record['s3']['object']['key']`` is appended to
    the structured-logging context, then used to look up the log type via
    ``utils.get_logtype_from_s3key``. The log type in turn selects the log
    config passed to ``siem.LogS3``.

    Args:
        record: Event record; must contain an ``'s3'`` entry.

    Returns:
        The ``siem.LogS3`` instance created for ``record``.

    Raises:
        Exception: If ``record`` has no ``'s3'`` key (the error is logged
            before raising).
    """
    # Guard clause: anything that is not an S3 record is rejected up front.
    if 's3' not in record:
        logger.error('invalid input data. exit')
        raise Exception('invalid input data. exit')

    object_key = record['s3']['object']['key']
    # Attach the key to every subsequent structured log line.
    logger.structure_logs(append=True, s3_key=object_key)

    log_type = utils.get_logtype_from_s3key(object_key, logtype_s3key_dict)
    log_config = create_logconfig(log_type)
    return siem.LogS3(record, log_type, log_config, s3_client, sqs_queue)
Example #2
0
def lambda_handler(event, context):
    """Load the logs referenced by each event record into Elasticsearch.

    Every entry of ``event['Records']`` is handled in turn. A record that has
    a ``'body'`` key is first replaced by the JSON-decoded body. The record is
    then wrapped in ``siem.LogKinesis`` (``'kinesis'`` key) or ``siem.LogS3``
    (``'s3'`` key). Entries produced by ``get_es_entry`` are sent to
    ``es_conn.bulk`` in batches and each response is passed to
    ``check_es_results``.

    Args:
        event: Lambda event; must contain a ``'Records'`` iterable.
        context: Lambda context object (not used here).

    Raises:
        KeyError: If ``event`` has no ``'Records'`` key.
        Exception: If a record is neither a Kinesis nor an S3 record.
    """
    # Flush threshold in characters of ``str(entry)``. The original note says
    # Elasticsearch's http.max_content_length is 10MB on t2 instances, so the
    # buffer is loaded well before reaching that limit.
    bulk_flush_size = 6000000
    pid = os.getpid()
    # The S3 client is created lazily and reused for every S3 record of this
    # invocation instead of being rebuilt once per record.
    s3 = None

    for record in event['Records']:
        if 'body' in record:
            # from sqs-splitted-logs: the actual record is the JSON body
            record = json.loads(record['body'])
        if 'kinesis' in record:
            logfile = siem.LogKinesis(record, etl_config)
        elif 's3' in record:
            if s3 is None:
                s3 = boto3.client('s3', config=s3_session_config)
            logfile = siem.LogS3(record, etl_config, s3)
        else:
            raise Exception('ERROR[{0}]: invalid input data. exit'.format(
                pid))
        if logfile.ignore:
            print('WARN[{0}]: skipped because {1}'.format(
                pid, logfile.ignore))
            continue
        print('INFO[{0}]: {1}'.format(
            pid,
            logfile.startmsg,
        ))

        # Restrict the config to the log type that is being processed.
        logconfig = copy.copy(etl_config[logfile.logtype])
        # PUT the entries to ES.
        size = 0
        results = False  # stays False until at least one bulk call was made
        putdata_list = []
        for data in get_es_entry(logfile, logconfig, exclude_log_patterns):
            putdata_list.append(data)
            size += len(str(data))
            # Only flush right after a ``str`` entry.
            # NOTE(review): presumably entries alternate between an action
            # and its document, so this avoids splitting a pair -- confirm
            # against get_es_entry.
            if isinstance(data, str) and size > bulk_flush_size:
                results = es_conn.bulk(putdata_list)
                check_es_results(results)
                size = 0
                putdata_list = []
        if size > 0:
            # Load whatever is left in the buffer.
            results = es_conn.bulk(putdata_list)
            check_es_results(results)
        elif not results:
            print('INFO[{0}]: No entries were successed to load'.format(
                pid))