'log_format':
        '<Month>  <Date> <Time> <User> <Component>\[<PID>\]( \(<Address>\))?: <Content>',
        'regex': [r'([\w-]+\.){2,}[\w-]+'],
        'groupNum': 250
    },
}

# Run the LogSig parser on every benchmark dataset, score it against the
# ground truth, and collect per-dataset results.
benchmark_result = []
# BUG FIX: dict.iteritems() is Python 2 only; this file already uses the
# Python 3 print() function, so use .items() instead.
for dataset, setting in benchmark_settings.items():
    print('\n=== Evaluation on %s ===' % dataset)
    # setting['log_file'] is a path relative to input_dir; split it into
    # the containing directory and the bare file name the parser expects.
    indir = os.path.join(input_dir, os.path.dirname(setting['log_file']))
    log_file = os.path.basename(setting['log_file'])

    parser = LogSig.LogParser(log_format=setting['log_format'],
                              indir=indir,
                              outdir=output_dir,
                              rex=setting['regex'],
                              groupNum=setting['groupNum'])
    parser.parse(log_file)

    # Compare the parser's structured output against the hand-labelled
    # ground truth; evaluate() returns (F1_measure, accuracy).
    F1_measure, accuracy = evaluator.evaluate(
        groundtruth=os.path.join(indir, log_file + '_structured.csv'),
        parsedresult=os.path.join(output_dir, log_file + '_structured.csv'))
    benchmark_result.append([dataset, F1_measure, accuracy])

print('\n=== Overall evaluation results ===')
df_result = pd.DataFrame(benchmark_result,
                         columns=['Dataset', 'F1_measure', 'Accuracy'])
df_result.set_index('Dataset', inplace=True)
print(df_result)
# Transpose so datasets become columns in the saved CSV.
df_result.T.to_csv('LogSig_benchmark_result.csv')
# Example #2
#!/usr/bin/env python
"""Minimal demo: run the LogSig parser on the bundled HDFS sample log."""

import sys

# Make the logparser package (one directory up) importable.
sys.path.append('../')
from logparser import LogSig

# Directory holding the raw input log file.
input_dir = '../logs/HDFS/'
# Directory where the structured parsing results are written.
output_dir = 'LogSig_result/'
# Name of the log file to parse.
log_file = 'HDFS_2k.log'
# Header layout of each HDFS log line.
log_format = '<Date> <Time> <Pid> <Level> <Component>: <Content>'
# Optional preprocessing regexes; none are needed for this dataset.
regex = []
# Number of message groups LogSig should partition the log into.
group_number = 14

parser = LogSig.LogParser(input_dir, output_dir, group_number, log_format, rex=regex)
parser.parse(log_file)