Example #1
0
#!/usr/bin/env python
import sys
sys.path.append('../')
from logparser.LogCluster import *
from logparser.LogCluster import LogCluster

# Demo: parse the HDFS sample log with LogCluster using the settings below.
input_dir = '../logs/HDFS/'  # The input directory of log file
output_dir = 'LogCluster_result/'  # The output directory of parsing results
log_file = 'HDFS_2k.log'  # The input log file name
log_format = '<Date> <Time> <Pid> <Level> <Component>: <Content>'  # HDFS log format
rsupport = 10  # The minimum threshold of relative support, 10 denotes 10%
regex = []  # Regular expression list for optional preprocessing (default: [])

# Hand `regex` to the parser through its `rex` keyword; previously the list
# was declared but never used, so editing it silently had no effect.
parser = LogCluster.LogParser(input_dir,
                              log_format,
                              output_dir,
                              rex=regex,
                              rsupport=rsupport)
parser.parse(log_file)
Example #2
0
        'log_format':
        '<Month>  <Date> <Time> <User> <Component>\[<PID>\]( \(<Address>\))?: <Content>',
        'regex': [r'([\w-]+\.){2,}[\w-]+'],
        'rsupport': 0.2,
    },
}

# One [dataset, F1_measure, accuracy] row is appended per evaluated dataset.
bechmark_result = []
# dict.items() works on both Python 2 and Python 3; the former .iteritems()
# call is Python 2-only and raises AttributeError on Python 3.
for dataset, setting in benchmark_settings.items():
    print('\n=== Evaluation on %s ===' % dataset)
    # setting['log_file'] is a relative path: its directory part is joined
    # onto input_dir, and its base name is what the parser is asked to parse.
    indir = os.path.join(input_dir, os.path.dirname(setting['log_file']))
    log_file = os.path.basename(setting['log_file'])

    parser = LogCluster.LogParser(indir,
                                  setting['log_format'],
                                  output_dir,
                                  rex=setting['regex'],
                                  rsupport=setting['rsupport'])
    parser.parse(log_file)

    # Compare the ground-truth '<log_file>_structured.csv' in the input
    # directory against the file of the same name in output_dir.
    F1_measure, accuracy = evaluator.evaluate(
        groundtruth=os.path.join(indir, log_file + '_structured.csv'),
        parsedresult=os.path.join(output_dir, log_file + '_structured.csv'))
    bechmark_result.append([dataset, F1_measure, accuracy])

print('\n=== Overall evaluation results ===')
df_result = pd.DataFrame(bechmark_result,
                         columns=['Dataset', 'F1_measure', 'Accuracy'])
df_result.set_index('Dataset', inplace=True)
print(df_result)
# Written transposed: datasets become columns, metrics become rows.
df_result.T.to_csv('LogCluster_bechmark_result.csv')