"""Time the AEL log parser on each file reported for one dataset.

For every ``(size, file)`` entry returned by ``get_file_size(dataset)`` the
file is parsed with that dataset's AEL benchmark settings, and the elapsed
wall time (in seconds) is recorded next to the size.  The collected timings
are written to ``./AEL_<dataset>.csv``.
"""
import collections
import os

import pandas as pd

from benchmark.AEL_benchmark import benchmark_settings
from logparser import AEL
from File_Info import get_file_size

dataset = 'BGL'
output_dir = 'AEL_result/'  # The output directory of parsing results

one_setting = benchmark_settings[dataset]
# NOTE: log_file is not used below (each run parses the file supplied by
# get_file_size); it is kept so the module's names stay unchanged.
log_file = os.path.basename(one_setting['log_file'])
input_dir = os.path.join('../logs/', os.path.dirname(one_setting['log_file']))

file_size = get_file_size(dataset)

# Column name -> list of measurements; 'size' and 'time' grow in lockstep.
results = collections.defaultdict(list)

for size, file in file_size.items():
    # A new parser is built for every run, as in the original script.
    parser = AEL.LogParser(
        input_dir,
        output_dir,
        log_format=one_setting['log_format'],
        rex=one_setting['regex'],
        minEventCount=one_setting['minEventCount'],
        merge_percent=one_setting['merge_percent'],
        keep_para=False,
    )
    time_elapsed = parser.parse(file)

    results['size'].append(size)
    results['time'].append(time_elapsed.total_seconds())

    # Report progress and rewrite the CSV after each run so that the timings
    # gathered so far survive a failure on a later file.
    print(results['time'])
    pd.DataFrame(results).to_csv('./AEL_%s.csv' % dataset)
'regex': [r'(/|)(\d+\.){3}\d+(:\d+)?'], 'minEventCount': 2, 'merge_percent': 0.4 }, } if __name__ == '__main__': benchmark_result = [] for dataset, setting in benchmark_settings.items(): print('\n=== Evaluation on %s ===' % dataset) indir = os.path.join(input_dir, os.path.dirname(setting['log_file'])) log_file = os.path.basename(setting['log_file']) parser = AEL.LogParser(log_format=setting['log_format'], indir=indir, outdir=output_dir, minEventCount=setting['minEventCount'], merge_percent=setting['merge_percent'], rex=setting['regex']) time_elapsed = parser.parse(log_file) F1_measure, accuracy = evaluator.evaluate( groundtruth=os.path.join(indir, log_file + '_structured.csv'), parsedresult=os.path.join(output_dir, log_file + '_structured.csv')) benchmark_result.append( [dataset, F1_measure, accuracy, time_elapsed.total_seconds()]) print('\n=== Overall evaluation results ===') df_result = pd.DataFrame( benchmark_result,
#!/usr/bin/env python
"""Demo script: run the AEL log parser on HDFS_2k.log."""
import sys

# Make the parent directory importable before pulling in logparser.
sys.path.append('../')

from logparser import AEL

# Input directory of the log file and output directory of parsing results.
input_dir = '../logs/HDFS/'
output_dir = 'AEL_result/'

# Input log file name and the HDFS log format.
log_file = 'HDFS_2k.log'
log_format = '<Date> <Time> <Pid> <Level> <Component>: <Content>'

# Minimum number of events in a bin.
minEventCount = 2
# Percentage of different tokens.
merge_percent = 0.5

# Regular expression list for optional preprocessing (default: []).
regex = [
    r'blk_-?\d+',
    r'(\d+\.){3}\d+(:\d+)?',
]

parser = AEL.LogParser(
    input_dir,
    output_dir,
    log_format,
    merge_percent=merge_percent,
    minEventCount=minEventCount,
    rex=regex,
)
parser.parse(log_file)