Example #1
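Benchmarks AEL parsing time on the BGL dataset across log files of several sizes and writes the size/time pairs to AEL_BGL.csv.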
import collections
import os

import pandas as pd
from benchmark.AEL_benchmark import benchmark_settings
from logparser import AEL
from File_Info import get_file_size

dataset = 'BGL'

output_dir = 'AEL_result/'  # The output directory of parsing results
one_setting = benchmark_settings[dataset]
log_file = os.path.basename(one_setting['log_file'])
input_dir = os.path.join('../logs/', os.path.dirname(one_setting['log_file']))

file_size = get_file_size(dataset)

results = collections.defaultdict(list)
for size, file in file_size.items():
    parser = AEL.LogParser(
        input_dir,
        output_dir,
        log_format=one_setting['log_format'],
        rex=one_setting['regex'],
        minEventCount=one_setting['minEventCount'],
        merge_percent=one_setting['merge_percent'],
        keep_para=False
    )
    time_elapsed = parser.parse(file)
    results['size'].append(size)
    results['time'].append(time_elapsed.total_seconds())
    print(results['time'])
    pd.DataFrame(results).to_csv('./AEL_%s.csv' % dataset)
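The File_Info module above is specific to this benchmark setup; the loop only requires get_file_size(dataset) to return a mapping from a size label to the name of a log file inside input_dir. A minimal stand-in, assuming sampled files named like BGL_1m.log exist, could look like this (hypothetical helper, not part of logparser):

# Hypothetical replacement for File_Info.get_file_size: maps a size label to the
# corresponding sampled log file name. Adjust the labels to match your files.
def get_file_size(dataset):
    sizes = ['1m', '10m', '100m', '1g']  # assumed size labels
    return {size: '%s_%s.log' % (dataset, size) for size in sizes}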
Example #2
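An excerpt from the AEL benchmark script: it parses every dataset listed in benchmark_settings and evaluates the result against the labeled ground truth. The truncated header of the script is assumed to provide the imports and directories used below, roughly along these lines (a sketch, not the original preamble):

import os
import sys

import pandas as pd

sys.path.append('../')
from logparser import AEL, evaluator  # evaluator's import path may differ across logparser versions

input_dir = '../logs/'      # assumed root directory of the raw logs
output_dir = 'AEL_result/'  # output directory of parsing results, as in the other examples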
        'regex': [r'(/|)(\d+\.){3}\d+(:\d+)?'],
        'minEventCount': 2,
        'merge_percent': 0.4
    },
}

if __name__ == '__main__':
    benchmark_result = []
    for dataset, setting in benchmark_settings.items():
        print('\n=== Evaluation on %s ===' % dataset)
        indir = os.path.join(input_dir, os.path.dirname(setting['log_file']))
        log_file = os.path.basename(setting['log_file'])

        parser = AEL.LogParser(log_format=setting['log_format'],
                               indir=indir,
                               outdir=output_dir,
                               minEventCount=setting['minEventCount'],
                               merge_percent=setting['merge_percent'],
                               rex=setting['regex'])
        time_elapsed = parser.parse(log_file)

        F1_measure, accuracy = evaluator.evaluate(
            groundtruth=os.path.join(indir, log_file + '_structured.csv'),
            parsedresult=os.path.join(output_dir,
                                      log_file + '_structured.csv'))
        benchmark_result.append(
            [dataset, F1_measure, accuracy,
             time_elapsed.total_seconds()])

    print('\n=== Overall evaluation results ===')
    # Column names correspond to the fields appended to benchmark_result above.
    df_result = pd.DataFrame(
        benchmark_result,
        columns=['Dataset', 'F1_measure', 'Accuracy', 'Time'])
    print(df_result)
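Note that evaluator.evaluate compares the parsed <log>_structured.csv in output_dir against the labeled ground-truth file of the same name next to the raw log, returning a pairwise F1-measure and a message-level grouping accuracy.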
Example #3
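Parses the 2k-line HDFS sample log with AEL, using explicit preprocessing regexes and bin-merging parameters.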
#!/usr/bin/env python
import sys
sys.path.append('../')
from logparser import AEL

input_dir = '../logs/HDFS/'  # The input directory of log file
output_dir = 'AEL_result/'  # The output directory of parsing results
log_file = 'HDFS_2k.log'  # The input log file name
log_format = '<Date> <Time> <Pid> <Level> <Component>: <Content>'  # HDFS log format
minEventCount = 2  # The minimum number of events in a bin
merge_percent = 0.5  # The percentage of different tokens
regex = [r'blk_-?\d+', r'(\d+\.){3}\d+(:\d+)?']  # Regular expression list for optional preprocessing (default: [])

parser = AEL.LogParser(input_dir,
                       output_dir,
                       log_format,
                       rex=regex,
                       minEventCount=minEventCount,
                       merge_percent=merge_percent)
parser.parse(log_file)
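After parsing, AEL writes HDFS_2k.log_structured.csv (one row per log message) and HDFS_2k.log_templates.csv (the extracted templates) into output_dir; Example #2 relies on the former for evaluation. A quick way to inspect the result (a sketch, assuming the default EventId/EventTemplate column names produced by logparser):

import os
import pandas as pd

# Load the structured output produced by parser.parse() above and show the
# first few messages with their matched templates.
structured = pd.read_csv(os.path.join(output_dir, log_file + '_structured.csv'))
print(structured[['EventId', 'EventTemplate']].head())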