Example #1
0
#!/usr/bin/env python
from pprint import pprint
from RI_precision import *
from LogSig import *
# One row per experiment; the nine columns are filled below with
# TP, FP, TN, FN, p, r, f, RI and the value returned by mainProcess().
result = zeros((10, 9))

LogSigDataPath = [
    '../Sca/Sca_BGL400/',
    '../Sca/Sca_BGL4k/',
    '../Sca/Sca_BGL40k/',
    '../Sca/Sca_BGL400k/',
    '../Sca/Sca_BGL4m/',
]

dataName = ['Sca_BGL400', 'Sca_BGL4k', 'Sca_BGL40k', 'Sca_BGL400k', 'Sca_BGL4m']

# Index (into the two lists above) of the data set used for every run.
curData = 3

for i in range(10):
    # A single pre-formatted argument prints the same text on Python 2 and 3
    # (the old print statement is a SyntaxError on Python 3).
    print('the  %d th experiment starts here!' % (i + 1))

    LogSigPara = Para(LogSigDataPath[curData])
    LogSigInstance = LogSig(LogSigPara)
    # Named runningTime rather than `time` so the module name is not shadowed.
    runningTime = LogSigInstance.mainProcess()

    parameters = prePara(LogSigDataPath[curData])
    TP, FP, TN, FN, p, r, f, RI = process(parameters)
    result[i, :] = TP, FP, TN, FN, p, r, f, RI, runningTime
    pprint(result)

    # Rewritten after every run, so the rows gathered so far are on disk
    # even if a later run fails.
    savetxt('10experiment_withRE' + dataName[curData] + '.csv',
            result,
            delimiter=',')

Example #2
0
def _evaluateParsing(dataPath, logname, resultPath, eval_flag):
    """Run the evaluation step for the parsing output stored in resultPath.

    Args:
        dataPath: passed to ``prePara`` as ``groundTruthDataPath``.
        logname: passed to ``prePara`` as ``logName``.
        resultPath: passed to ``prePara`` as ``geneDataPath``.
        eval_flag: when falsy, nothing is evaluated and 'No evaluation' is
            printed instead.

    Returns:
        The 8-tuple ``(TP, FP, TN, FN, p, r, f, RI)`` returned by
        ``process``, or ``None`` when evaluation is skipped.
    """
    if not eval_flag:
        print('No evaluation')
        return None
    parameters = prePara(groundTruthDataPath=dataPath,
                         logName=logname,
                         geneDataPath=resultPath)
    # Unpacking keeps the check that process() yields exactly eight values.
    TP, FP, TN, FN, p, r, f, RI = process(parameters)
    return TP, FP, TN, FN, p, r, f, RI


def _reportTraining(logname, elapsed):
    """Print the log name and the elapsed training time in seconds."""
    print('dataset:', logname)
    print('training time: %0.3f' % elapsed)


def _writeFtTreeTemplateFiles(template_path, out_seq_path):
    """Turn the FT-tree match sequence into one file per template index.

    For every line of ``out_seq_path`` the 1-based line number is appended to
    ``template<value>.txt`` in ``template_path``, where ``<value>`` is the
    stripped content of that line.

    Raises:
        AssertionError: if a line holds an integer that is not positive.
        ValueError: if a line cannot be converted with ``int``.
    """
    # Output files are opened in append mode, so remove leftovers first.
    for stale in glob(os.path.join(template_path, "template[0-9]*.txt")):
        os.remove(stale)

    # Group the line numbers first so each template file is opened once
    # instead of once per log line.
    lines_by_template = {}
    with open(out_seq_path, "r") as seq_file:
        for i, line in enumerate(seq_file):
            template_index_str = line.strip()
            assert int(template_index_str) > 0, "%d: %s" % (
                i, template_index_str)
            lines_by_template.setdefault(template_index_str,
                                         []).append(str(i + 1) + "\n")

    for template_index_str, line_numbers in lines_by_template.items():
        template_file = os.path.join(template_path,
                                     "template%s.txt" % template_index_str)
        with open(template_file, "a") as out:
            out.writelines(line_numbers)


def evaluateMethods(dataset,
                    algorithm,
                    leaf_num=30,
                    logname='rawlog.log',
                    choose='all',
                    ratio=0.5,
                    eval_flag=1):
    """Run one log-parsing algorithm on a data set and optionally evaluate it.

    The input directory is ``DATA_PATH/<dataName>/`` and the output directory
    is ``RESULT_PATH + algorithm + '_results/' + <dataName> + '/'``, where
    ``<dataName>`` is ``'<dataset>_all'`` when ``choose == 'all'`` and
    ``'<dataset>_<choose>_<ratio with two decimals>'`` otherwise.

    Args:
        dataset: key into ``GroupNum`` and ``regL``; also part of the data
            directory name.
        algorithm: one of "LogSig", "Spell", "Drain", "MoLFI", "FT_tree",
            "LKE" or "IPLoM". Any other value makes the function return
            without running anything.
        leaf_num: accepted for compatibility; not used by this function.
        logname: name of the log file inside the data directory.
        choose: 'all' or the name of a subset; second key into ``GroupNum``.
        ratio: only used to build the directory name when choose != 'all'.
        eval_flag: when truthy, the output is evaluated with
            ``prePara``/``process`` after parsing.

    Returns:
        None. Progress and the training time are printed.

    Raises:
        subprocess.CalledProcessError: if the FT-tree training script exits
            with a non-zero status (``check=True``).
    """
    if choose == 'all':
        dataName = '%s_all' % dataset
    else:
        dataName = '%s_%s_%0.2f' % (dataset, choose, ratio)
    dataPath = os.path.join(DATA_PATH, dataName + '/')

    # Looked up for every algorithm (as before), although only LogSig uses it.
    groupNum = int(GroupNum[dataset][choose] * 0.5)  # caution!
    removeCol = []  # caution!

    if algorithm not in ("LogSig", "Spell", "Drain", "MoLFI", "FT_tree",
                         "LKE", "IPLoM"):
        return

    resultPath = RESULT_PATH + algorithm + '_results/' + dataName + '/'

    if algorithm == "LogSig":
        print('dataset:', logname)
        t1 = time.time()
        parserPara = logsig.Para(path=dataPath,
                                 logname=logname,
                                 groupNum=groupNum,
                                 removeCol=removeCol,
                                 rex=regL[dataset],
                                 savePath=resultPath)
        myParser = logsig.LogSig(parserPara)
        myParser.mainProcess()
        t2 = time.time()

    elif algorithm == "Spell":
        print('dataset:', logname)
        t1 = time.time()
        parser = spell.LogParser(indir=dataPath,
                                 outdir=resultPath,
                                 log_format='<Content>',
                                 tau=0.5,
                                 rex=regL[dataset])
        parser.parse(logname)
        t2 = time.time()

    elif algorithm == "Drain":
        print('dataset:', logname)
        t1 = time.time()
        parser = drain.LogParser(indir=dataPath,
                                 outdir=resultPath,
                                 log_format='<Content>',
                                 st=0.5,
                                 depth=4,
                                 rex=regL[dataset])
        parser.parse(logname)
        t2 = time.time()

    elif algorithm == "MoLFI":
        print('dataset:', logname)
        t1 = time.time()
        parser = molfi.LogParser(indir=dataPath,
                                 outdir=resultPath,
                                 log_format='<Content>',
                                 rex=regL[dataset])
        parser.parse(logname)
        t2 = time.time()

    elif algorithm == "FT_tree":
        # Training runs in a child process via the ft_tree training script.
        t1 = time.time()
        log_path = dataPath + logname
        createDir(resultPath, 0)
        out_seq_path = os.path.join(resultPath, "matchTemplates.seq")
        sub_args = [
            os.path.join(ALGORITHM_PATH, "./ft_tree/main_train.py"),
            "-train_log_path",
            log_path,
            "-out_seq_path",
            out_seq_path,
            "-templates",
            os.path.join(resultPath, "logTemplates.txt"),
            "-fre_word_path",
            os.path.join(resultPath, "output.fre"),
            "-middle_templates",
            os.path.join(resultPath, "output.template_middle"),
            "-short_threshold",
            "1",
        ]
        subprocess.run(sub_args,
                       check=True,
                       stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE)
        t2 = time.time()

        # Format fix-up; done after t2, so it is not part of the timing.
        _writeFtTreeTemplateFiles(resultPath, out_seq_path)

    elif algorithm == "LKE":
        print('dataset:', logname, "LKE")
        t1 = time.time()
        parser = lke.LogParser(log_format='<Content>',
                               indir=dataPath,
                               outdir=resultPath,
                               rex=regL[dataset],
                               split_threshold=3)
        parser.parse(logname)
        t2 = time.time()

    else:  # algorithm == "IPLoM"
        print('dataset:', logname, "IPLoM")
        t1 = time.time()
        parserPara = iplom.Para(path=dataPath,
                                logname=logname,
                                removeCol=removeCol,
                                rex=regL[dataset],
                                savePath=resultPath)
        print('parserPara.path', parserPara.path)
        myParser = iplom.IPLoM(parserPara)
        myParser.mainProcess()
        t2 = time.time()

    _evaluateParsing(dataPath, logname, resultPath, eval_flag)
    _reportTraining(logname, t2 - t1)
Example #3
0
# One row per experiment; each row receives
# TP, FP, TN, FN, p, r, f, RI and the value returned by mainProcess().
result = zeros((10, 9))

LogSigDataPath = [
    '../Sca/Sca_BGL400/', '../Sca/Sca_BGL4k/', '../Sca/Sca_BGL40k/',
    '../Sca/Sca_BGL400k/', '../Sca/Sca_BGL4m/'
]

dataName = [
    'Sca_BGL400', 'Sca_BGL4k', 'Sca_BGL40k', 'Sca_BGL400k', 'Sca_BGL4m'
]

# Position of the active data set in LogSigDataPath / dataName.
curData = 3

for i in range(10):
    # The print statement only parses on Python 2; a single formatted
    # argument gives identical output on Python 2 and Python 3.
    print('the  %d th experiment starts here!' % (i + 1))

    LogSigPara = Para(LogSigDataPath[curData])
    LogSigInstance = LogSig(LogSigPara)
    # Avoid calling this `time`, which would shadow the module name.
    runningTime = LogSigInstance.mainProcess()

    parameters = prePara(LogSigDataPath[curData])
    TP, FP, TN, FN, p, r, f, RI = process(parameters)
    result[i, :] = TP, FP, TN, FN, p, r, f, RI, runningTime
    pprint(result)

    # Saved inside the loop so partial results survive a failing run.
    savetxt('10experiment_withRE' + dataName[curData] + '.csv',
            result,
            delimiter=',')

#IPLoM
#For 2kHDFS data:
#       (self,path='../Data/2kHDFS/',logname='NoID_2kHDFS.log',removable=True,removeCol=[0,1,2,3,4],regular=True,
#		rex=['blk_(|-)[0-9]+','(/|)([0-9]+\.){3}[0-9]+(:[0-9]+|)(:|)'],savePath='./results_2kHDFS/',saveFileName='template',groupNum=14):
#For 2kZookeeper:
# 		(self,path='../Data/2kZookeeper/',logname='NoID_2kZookeeper.log',removable=True,removeCol=[0,1,2,3,4,5],regular=True,
Example #4
0
from LogSig import *

# Settings handed to Para: the log directory, the log file name and the
# save path.
OutputPath = './results/'
RawLogFile = 'rawlog.log'
RawLogPath = './'

# Build the parameter object, run LogSig and print what mainProcess() returns.
para = Para(
    savePath=OutputPath,
    logname=RawLogFile,
    path=RawLogPath,
)
myparser = LogSig(para)
time = myparser.mainProcess()
print('The running time of LogSig is', time)
Example #5
0
    removeCol = [0, 1, 2, 3, 4, 5, 6]
    regL = ['(/|)([0-9]+\.){3}[0-9]+(:[0-9]+|)(:|)']
    # regL = []
elif dataset == 5:
    dataName = 'Proxifier'
    groupNum = 6
    removeCol = [0, 1, 2, 4, 5]
    regL = []

# Single-row table: TP, FP, TN, FN, p, r, f, RI and the LogSig running time.
result = np.zeros((1, 9))

for runIdx in range(1):
    print('the ', runIdx + 1, 'th experiment starts here!')

    # The same directory serves as parser input and as evaluation ground truth.
    logDir = dataPath + dataName + '/'

    # Parse the logs with LogSig; results are written to ./results/.
    myParser = LogSig(
        Para(path=logDir,
             groupNum=groupNum,
             removeCol=removeCol,
             rex=regL,
             savePath='./results/'))
    runningTime = myParser.mainProcess()

    # Evaluate the generated output against the ground truth.
    parameters = prePara(groundTruthDataPath=logDir,
                         geneDataPath='./results/')
    TP, FP, TN, FN, p, r, f, RI = process(parameters)

    result[runIdx] = [TP, FP, TN, FN, p, r, f, RI, runningTime]
    pprint(result)

    gc.collect()
Example #6
0
from LogSig import *

# Where the raw log lives, what it is called, and the save path given to Para.
RawLogPath, RawLogFile, OutputPath = './', 'rawlog.log', './results/'

# Configure LogSig with those settings.
para = Para(logname=RawLogFile, path=RawLogPath, savePath=OutputPath)
myparser = LogSig(para)

# Run the parser and show the value returned by mainProcess().
time = myparser.mainProcess()
print('The running time of LogSig is', time)