def getRidisData():
    """Sample Redis data and print the delimiter analysis for it.

    Prints, in order: the literal marker ``'aaa'``, the number of sampled
    items, and the two values returned by ``getDelimiter``. Returns ``None``.
    """
    sampled = RedisDataTuning().sampleDatas()
    print('aaa')
    print(len(sampled))
    # getDelimiter is expected to yield exactly two values (unpacked here).
    frequentWords, delimiterWord = getDelimiter(sampled)
    print(frequentWords, delimiterWord)
def __init__(self, messages, tRate, sRate, wRate, wHeight):
    """Store the clustering thresholds and create the per-protocol helpers.

    Args:
        messages: the full message collection this instance works on.
        tRate: stored as ``self.tRate``.
        sRate: stored as ``self.srate`` (attribute name is lower-case).
        wRate: stored as ``self.wRate``.
        wHeight: stored as ``self.wHeight``.
    """
    self.tRate = tRate
    self.srate = sRate
    # Fix: this used to be `self.wRate = self.srate`, which silently ignored
    # the wRate argument. Callers that pass equal sRate/wRate are unaffected.
    self.wRate = wRate
    self.wHeight = wHeight
    self.messages = messages
    self.httpData = HttpDataTuning()
    self.ftpData = FTPDataTuning()
    self.redisData = RedisDataTuning()
def redisTest(self, dataPath='/home/wxw/data/RedisData'):
    """Run the circular format-inference test on sampled Redis data.

    Args:
        dataPath: directory handed to ``RedisDataTuning``. Defaults to the
            location that was previously hard-coded, so existing zero-argument
            calls behave as before.
    """
    redisDataTuning = RedisDataTuning(dataPath)
    srcDatas = redisDataTuning.sampleDatas()
    message_parser = TextParseLogic()
    # Messages are built with CRLF as the delimiter and h=2.
    srcmessages = message_parser.ConvertDataToMessage(srcDatas, b'\r\n', h=2)
    srctextcls = TextClassifyLogicTest(srcmessages, 0.05, 0.2, 0.2, 3)
    srctextcls.FormatInferCirclelyTest(srcmessages)
def redisGenerateTest(
        self,
        dataPath='/home/wxw/data/RedisData',
        resultPath='/home/wxw/paper/researchresult/text/formatInfer/KFCluster/redisshortOne'):
    """Run the second circular format-inference test on sampled Redis data.

    Args:
        dataPath: directory handed to ``RedisDataTuning``.
        resultPath: path forwarded to ``FormatInferCirclelyTestTwo``.

    Both defaults are the values that were previously hard-coded, so existing
    zero-argument calls behave as before.
    """
    redisDataTuning = RedisDataTuning(dataPath)
    srcDatas = redisDataTuning.sampleDatas()
    message_parser = TextParseLogic()
    # Messages are built with CRLF as the delimiter and h=2.
    srcmessages = message_parser.ConvertDataToMessage(srcDatas, b'\r\n', h=2)
    srctextcls = TextClassifyLogicTest(srcmessages, 0.05, 0.2, 0.2, 3)
    # 'R' is the message-type code passed through unchanged.
    srctextcls.FormatInferCirclelyTestTwo(srcmessages, resultPath, 'R')
class FieldHunterTest:
    """Manual drivers that print the delimiters found by ``FieldHunter``."""

    def __init__(self):
        """Create the hunter and one data-tuning helper per protocol."""
        self.fhunter = FieldHunter()
        self.httpdata = HttpDataTuning()
        self.ftpdata = FTPDataTuning()
        self.redisdata = RedisDataTuning()

    def testFordeliFind(self, dirPath):
        """Print the delimiter found in the data read from ``dirPath``.

        Each group returned by ``read_datas(dirPath, 'multy')`` contributes
        all of its items when it has fewer than 100, otherwise its first 500.
        """
        groups = read_datas(dirPath, 'multy')
        picked = []
        for group in groups:
            picked.extend(group if len(group) < 100 else group[0:500])
        pureDatas = get_puredatas(picked)
        print(self.fhunter.findDelimiter(pureDatas))

    def testForHTTPFind(self):
        """Print the filtered delimiter result for sampled HTTP data."""
        samples = self.httpdata.sampleDatas()
        found = self.fhunter.findDelimiter(samples)
        print(filterFieldWords(found))

    def testForFTPFind(self):
        """Print the filtered delimiter result for sampled FTP data."""
        samples = self.ftpdata.sampleData()
        found = self.fhunter.findDelimiter(samples)
        print(filterFieldWords(found))

    def testForREDISFind(self):
        """Print the filtered delimiter result for sampled Redis data."""
        samples = self.redisdata.sampleDatas()
        found = self.fhunter.findDelimiter(samples)
        print(filterFieldWords(found))
class TextFormInfer:
    """Cluster messages with ``TextClassify`` and align each cluster into a Symbol."""

    def __init__(self, messages):
        """Build the classifier for ``messages`` and the per-protocol helpers."""
        self.clser = TextClassify(messages)
        self.httptuning = HttpDataTuning()
        self.ftptuning = FTPDataTuning()
        self.redistuning = RedisDataTuning()

    def ldaFormatInfer(self, wSize, TK, wLen, Kcls, infercls='H'):
        """Cluster via ``clsMessages`` and return one aligned Symbol per cluster.

        Args:
            wSize, TK, wLen, Kcls: forwarded unchanged to
                ``self.clser.clsMessages``.
            infercls: ``'H'`` reports through the HTTP helper, ``'F'`` through
                the FTP helper, anything else through the Redis helper.

        Returns:
            list of ``Symbol`` objects, in the iteration order of the clusters.
        """
        clsDatas = self.clser.clsMessages(wSize, TK, wLen, Kcls)
        self._reportClusterLens(clsDatas, infercls)
        return self._alignClusters(clsDatas)

    def ladDbscanFormatInfer(self, wSize, TK, wLen, mindis, minpt, infercls='H'):
        """Cluster via ``clsByDbscan`` and return one aligned Symbol per cluster.

        Args:
            wSize, TK, wLen, mindis, minpt: forwarded unchanged to
                ``self.clser.clsByDbscan``.
            infercls: same meaning as in ``ldaFormatInfer``; the ``'H'``
                default was added so both methods share a signature style.

        Returns:
            list of ``Symbol`` objects, in the iteration order of the clusters.
        """
        clsDatas = self.clser.clsByDbscan(wSize, TK, wLen, mindis, minpt)
        self._reportClusterLens(clsDatas, infercls)
        return self._alignClusters(clsDatas)

    def _reportClusterLens(self, clsDatas, infercls):
        """Pass the clusters to ``getMsgsLen`` of the helper chosen by ``infercls``."""
        clusters = list(clsDatas.values())
        if infercls == 'H':
            tuning = self.httptuning
        elif infercls == 'F':
            tuning = self.ftptuning
        else:
            tuning = self.redistuning
        tuning.getMsgsLen(clusters)

    def _alignClusters(self, clsDatas):
        """Wrap each cluster in a ``Symbol`` and run ``Format.splitAligned`` on it."""
        formatInfer = Format()
        clsFormats = []
        for clsData in clsDatas.values():
            tMessages = [RawMessage(message) for message in clsData]
            tempFormat = Symbol(messages=tMessages)
            # splitAligned is called for its effect on tempFormat; its return
            # value is not used.
            formatInfer.splitAligned(tempFormat, doInternalSlick=True)
            clsFormats.append(tempFormat)
        return clsFormats
class RedisDataTuningTest:
    """Manual drivers around ``RedisDataTuning`` and ``MessageConvert``."""

    def __init__(self, path):
        """Create the Redis tuning helper for ``path`` and a message converter."""
        self.redisData = RedisDataTuning(path)
        self.msgConvert = MessageConvert()

    def getDatas(self, path):
        """Return whatever ``read_datas(path, 'single')`` yields."""
        return read_datas(path, 'single')

    def splitDatas(self, datas):
        """Split ``datas`` with ``clsMessageByDire`` and return the two parts."""
        srcPart, desPart = self.msgConvert.clsMessageByDire(datas)
        return srcPart, desPart

    def getDesCommondTest(self, desDatas):
        """Print the result of ``getDesCommond`` for ``desDatas``."""
        commands = self.redisData.getDesCommond(desDatas)
        print(commands)

    def testSample(self):
        """Sample the source-side data with 150 and print each key with its group size."""
        srcDatas, desDatas = self.redisData.getProDatas()
        sampled = self.redisData.sampleSourceDatas(srcDatas, 150)
        for key in sampled:
            groupSize = len(sampled[key])
            print(key, groupSize)
class DataTuning:
    """Readers that load message data from files, fixed samples, or a Redis lookup."""

    def __init__(self):
        """Create the FTP/Redis tuning helpers, the Redis accessor and the file config."""
        self.ftp = FTPDataTuning()
        self.redis = RedisDataTuning()
        self.redis_deal = redis_deal()
        self.fConfig = FileConfig()

    def readDatas(self, filePath):
        """Read ``filePath`` and return the result passed through ``get_puredatas``."""
        return get_puredatas(read_filedatas(filePath))

    def readDataSummarys(self, filePath):
        """Read ``filePath`` and return the result of ``read_filedatas`` as is."""
        return read_filedatas(filePath)

    def readDatasTemp(self, filePath):
        """Return source items followed by destination items from the FTP helper.

        ``filePath`` is accepted but not used.
        """
        srcDatas, desDatas = self.ftp.tuningHttpByregix()
        return [*srcDatas, *desDatas]

    def icsReadDatasTemp(self, filePath):
        """Return the purified data of one fixed capture file.

        ``filePath`` is accepted but not used; the path is hard-coded.
        """
        rawMessages = read_datas('/home/wxw/data/ToolDatas/15895903730.10.222', 'single')
        return get_puredatas(rawMessages)

    def textReadDatasTemp(self, filePath):
        """Return the Redis sample data. ``filePath`` is accepted but not used."""
        return self.redis.sampleDatas()

    def readDatasByType(self, fileType):
        """Look up the file name for ``fileType`` and return ``readDatas`` of it."""
        return self.readDatas(self._pathForType(fileType))

    def readSummaryByType(self, fileType):
        """Look up the file name for ``fileType`` and return ``readDataSummarys`` of it.

        Prints ``'aa'``, the resolved path and ``'bb'`` before reading.
        """
        filePath = self._pathForType(fileType)
        print('aa')
        print(filePath)
        print('bb')
        return self.readDataSummarys(filePath)

    def _pathForType(self, fileType):
        """Join the configured directory with the file name stored for ``fileType``."""
        fileName = self.redis_deal.read_from_redis(fileType)
        return os.path.join(self.fConfig.pathDir, fileName)
def __init__(self):
    """Create the collaborators this object reads data through."""
    # Protocol-specific tuning helpers.
    self.ftp = FTPDataTuning()
    self.redis = RedisDataTuning()
    # Accessor used to look up file names, and the file configuration.
    self.redis_deal = redis_deal()
    self.fConfig = FileConfig()
def __init__(self, messages):
    """Build the classifier for ``messages`` and one tuning helper per protocol."""
    self.clser = TextClassify(messages)
    # One helper each for HTTP, FTP and Redis.
    self.httptuning = HttpDataTuning()
    self.ftptuning = FTPDataTuning()
    self.redistuning = RedisDataTuning()
class TextClassifyLogic:
    """Split messages into clusters by frequent words and align each cluster.

    Message objects are expected to provide ``now()`` (the text currently
    being examined) and a ``message`` attribute (the payload handed to
    ``RawMessage``); both are read by the methods below.
    """

    def __init__(self, messages, tRate, sRate, wRate, wHeight):
        """Store thresholds and create the per-protocol helpers.

        Args:
            messages: full message collection; its length is the denominator
                of the ``tRate`` comparisons in ``ClassifyCircleLy``.
            tRate: minimum share of ``messages`` a word or cluster must cover.
            sRate: stored as ``self.srate``.
            wRate: rate passed to the frequent-word analysis.
            wHeight: minimum length of a frequent word.
        """
        self.tRate = tRate
        self.srate = sRate
        # Fix: this used to be `self.wRate = self.srate`, which silently
        # ignored the wRate argument.
        self.wRate = wRate
        self.wHeight = wHeight
        self.messages = messages
        self.httpData = HttpDataTuning()
        self.ftpData = FTPDataTuning()
        self.redisData = RedisDataTuning()

    def GetLocData(self, datas):
        """Return ``data.now()`` for every item of ``datas``."""
        return [data.now() for data in datas]

    def filterShort(self, freWords, h):
        """Return the set of words from ``freWords`` whose length is at least ``h``."""
        return {value for value in freWords if len(value) >= h}

    def GetFrequentWords(self, rate, h, datas):
        """Return frequent words of length >= ``h`` found in ``datas`` at ``rate``."""
        texts = [str(data) for data in self.GetLocData(datas)]
        freWords = ApriorFreAnalyZer(texts, rate).getApriorFre()
        return self.filterShort(freWords, h)

    def RankWord(self, word, datas):
        """Count messages containing ``word`` and average its first position.

        Returns:
            ``(cnt, (loc + 1) / cnt)`` where ``cnt`` is the number of messages
            containing ``word`` and ``loc`` the sum of first-occurrence
            offsets; ``(0, 0.0)`` when no message contains the word.
        """
        texts = [str(data) for data in self.GetLocData(datas)]
        cnt = 0
        loc = 0
        for text in texts:
            position = text.find(word)
            if position != -1:
                cnt += 1
                loc += position
        print(word, cnt, loc)
        if cnt == 0:
            # Fix: avoid ZeroDivisionError for a word that never occurs.
            return (0, 0.0)
        return (cnt, (loc + 1) / cnt)

    def RankWords(self, freWords, datas):
        """Return ``(word, count, avgPosition)`` tuples ordered by ``BaseRankModel.sortList``."""
        words = []
        for freWord in freWords:
            cnt, avgLoc = self.RankWord(freWord, datas)
            words.append((freWord, cnt, avgLoc))
        return BaseRankModel.sortList(words)

    def ConvertFreWords(self, data):
        """Return the frequent words present in ``data``, joined in order of position.

        Reads ``self.freWords``, which ``FormatInfer`` sets before classifying.
        """
        freSet = {}
        for freWord in self.freWords:
            lo = data.find(freWord)
            if lo != -1:
                freSet[freWord] = lo
        frePattern = sorted(freSet.items(), key=lambda item: item[1])
        return ''.join(item[0] for item in frePattern)

    def GetWodsRank(self, datas):
        """Return the ranked frequent words of ``datas`` using ``wRate``/``wHeight``."""
        freWords = self.GetFrequentWords(self.wRate, self.wHeight, datas)
        return self.RankWords(freWords, datas)

    def ClassifyMessages(self, messages):
        """Group ``messages`` by the pattern ``ConvertFreWords`` builds for each one."""
        msgSet = {}
        for message in messages:
            pattern = self.ConvertFreWords(str(message.message))
            msgSet.setdefault(pattern, []).append(message)
        return msgSet

    def ClassifyCircleLy(self, preWords, messages):
        """Recursively split ``messages`` on the first usable ranked word.

        A ranked word is usable when it is not in ``preWords``, covers more
        than ``tRate`` of ``self.messages`` and does not occur in every one of
        ``messages``. Messages are split into those containing the word and
        the rest; a part is split further while it is larger than ``tRate`` of
        ``self.messages``.

        Returns:
            ``messages`` itself when no usable word exists, otherwise a
            (possibly nested) list of message lists.
        """
        rankWords = self.GetWodsRank(messages)
        total = len(self.messages)
        funCode = None
        for word in rankWords:
            if (word not in preWords and word[1] / total > self.tRate
                    and word[1] != len(messages)):
                funCode = word
                break
        print(funCode, len(messages))
        if funCode is None:
            return messages

        fResult = []
        clsOne, clsTwo = self.ClassifyByCodes(funCode[0], messages)
        print(len(clsOne), len(clsTwo))
        if len(clsTwo) / total > self.tRate:
            fResult.append(self.ClassifyCircleLy(preWords, clsTwo))
        elif len(clsTwo) > 0:
            fResult.append(clsTwo)
        if len(clsOne) / total > self.tRate:
            # funCode is excluded only while its own cluster is being split.
            preWords.add(funCode)
            fResult.append(self.ClassifyCircleLy(preWords, clsOne))
            preWords.remove(funCode)
        else:
            fResult.append(clsOne)
        return fResult

    def ClassifyByCodes(self, codes, messages):
        """Return ``(containing, notContaining)`` by whether ``codes`` is in ``str(now())``."""
        clsOne = []
        clsTwo = []
        for message in messages:
            if str(message.now()).find(codes) != -1:
                clsOne.append(message)
            else:
                clsTwo.append(message)
        return (clsOne, clsTwo)

    def FormatInfer(self, rate, h):
        """Group ``self.messages`` by frequent-word pattern and align each group.

        Fix: the previous body called ``GetFrequentWords`` without its
        ``datas`` argument, discarded the result and read ``self.datas``, so
        it could not run. The frequent words are now computed from
        ``self.messages`` and stored in ``self.freWords`` for
        ``ConvertFreWords``.

        Returns:
            list of ``Symbol`` objects, one per pattern group.
        """
        self.freWords = self.GetFrequentWords(rate, h, self.messages)
        messageClassify = self.ClassifyMessages(self.messages)
        return self._buildFormats(messageClassify.values())

    def FormatInferCirclely(self, messages, Mtype):
        """Cluster ``messages`` recursively, report cluster sizes, align each cluster.

        Args:
            messages: messages to cluster.
            Mtype: ``'H'`` reports through the HTTP helper, ``'F'`` through
                the FTP helper, anything else through the Redis helper.

        Returns:
            list of ``Symbol`` objects, one per cluster.
        """
        result = self.classifyMessages(set(), messages)
        clsResult = [[msg.message for msg in res] for res in result]
        if Mtype == 'H':
            self.httpData.getMsgsLen(clsResult)
        elif Mtype == 'F':
            self.ftpData.getMsgsLen(clsResult)
        else:
            self.redisData.getMsgsLen(clsResult)
        return self._buildFormats(result)

    def formatInfer(self, messages):
        """Cluster ``messages`` recursively and return one aligned Symbol per cluster."""
        result = self.classifyMessages(set(), messages)
        return self._buildFormats(result)

    def filterSets(self, result, fResult):
        """Let ``Converter.ConvertMultiListPure`` fill ``fResult`` from ``result``."""
        cverter = Converter()
        cverter.ConvertMultiListPure(result, fResult)

    def classifyMessages(self, preSet, messages):
        """Run ``ClassifyCircleLy`` and return its output converted by ``filterSets``."""
        datas = self.ClassifyCircleLy(preSet, messages)
        result = []
        self.filterSets(datas, result)
        return result

    def _buildFormats(self, clusters):
        """Wrap each cluster in a ``Symbol`` and run ``Format.splitAligned`` on it."""
        aligner = Format()
        finalFormats = []
        for cluster in clusters:
            tMessages = [RawMessage(item.message) for item in cluster]
            tempFormat = Symbol(messages=tMessages)
            aligner.splitAligned(tempFormat, doInternalSlick=True)
            finalFormats.append(tempFormat)
        return finalFormats
def __init__(self, messages):
    """Create the format inferrer for ``messages`` and the helpers used by the drivers."""
    self.tFomInfer = TextFormInfer(messages)
    # One data-tuning helper per protocol.
    self.httpdata = HttpDataTuning()
    self.ftpData = FTPDataTuning()
    self.redisData = RedisDataTuning()
    # Receives the inferred symbols together with an output path.
    self.txtSymTree = TextSympolToTree()
class TextFormInferTest:
    """Experiment drivers that run ``TextFormInfer`` on HTTP/FTP/Redis data.

    Every driver replaces ``self.tFomInfer`` with an inferrer built from the
    data it loads, runs one of the two inference wrappers, and hands the
    resulting symbols to ``self.txtSymTree.symbolsToTree``.
    """

    # Common prefix of every result path used by the *Generate drivers.
    RESULT_ROOT = '/home/wxw/paper/researchresult/text/formatInfer/'

    def __init__(self, messages):
        """Create the inferrer for ``messages`` and the helper objects."""
        self.tFomInfer = TextFormInfer(messages)
        self.httpdata = HttpDataTuning()
        self.ftpData = FTPDataTuning()
        self.redisData = RedisDataTuning()
        self.txtSymTree = TextSympolToTree()

    def _resultPath(self, *parts):
        """Return ``RESULT_ROOT`` followed by the ``str()`` of every part."""
        return self.RESULT_ROOT + ''.join(str(part) for part in parts)

    def ldaFormatInferTest(self, wSize, TK, wLen, Kcls, path='', infercls='H'):
        """Run ``ldaFormatInfer`` and pass the symbols and ``path`` to ``symbolsToTree``."""
        fNums = self.tFomInfer.ldaFormatInfer(wSize, TK, wLen, Kcls, infercls)
        self.txtSymTree.symbolsToTree(fNums, path)

    def ladDbscanFormatInfer(self, wSize, TK, wLen, mindis, minpt, path='',
                             infercls='H'):
        """Run ``ladDbscanFormatInfer`` and pass the symbols and ``path`` to ``symbolsToTree``."""
        fNums = self.tFomInfer.ladDbscanFormatInfer(wSize, TK, wLen, mindis,
                                                    minpt, infercls)
        self.txtSymTree.symbolsToTree(fNums, path)

    def httpDataTest(self):
        """Infer on the destination half of the HTTP data with 4 clusters."""
        srcDatas, desDatas = self.httpdata.tuningHttpByregix()
        self.tFomInfer = TextFormInfer(desDatas)
        self.ldaFormatInferTest(3, 15, 2, 4)

    def httpTotalTest(self):
        """Infer on sampled HTTP data with 5 clusters."""
        datas = self.httpdata.sampleDatas()
        self.tFomInfer = TextFormInfer(datas)
        self.ldaFormatInferTest(3, 15, 2, 5)

    def httpTotalDBSTest(self):
        """Infer on sampled HTTP data via the DBSCAN wrapper (0.15, 10)."""
        datas = self.httpdata.sampleDatas()
        self.tFomInfer = TextFormInfer(datas)
        self.ladDbscanFormatInfer(3, 5, 3, 0.15, 10)

    def ftpTotalTest(self):
        """Infer on sampled FTP data via the DBSCAN wrapper (0.05, 10)."""
        datas = self.ftpData.sampleData()
        self.tFomInfer = TextFormInfer(datas)
        self.ladDbscanFormatInfer(3, 5, 3, 0.05, 10)

    def ftpTotalGenerateDBS(self):
        """Run the two fixed DBSCAN configurations on sampled FTP data.

        This body was originally a zero-argument ``ftpTotalGenerate`` that was
        overwritten by the later ``ftpTotalGenerate(self, kClus)`` definition
        in this class and therefore could never be called; it is kept under
        its own name.
        """
        datas = self.ftpData.sampleData()
        self.tFomInfer = TextFormInfer(datas)
        self.ladDbscanFormatInfer(3, 15, 3, 0.05, 10,
                                  self._resultPath('DBSCAN/0.05 ftp.png'))
        self.ladDbscanFormatInfer(3, 5, 3, 0.05, 10)

    def httpDataTestDBS(self):
        """Infer on the destination half of fresh HTTP data via the DBSCAN wrapper."""
        httpDataTuning = HttpDataTuning()
        srcMsgs, desMsgs = httpDataTuning.tuningHttpByregix()
        self.tFomInfer = TextFormInfer(desMsgs)
        self.ladDbscanFormatInfer(3, 5, 3, 0.15, 10)

    def ftpDataTest(self):
        """Infer on the source half of the FTP data with 15 clusters."""
        srcDatas, desDatas = self.ftpData.tuningHttpByregix()
        self.tFomInfer = TextFormInfer(srcDatas)
        self.ldaFormatInferTest(3, 15, 3, 15)

    def ftpDataTestDBS(self):
        """Print the FTP message total, then infer on the destination half via DBSCAN."""
        ftpDataTuning = FTPDataTuning()
        srcMsgs, desMsgs = ftpDataTuning.tuningHttpByregix()
        print(len(srcMsgs) + len(desMsgs))
        self.tFomInfer = TextFormInfer(desMsgs)
        self.ladDbscanFormatInfer(3, 15, 3, 0.01, 4)

    def redisTotalTest(self):
        """Infer on sampled Redis data with 10 clusters."""
        datas = self.redisData.sampleDatas()
        self.tFomInfer = TextFormInfer(datas)
        self.ldaFormatInferTest(3, 15, 2, 10)

    def redisTotalGenerateDBS(self):
        """Run the fixed DBSCAN configuration on sampled Redis data.

        This body was originally a zero-argument ``redisTotalGenerate`` that
        was overwritten by the later ``redisTotalGenerate(self, kClus, rTime)``
        definition in this class and therefore could never be called; it is
        kept under its own name.
        """
        datas = self.redisData.sampleDatas()
        self.tFomInfer = TextFormInfer(datas)
        self.ladDbscanFormatInfer(3, 5, 3, 0.05, 4,
                                  self._resultPath('DBSCAN/redis .png'))

    def httpTotalGenerate(self, kClus):
        """Infer on sampled HTTP data with ``kClus`` clusters and a KMEANS result path.

        Prints ``'ss'``/``'ee'`` with ``kClus`` before and after the run.
        """
        datas = self.httpdata.sampleDatas()
        self.tFomInfer = TextFormInfer(datas)
        print('ss', kClus)
        self.ldaFormatInferTest(
            3, 15, 2, kClus,
            self._resultPath('KMEANS/', kClus, 'httptwo.png'),
            infercls='H')
        print('ee', kClus)

    def httpTotalRepeatGenerate(self, kClus, rTime):
        """Like ``httpTotalGenerate`` with ``rTime`` in the file name and no prints."""
        datas = self.httpdata.sampleDatas()
        self.tFomInfer = TextFormInfer(datas)
        self.ldaFormatInferTest(
            3, 15, 2, kClus,
            self._resultPath('KMEANS/', kClus, ' ', rTime, 'httpone.png'),
            infercls='H')

    def httpDBSTotalGenerate(self, r, C, rTime=''):
        """Infer on sampled HTTP data via DBSCAN(``r``, ``C``) with a DBSCAN result path.

        Prints ``'ss'``/``'ee'`` with ``r`` before and after the run.
        """
        datas = self.httpdata.sampleDatas()
        self.tFomInfer = TextFormInfer(datas)
        print('ss', r)
        self.ladDbscanFormatInfer(
            3, 5, 3, r, C,
            self._resultPath('DBSCAN/', r, C, rTime, 'httptwo.png'),
            infercls='H')
        print('ee', r)

    def ftpTotalGenerate(self, kClus):
        """Infer on sampled FTP data with ``kClus`` clusters and a KMEANS/FTP result path."""
        datas = self.ftpData.sampleData()
        self.tFomInfer = TextFormInfer(datas)
        self.ldaFormatInferTest(
            3, 15, 2, kClus,
            self._resultPath('KMEANS/FTP/', kClus, 'ftp.png'),
            infercls='F')

    def ftpTotalGenerateRepeat(self, kClus, rTime):
        """Like ``ftpTotalGenerate`` with ``rTime`` appended to the cluster count."""
        datas = self.ftpData.sampleData()
        self.tFomInfer = TextFormInfer(datas)
        self.ldaFormatInferTest(
            3, 15, 2, kClus,
            self._resultPath('KMEANS/FTP/', kClus, rTime, 'ftp.png'),
            infercls='F')

    def ftpDBSCANGenerate(self, r, C, rTime=''):
        """Infer on sampled FTP data via DBSCAN(``r``, ``C``) with a DBSCAN/FTP result path.

        Prints ``'ss'``/``'ee'`` with ``r`` before and after the run.
        """
        datas = self.ftpData.sampleData()
        self.tFomInfer = TextFormInfer(datas)
        print('ss', r)
        self.ladDbscanFormatInfer(
            3, 5, 3, r, C,
            self._resultPath('DBSCAN/FTP/', r, C, rTime, 'ftp.png'),
            infercls='F')
        print('ee', r)

    def redisTotalGenerate(self, kClus, rTime=''):
        """Infer on sampled Redis data with ``kClus`` clusters and a KMEANS/REDIS result path."""
        datas = self.redisData.sampleDatas()
        self.tFomInfer = TextFormInfer(datas)
        self.ldaFormatInferTest(
            3, 15, 2, kClus,
            self._resultPath('KMEANS/REDIS/', kClus, rTime, 'redis.png'),
            infercls='R')

    def redisDBSCANGenerate(self, r, C, rTime=''):
        """Infer on sampled Redis data via DBSCAN(``r``, ``C``) with a DBSCAN/REDIS result path.

        Prints ``'ss'``/``'ee'`` with ``r`` before and after the run.
        """
        datas = self.redisData.sampleDatas()
        self.tFomInfer = TextFormInfer(datas)
        print('ss', r)
        self.ladDbscanFormatInfer(
            3, 5, 3, r, C,
            self._resultPath('DBSCAN/REDIS/', r, C, rTime, 'redis.png'),
            infercls='R')
        print('ee', r)
def __init__(self, path):
    """Create the Redis tuning helper for ``path`` and a message converter."""
    # Helper built from the given path.
    self.redisData = RedisDataTuning(path)
    # Converter whose clsMessageByDire is used elsewhere to split data.
    self.msgConvert = MessageConvert()
def __init__(self):
    """Create the field hunter and one data-tuning helper per protocol."""
    self.fhunter = FieldHunter()
    # One helper each for HTTP, FTP and Redis.
    self.httpdata = HttpDataTuning()
    self.ftpdata = FTPDataTuning()
    self.redisdata = RedisDataTuning()