Esempio n. 1
0
class TextFormInfer:
    """Cluster text messages, tune the clusters and infer one format each.

    The clustering itself is delegated to ``TextClassify``; the
    per-protocol tuning objects receive the resulting clusters through
    their ``getMsgsLen`` method before alignment takes place.
    """

    def __init__(self, messages):
        """Build the classifier for *messages* and one tuner per protocol."""
        self.clser = TextClassify(messages)
        self.httptuning = HttpDataTuning()
        self.ftptuning = FTPDataTuning()
        self.redistuning = RedisDataTuning()

    def _tuningFor(self, infercls):
        """Return the tuner for *infercls*: 'H', 'F', anything else -> redis."""
        if infercls == 'H':
            return self.httptuning
        if infercls == 'F':
            return self.ftptuning
        return self.redistuning

    def _inferClusterFormats(self, clsDatas, infercls):
        """Tune the clusters in *clsDatas* and align each one into a Symbol.

        Args:
            clsDatas: mapping whose values are clusters (iterables of raw
                message payloads accepted by ``RawMessage``).
            infercls: protocol selector, see ``_tuningFor``.

        Returns:
            A list with one aligned ``Symbol`` per cluster, in the
            iteration order of ``clsDatas.values()``.
        """
        self._tuningFor(infercls).getMsgsLen(list(clsDatas.values()))
        formatInfer = Format()
        clsFormats = []
        # Iterate the mapping again (not the list handed to the tuner) so the
        # result order does not depend on what getMsgsLen does to its input.
        for clsData in clsDatas.values():
            tMessages = [RawMessage(message) for message in clsData]
            tempFormat = Symbol(messages=tMessages)
            formatInfer.splitAligned(tempFormat, doInternalSlick=True)
            clsFormats.append(tempFormat)
        return clsFormats

    def ldaFormatInfer(self, wSize, TK, wLen, Kcls, infercls='H'):
        """Infer formats from the clusters produced by ``clsMessages``.

        ``wSize``, ``TK``, ``wLen`` and ``Kcls`` are forwarded unchanged to
        ``self.clser.clsMessages``; *infercls* selects the tuner
        ('H', 'F', otherwise the redis tuner).

        Returns:
            A list of aligned ``Symbol`` objects, one per cluster.
        """
        clsDatas = self.clser.clsMessages(wSize, TK, wLen, Kcls)
        return self._inferClusterFormats(clsDatas, infercls)

    def ladDbscanFormatInfer(self, wSize, TK, wLen, mindis, minpt, infercls):
        """Infer formats from the clusters produced by ``clsByDbscan``.

        ``wSize``, ``TK``, ``wLen``, ``mindis`` and ``minpt`` are forwarded
        unchanged to ``self.clser.clsByDbscan``; *infercls* selects the
        tuner ('H', 'F', otherwise the redis tuner).

        Returns:
            A list of aligned ``Symbol`` objects, one per cluster.
        """
        clsDatas = self.clser.clsByDbscan(wSize, TK, wLen, mindis, minpt)
        return self._inferClusterFormats(clsDatas, infercls)
Esempio n. 2
0
class TextClassifyLogic:
    """Split messages into clusters by frequent words and infer formats.

    Message objects are expected to expose ``now()`` (the text that is
    searched for frequent words) and a ``message`` attribute (the payload
    handed to ``RawMessage``).

    Attributes:
        tRate: minimum share of ``self.messages`` a word or a sub-cluster
            must reach to be used for / subjected to further splitting.
        srate: the ``sRate`` constructor argument, stored as given.
        wRate: rate handed to the frequent-word analysis.
        wHeight: minimum length of a frequent word.
        messages: the full reference message list.
        freWords: frequent words used by ``ConvertFreWords``; filled by
            ``FormatInfer``.
    """

    def __init__(self, messages, tRate, sRate, wRate, wHeight):
        self.tRate = tRate
        self.srate = sRate
        # Previously assigned from self.srate, which silently discarded the
        # wRate argument.
        self.wRate = wRate
        self.wHeight = wHeight
        self.messages = messages
        # ConvertFreWords reads this attribute; give it a defined start value.
        self.freWords = set()
        self.httpData = HttpDataTuning()
        self.ftpData = FTPDataTuning()
        self.redisData = RedisDataTuning()

    def GetLocData(self, datas):
        """Return ``data.now()`` for every element of *datas*, in order."""
        return [data.now() for data in datas]

    def filterShort(self, freWords, h):
        """Return the set of words in *freWords* whose length is at least *h*."""
        return {value for value in freWords if len(value) >= h}

    def GetFrequentWords(self, rate, h, datas):
        """Return the frequent words of *datas* that are at least *h* long.

        The stringified ``now()`` values are analysed by
        ``ApriorFreAnalyZer`` with the given *rate*.
        """
        Datas = [str(data) for data in self.GetLocData(datas)]
        freWords = ApriorFreAnalyZer(Datas, rate).getApriorFre()
        return self.filterShort(freWords, h)

    def RankWord(self, word, datas):
        """Count the messages containing *word* and average its position.

        Returns:
            ``(cnt, avg)`` where ``cnt`` is the number of messages whose
            text contains *word* and ``avg`` is
            ``(sum of first-occurrence offsets + 1) / cnt``.
            When the word occurs nowhere the result is ``(0, 0.0)``
            instead of a ``ZeroDivisionError``.
        """
        cnt = 0
        loc = 0
        for data in self.GetLocData(datas):
            tempLoc = str(data).find(word)
            if tempLoc != -1:
                cnt += 1
                loc += tempLoc
        print(word, cnt, loc)
        if cnt == 0:
            return (0, 0.0)
        return (cnt, (loc + 1) / cnt)

    def RankWords(self, freWords, datas):
        """Return ``(word, cnt, avg)`` tuples ordered by ``BaseRankModel``."""
        words = []
        for freWord in freWords:
            cnt, avgLoc = self.RankWord(freWord, datas)
            words.append((freWord, cnt, avgLoc))
        return BaseRankModel.sortList(words)

    def ConvertFreWords(self, data):
        """Return the frequent words found in *data*, joined by position.

        Every word of ``self.freWords`` that occurs in the string *data* is
        kept; the words are concatenated in order of their first
        occurrence. Words starting at the same offset are ordered
        alphabetically so the result does not depend on set iteration
        order.
        """
        freSet = {}
        for freWord in self.freWords:
            lo = data.find(freWord)
            if lo != -1:
                freSet[freWord] = lo
        frePattern = sorted(freSet.items(), key=lambda item: (item[1], item[0]))
        return ''.join(item[0] for item in frePattern)

    def GetWodsRank(self, datas):
        """Return the ranked frequent words of *datas*."""
        freWords = self.GetFrequentWords(self.wRate, self.wHeight, datas)
        return self.RankWords(freWords, datas)

    def ClassifyMessages(self, messages):
        """Group *messages* by the pattern of frequent words they contain.

        Returns:
            A dict mapping the ``ConvertFreWords`` pattern of
            ``str(message.message)`` to the list of messages sharing it.
        """
        msgSet = {}
        for message in messages:
            freWord = self.ConvertFreWords(str(message.message))
            msgSet.setdefault(freWord, []).append(message)
        return msgSet

    def ClassifyCircleLy(self, preWords, messages):
        """Recursively split *messages* on the best-ranked frequent word.

        The first ranked word that is not in *preWords*, occurs in more
        than ``tRate`` of ``self.messages`` and does not occur in every
        element of *messages* is used to split *messages* into "contains"
        and "does not contain". A part larger than ``tRate`` of
        ``self.messages`` is split again.

        Args:
            preWords: set of words already used on the current recursion
                path; it is restored to its input state before returning.
            messages: the messages to split.

        Returns:
            *messages* itself when no splitting word is found, otherwise a
            (possibly nested) list of message lists.
        """
        total = len(self.messages)
        if total == 0:
            # No reference population: the rates below are undefined.
            return messages
        rankWords = self.GetWodsRank(messages)
        funCode = None
        for word in rankWords:
            # Compare by the word text: the count and position stored in the
            # tuple change with every subset, so tuple membership would never
            # match. Whole tuples are still honoured for older callers.
            if word[0] in preWords or word in preWords:
                continue
            if word[1] / total > self.tRate and word[1] != len(messages):
                funCode = word
                break
        print(funCode, len(messages))
        if funCode is None:
            return messages
        clsOne, clsTwo = self.ClassifyByCodes(funCode[0], messages)
        print(len(clsOne), len(clsTwo))
        fResult = []
        if len(clsTwo) / total > self.tRate:
            fResult.append(self.ClassifyCircleLy(preWords, clsTwo))
        elif clsTwo:
            fResult.append(clsTwo)
        if len(clsOne) / total > self.tRate:
            preWords.add(funCode[0])
            try:
                fResult.append(self.ClassifyCircleLy(preWords, clsOne))
            finally:
                preWords.discard(funCode[0])
        else:
            fResult.append(clsOne)
        return fResult

    def ClassifyByCodes(self, codes, messages):
        """Partition *messages* by whether ``str(message.now())`` has *codes*.

        Returns:
            ``(clsOne, clsTwo)``: messages containing *codes* and the rest.
        """
        clsOne = []
        clsTwo = []
        for message in messages:
            if str(message.now()).find(codes) != -1:
                clsOne.append(message)
            else:
                clsTwo.append(message)
        return (clsOne, clsTwo)

    def _alignClusters(self, clusters):
        """Build one aligned ``Symbol`` for every message list in *clusters*."""
        formatInfer = Format()
        finalFormats = []
        for cluster in clusters:
            tMessages = [RawMessage(msg.message) for msg in cluster]
            tempFormat = Symbol(messages=tMessages)
            formatInfer.splitAligned(tempFormat, doInternalSlick=True)
            finalFormats.append(tempFormat)
        return finalFormats

    def _tunerFor(self, Mtype):
        """Return the tuner for *Mtype*: 'H', 'F', anything else -> redis."""
        if Mtype == 'H':
            return self.httpData
        if Mtype == 'F':
            return self.ftpData
        return self.redisData

    def FormatInfer(self, rate, h):
        """Cluster ``self.messages`` by frequent-word pattern and align them.

        The frequent words (computed with *rate* and minimum length *h*)
        are stored in ``self.freWords`` so ``ConvertFreWords`` can use
        them. The earlier version omitted the ``datas`` argument and read
        a non-existent ``self.datas`` attribute, so it could not run.

        Returns:
            A list of aligned ``Symbol`` objects, one per pattern.
        """
        self.freWords = self.GetFrequentWords(rate, h, self.messages)
        messageClassify = self.ClassifyMessages(self.messages)
        return self._alignClusters(messageClassify.values())

    def FormatInferCirclely(self, messages, Mtype):
        """Cluster *messages* recursively, tune the clusters and align them.

        *Mtype* selects the tuner ('H', 'F', otherwise the redis tuner)
        whose ``getMsgsLen`` receives the clusters' raw payloads.

        Returns:
            A list of aligned ``Symbol`` objects, one per cluster.
        """
        result = self.classifyMessages(set(), messages)
        clsResult = [[msg.message for msg in res] for res in result]
        self._tunerFor(Mtype).getMsgsLen(clsResult)
        return self._alignClusters(result)

    def formatInfer(self, messages):
        """Cluster *messages* recursively and align every cluster.

        Returns:
            A list of aligned ``Symbol`` objects, one per cluster.
        """
        return self._alignClusters(self.classifyMessages(set(), messages))

    def filterSets(self, result, fResult):
        """Pass *result* and the output list *fResult* to ``Converter``.

        NOTE(review): ``ConvertMultiListPure`` presumably flattens the nested
        *result* into *fResult* in place; confirm against ``Converter``.
        """
        cverter = Converter()
        cverter.ConvertMultiListPure(result, fResult)

    def classifyMessages(self, preSet, messages):
        """Run ``ClassifyCircleLy`` and return the list that ``filterSets`` fills."""
        datas = self.ClassifyCircleLy(preSet, messages)
        result = []
        self.filterSets(datas, result)
        return result