def __init__(self, messages=None):
    """Keep the message set and create the helper objects used by this class.

    Args:
        messages: Messages to analyse (default ``None``). The same object is
            stored on the instance and handed to ``WholeFieldTypeInfer``.
    """
    self.messages = messages
    self.wordTypeInfer = WholeFieldTypeInfer(messages)
    # Converters between border lists and (start, end) items.
    self.cvter = Converter()
    self.wcvter = word_convert()
    self.msgSplt = MsgSpliter()
    self.dataTuning = DataTuning()
    self.icsSymTree = IcsSymbolToTree()
class vertical_splitter:
    """Split (start, end) words using constancy inferred over a message set."""

    def __init__(self, messages):
        """Store ``messages`` and build the whole-field type inferrer from them."""
        self.messages = messages
        self.wholeFieldInfer = WholeFieldTypeInfer(self.messages)

    def split_by_words_type(self, datas, T_max_range):
        """Type every offset below ``T_max_range`` and merge the typed fields.

        For each offset the data at that location is counted, converted to
        frequencies and tested with ``is_const_word`` (threshold 0.95).
        Offsets that pass get field type 0, all others field type 4.

        Args:
            datas: Data handed to ``get_data_bylo`` for every offset.
            T_max_range: Number of offsets (starting at 0) to examine.

        Returns:
            A ``(words, candidate_borders)`` tuple: the result of
            ``base_merger.merge_words`` and the first location of each
            merged word.
        """
        fields_set = []
        w_infer = word_infer()
        w_merger = base_merger()
        w_convert = Converter()
        b_analyzer = base_analyzer()
        for i in range(T_max_range):
            lo_datas = get_data_bylo(datas, i)
            w_cnt = w_convert.convert_raw_to_count(lo_datas)
            w_frequent = b_analyzer.convert_num_to_frequent(w_cnt)
            is_const = w_infer.is_const_word(w_frequent, 0.95)
            fields_set.append(loc_field((i, i), 0 if is_const else 4))
        words_f = w_merger.merge_words(fields_set)
        candidate_borders = [w.loc[0] for w in words_f]
        return words_f, candidate_borders

    def splitWordSimple(self, word):
        """Try to cut ``word`` into a constant part and a non-constant part.

        Every interior position is tested; a position qualifies when exactly
        one of the two resulting halves is inferred constant. When several
        positions qualify, the last one wins.

        Args:
            word: A ``(start, end)`` pair.

        Returns:
            ``(left, right)`` when a split position exists, otherwise
            ``(word, None)``.
        """
        start, end = word[0], word[1]
        if end - start == 1:
            return word, None
        split_at = None
        for pos in range(start + 1, end):
            # Evaluate each half once; the split is valid only when the two
            # halves disagree on constancy.
            left_const = bool(self.wholeFieldInfer.inferConst((start, pos)))
            right_const = bool(self.wholeFieldInfer.inferConst((pos, end)))
            if left_const != right_const:
                split_at = pos
        if split_at is None:
            return word, None
        return (start, split_at), (split_at, end)

    def splitWordsSimple(self, words):
        """Apply :meth:`splitWordSimple` across ``words``.

        A word that splits is replaced by its two halves and the list is
        re-sorted by start offset, so the right half is examined again on the
        next step.

        Args:
            words: List of ``(start, end)`` pairs. It is modified in place up
                to the first re-sort; callers should use the return value.

        Returns:
            The resulting list of words.
        """
        i = 0
        while i < len(words):
            # Split each word exactly once (the result is used directly).
            first, second = self.splitWordSimple(words[i])
            if second is not None:
                words.remove(words[i])
                words.append(first)
                words.append(second)
                words = sorted(words, key=lambda span: span[0])
            i += 1
        return words
def reAjustBorders(self, words, messages):
    """Re-split ``words``, type each piece and merge the typed nodes.

    Args:
        words: ``(start, end)`` pairs to refine.
        messages: Messages used to build the splitter and the type inferrer.

    Returns:
        Whatever ``base_merger.merge_words`` returns for the typed nodes.
    """
    refined = vertical_splitter(messages).splitWordsSimple(words)
    const_infer = WholeFieldTypeInfer(messages)
    merger = base_merger()
    # Words inferred constant get wType 1, every other word gets wType 6.
    typed_nodes = [
        node(loc=span, wType=1 if const_infer.inferConst(span) else 6)
        for span in refined
    ]
    return merger.merge_words(typed_nodes)
class IcsSymbolToTree:
    """Turn typed word lists into a chain/tree of ``node`` objects."""

    def __init__(self):
        """Create the type inferrer used to translate type numbers to names."""
        self.wTInfer = WholeFieldTypeInfer()

    def icsSymToTree(self, gFormat, cFormats, h=10):
        """Build a tree from a general format and per-function formats.

        All entries of ``gFormat`` except the last are chained together. For
        every key of ``cFormats`` a function node labelled
        ``'F,<len(key)>,<key>'`` is attached to the last chained node, and the
        head of that key's own chain (limited to 3 nodes) becomes its child.

        Args:
            gFormat: Sequence of ``((start, end), type_number)`` entries.
            cFormats: Mapping from function key to such a sequence.
            h: Maximum number of general-format nodes to link. It is now
                forwarded to :meth:`transLineToNodes`; previously it was
                accepted but ignored (the default of 10 is unchanged).

        Returns:
            The first node of the general-format chain.

        Raises:
            IndexError: If ``gFormat`` has fewer than two entries or a
                per-function sequence is empty (no node to return).
        """
        gNodeFirst, gNodeLast = self.transLineToNodes(gFormat[:-1], h=h)
        for cFormat in cFormats:
            tFuncNode = node()
            tFuncNode.value.append('F' + ',' + str(len(cFormat)) + ',' + str(cFormat))
            tCformat, _ = self.transLineToNodes(cFormats[cFormat], h=3)
            tFuncNode.children.append(tCformat)
            gNodeLast.children.append(tFuncNode)
        return gNodeFirst

    def transLineToNodes(self, words, h=10):
        """Create one node per word and link the first ``h`` into a chain.

        Each node's value is ``'<type name>,<width>'`` where the width is
        ``end - start``; words whose type name is ``'Payload'`` use ``-1``.

        Args:
            words: Sequence of ``((start, end), type_number)`` entries.
            h: Upper bound on the number of nodes that are linked.

        Returns:
            ``(first_node, last_linked_node)``.

        Raises:
            IndexError: If ``words`` is empty.
        """
        nodes = []
        for word in words:
            wType = self.wTInfer.cVertNumToName(word[1])
            width = '-1' if wType == 'Payload' else str(word[0][1] - word[0][0])
            label = wType + ',' + width
            tNode = node()
            tNode.value.append(label)
            nodes.append(tNode)
        t_len = min(len(words), h)
        # Link back to front: node i receives node i + 1 as its child.
        for i in range(t_len - 2, -1, -1):
            nodes[i].children.append(nodes[i + 1])
        return nodes[0], nodes[t_len - 1]
class IcsFieldMerger(base_merger):
    """``base_merger`` subclass that carries a whole-field type inferrer."""

    def __init__(self, messages):
        """Initialise the base merger and build the inferrer from ``messages``."""
        super().__init__()
        self.wholeType = WholeFieldTypeInfer(messages)

    def mergeConstFields(self, words, messages):
        """Walk ``words`` and test each one with ``inferConst``.

        NOTE(review): this method looks unfinished. ``messages`` is never
        read, nothing is returned, and the ``append()`` call below has no
        argument. Confirm the intended behaviour before relying on it.
        """
        wordsType = []
        for word in words:
            if self.wholeType.inferConst(word):
                # NOTE(review): list.append() requires exactly one argument,
                # so this line raises TypeError for the first word that is
                # inferred constant. The intended value is not visible here.
                wordsType.append()
class TestWholeField:
    """Helper that feeds message slices to a whole-field type inferrer."""

    def __init__(self, messages, locs):
        """Store ``messages`` and ``locs`` and create the inferrer."""
        self.messages = messages
        self.locs = locs
        self.gFieldInfer = WholeFieldTypeInfer()

    def TestConst(self, lo):
        """Run ``inferConst`` on the ``lo[0]:lo[1]`` slice of each message.

        Messages whose length does not exceed ``lo[-1]`` are skipped.

        Args:
            lo: Index sequence; ``lo[0]`` and ``lo[1]`` bound the slice and
                ``lo[-1]`` is the length threshold.

        Returns:
            The result of ``self.gFieldInfer.inferConst`` for the slices.
        """
        slices = [
            message[lo[0]:lo[1]]
            for message in self.messages
            if len(message) > lo[-1]
        ]
        return self.gFieldInfer.inferConst(slices)
class ReAjustLogic:
    """Refine a list of ``(start, end)`` words using constancy inference."""

    def __init__(self, words, msgs):
        """Store the words and messages and build the type inferrer.

        Args:
            words: List of ``(start, end)`` pairs; modified in place by
                :meth:`reSplit` and :meth:`reCluster`.
            msgs: Messages handed to ``WholeFieldTypeInfer``.
        """
        self.words = words
        self.msgs = msgs
        self.wholeTypeInfer = WholeFieldTypeInfer(self.msgs)

    def reSplit(self):
        """Peel the first unit off words whose head and tail differ in constancy.

        The words are sorted by start offset. A word at least two units wide
        is split into ``(start, start + 1)`` and ``(start + 1, end)`` when
        exactly one of those parts is inferred constant; the tail is then
        examined again on the next step.
        """
        infer = self.wholeTypeInfer.inferConst
        self.words.sort(key=lambda word: word[0])
        i = 0
        while i < len(self.words):
            word = self.words[i]
            start, end = word[0], word[1]
            if end - start >= 2:
                middle = start + 1
                # Evaluate each part once; split only when they disagree.
                head_const = bool(infer((start, middle)))
                tail_const = bool(infer((middle, end)))
                if head_const != tail_const:
                    self.words.remove(word)
                    self.words.append((start, middle))
                    self.words.append((middle, end))
                    self.words.sort(key=lambda item: item[0])
            i += 1

    def reCluster(self):
        """Merge neighbouring words that are both inferred constant.

        The merged word replaces the pair in place so the list keeps its
        order, and it is compared with its next neighbour again. Previously
        the merged word was appended to the end of the list, which let later
        steps compare non-neighbouring words and build inverted spans.
        """
        infer = self.wholeTypeInfer.inferConst
        i = 0
        while i < len(self.words) - 1:
            current = self.words[i]
            following = self.words[i + 1]
            if infer(current) and infer(following):
                # Stay on index i: the merged word may merge again.
                self.words[i:i + 2] = [(current[0], following[1])]
            else:
                i += 1

    def reAjustBorders(self, words, messages):
        """Re-split ``words``, type each piece and merge the typed nodes.

        Args:
            words: ``(start, end)`` pairs to refine.
            messages: Messages used to build the splitter and type inferrer.

        Returns:
            Whatever ``base_merger.merge_words`` returns for the typed nodes.
        """
        vSpliter = vertical_splitter(messages)
        words = vSpliter.splitWordsSimple(words)
        typeInfer = WholeFieldTypeInfer(messages)
        mgerItoms = base_merger()
        # Words inferred constant get wType 1, every other word gets wType 6.
        nodes = [
            node(loc=word, wType=1 if typeInfer.inferConst(word) else 6)
            for word in words
        ]
        return mgerItoms.merge_words(nodes)
def getCFormat(self, configParas, gVeparas, msgs):
    """Infer the typed words of one message group.

    Args:
        configParas: Configuration passed through to ``getSplitMessages``.
        gVeparas: Voting parameters passed through to ``getSplitMessages``.
        msgs: Messages of the group.

    Returns:
        ``[((0, -1), 7)]`` when fewer than 10 messages are given; otherwise
        the result of ``sortWordsType`` for the words extracted by
        ``extractCWords``.
    """
    if len(msgs) < 10:
        return [((0, -1), 7)]
    split_borders = GvoterLogic().getSplitMessages(
        configParas, gVeparas, msgs, FType='C')
    items = self.cvter.border2item(split_borders)
    # Drop items according to the range derived from the message lengths.
    items = self.cvter.filterB(items, self.getRanges(msgs))
    adjuster = ReAjustLogic(items, msgs)
    adjuster.reSplit()
    adjuster.reCluster()
    typed_words = WholeFieldTypeInfer(msgs).extractCWords(adjuster.words)
    return self.sortWordsType(typed_words)
def __init__(self, messages):
    """Store ``messages`` and build the whole-field type inferrer from them.

    Args:
        messages: Messages to analyse; also passed to ``WholeFieldTypeInfer``.
    """
    self.messages = messages
    self.wholeFieldInfer = WholeFieldTypeInfer(messages)
def __init__(self, messages, locs):
    """Store the inputs and create a type inferrer with no arguments.

    Args:
        messages: Messages to examine.
        locs: Locations kept on the instance as ``self.locs``.
    """
    self.messages = messages
    self.locs = locs
    self.gFieldInfer = WholeFieldTypeInfer()
def __init__(self, messages):
    """Initialise the parent class, then build the type inferrer.

    Args:
        messages: Messages passed to ``WholeFieldTypeInfer``.
    """
    super().__init__()
    self.wholeType = WholeFieldTypeInfer(messages)
class FormatGeneLogic:
    """Infer a general message format plus one format per function group."""

    def __init__(self, messages=None):
        """Store the messages and create the helper objects.

        Args:
            messages: Messages to analyse (default ``None``); also passed to
                ``WholeFieldTypeInfer``.
        """
        self.messages = messages
        self.wordTypeInfer = WholeFieldTypeInfer(self.messages)
        self.cvter = Converter()
        self.wcvter = word_convert()
        self.msgSplt = MsgSpliter()
        self.dataTuning = DataTuning()
        self.icsSymTree = IcsSymbolToTree()

    def getRanges(self, messages):
        """Return ``min(23, shortest_length + 2)`` for ``messages``.

        The shortest length is capped at 65536, so an empty ``messages``
        yields 23.
        """
        shortest = min([65536] + [len(message) for message in messages])
        return min(23, shortest + 2)

    def getMesFormat(self):
        """Placeholder; does nothing."""
        pass

    def sortWordsType(self, words):
        """Return the ``(word, type)`` items of ``words`` sorted by word start.

        Args:
            words: Mapping whose keys are indexable words (``key[0]`` is the
                sort key).

        Returns:
            A list of ``(key, value)`` tuples.
        """
        return sorted(words.items(), key=lambda item: item[0][0])

    def getGFormat(self, congigParas, gVeparas):
        """Infer the general format of ``self.messages``.

        Args:
            congigParas: Configuration passed to ``getSplitMessages``.
            gVeparas: Voting parameters passed to ``getSplitMessages``.

        Returns:
            ``(boundaries, wordsType)``: the adjusted words converted back to
            borders, and the sorted ``(word, type)`` list.
        """
        gVoterLogic = GvoterLogic()
        boundaries = gVoterLogic.getSplitMessages(
            congigParas, gVeparas, self.messages, FType='G')
        boundaries = self.cvter.border2item(boundaries)
        fRange = self.getRanges(self.messages)
        LoRdj = ReAjustLogic(boundaries, self.messages)
        LoRdj.reSplit()
        LoRdj.reCluster()
        boundaries = LoRdj.words
        wordsType = self.wordTypeInfer.extractWords(boundaries, fRange)
        wordsType = self.sortWordsType(wordsType)
        boundaries = self.wcvter.itemtoborder(boundaries)
        return boundaries, wordsType

    def getCFormat(self, configParas, gVeparas, msgs):
        """Infer the typed words of one message group.

        Returns:
            ``[((0, -1), 7)]`` when fewer than 10 messages are given;
            otherwise the sorted ``(word, type)`` list for the group.
        """
        if len(msgs) < 10:
            return [((0, -1), 7)]
        gVoterLogic = GvoterLogic()
        boundaries = gVoterLogic.getSplitMessages(
            configParas, gVeparas, msgs, FType='C')
        boundaries = self.cvter.border2item(boundaries)
        fRange = self.getRanges(msgs)
        boundaries = self.cvter.filterB(boundaries, fRange)
        LoRdj = ReAjustLogic(boundaries, msgs)
        LoRdj.reSplit()
        LoRdj.reCluster()
        boundaries = LoRdj.words
        cWordTypeInfer = WholeFieldTypeInfer(msgs)
        wordsType = cWordTypeInfer.extractCWords(boundaries)
        return self.sortWordsType(wordsType)

    def clsByFunc(self, los):
        """Group message tails by the slice ``los[0]:los[1]`` of each message.

        Args:
            los: ``(start, end)`` of the grouping slice.

        Returns:
            Dict mapping each distinct slice to the list of ``msg[end:]``
            tails, in message order.
        """
        groups = {}
        for msg in self.messages:
            groups.setdefault(msg[los[0]:los[1]], []).append(msg[los[1]:])
        return groups

    def GTreeGenerate(self, configParas, gVeparas):
        """Infer the general format and one format per function group.

        The first word whose type is 0 is used as the grouping slice
        (presumably the function-code field; confirm against the type table).

        Returns:
            ``(wordsInfer, formats)`` where ``formats`` maps each group key
            to the result of :meth:`getCFormat`. When no word has type 0
            there is nothing to group by, so ``formats`` is empty (this case
            used to fail with a ``TypeError`` on the first message).
        """
        _, wordsInfer = self.getGFormat(configParas, gVeparas)
        fcCode = next((word[0] for word in wordsInfer if word[1] == 0), None)
        if fcCode is None:
            return wordsInfer, {}
        tFunMsgs = self.clsByFunc(fcCode)
        for fcKey in tFunMsgs:
            tFunMsgs[fcKey] = self.getCFormat(
                configParas, gVeparas, tFunMsgs[fcKey])
        return wordsInfer, tFunMsgs

    def GTJsonTree(self, configParas, gVeparas):
        """Build the format tree and return its ``transToIcsDictTree()`` form."""
        gFormat, cFormats = self.GTreeGenerate(configParas, gVeparas)
        # Existing behaviour: the general format is echoed to stdout.
        print(gFormat)
        groot = self.icsSymTree.icsSymToTree(gFormat, cFormats)
        return groot.transToIcsDictTree()

    def changeFormat(self, boundaries, wordsType):
        """Split every message by ``boundaries`` and name each word type.

        Returns:
            ``(headers, split_messages)``: the type names from
            ``cVertNumToName`` and the result of ``splitMsgByTypes``.
        """
        # The same boundary list is shared by every message.
        boundaries = [boundaries] * len(self.messages)
        gForMsg = self.msgSplt.splitMsgByTypes(boundaries, self.messages)
        wordTHeaders = [
            self.wordTypeInfer.cVertNumToName(wordType[1])
            for wordType in wordsType
        ]
        return wordTHeaders, gForMsg

    def getGJson(self, congigParas, gVeparas):
        """Return :meth:`changeFormat` applied to the inferred general format."""
        boundaries, wType = self.getGFormat(congigParas, gVeparas)
        return self.changeFormat(boundaries, wType)

    def getGF(self, uId=' '):
        """Run :meth:`getGJson` with a fixed user configuration.

        NOTE(review): ``uId`` is not used yet (marked "future" in the
        original) and the configuration path is hard-coded.
        """
        # future
        uConfig = UserConfig('/home/wxw/data/ToolDatas/15895903730.10.222', '15895903730')
        gVeParas = GveConf.geneGveParas()
        return self.getGJson(uConfig, gVeParas)

    def combineFormats(self):
        """Placeholder; does nothing."""
        pass

    def clsMessages(self):
        """Placeholder; does nothing."""
        pass
def __init__(self):
    """Create the type inferrer (constructed with no arguments)."""
    self.wTInfer = WholeFieldTypeInfer()
def __init__(self, words, msgs):
    """Store the words and messages and build the type inferrer.

    Args:
        words: Words kept on the instance as ``self.words``.
        msgs: Messages kept as ``self.msgs`` and passed to
            ``WholeFieldTypeInfer``.
    """
    self.words = words
    self.msgs = msgs
    self.wholeTypeInfer = WholeFieldTypeInfer(msgs)