def split(self):
    """
    Split each segment at the first occurrence of any of the most common features (self._moco).

    For every input segment, the first feature found in the segment's bytes is cut out into
    its own segment; non-empty remainders to the left and right become separate segments.
    Segments that contain no feature, or that already exactly equal a feature, are kept as-is.
    Only the first matching feature per segment is handled (see note at the break below).

    :return: New list of segments with common features isolated into own segments.
    """
    newmsg = list()
    for sid, seg in enumerate( self.segments ):  # enum necessary to change to in place edit after debug (want to do?)
        didReplace = False
        for comfeat in self._moco:
            comoff = seg.bytes.find(comfeat)
            if comoff == -1:  # comfeat not in moco, continue with next in moco
                continue
            featlen = len(comfeat)
            if seg.length == featlen:  # its already the concise frequent feature
                newmsg.append(seg)
            else:
                if CropDistinct._debug:
                    print("\nReplaced {} by:".format(seg.bytes.hex()), end=" ")

                # absolute offset of the feature within the message
                absco = seg.offset + comoff
                if comoff > 0:
                    # left remainder: bytes of the segment before the feature
                    segl = MessageSegment(seg.analyzer, seg.offset, comoff)
                    newmsg.append(segl)
                    if CropDistinct._debug:
                        print(segl.bytes.hex(), end=" ")

                # center segment: the feature itself
                segc = MessageSegment(seg.analyzer, absco, featlen)
                newmsg.append(segc)
                if CropDistinct._debug:
                    print(segc.bytes.hex(), end=" ")

                rlen = seg.length - comoff - featlen
                if rlen > 0:
                    # right remainder: bytes of the segment after the feature
                    segr = MessageSegment(seg.analyzer, absco + featlen, rlen)
                    newmsg.append(segr)
                    if CropDistinct._debug:
                        print(segr.bytes.hex(), end=" ")

            didReplace = True
            break  # only most common match!? otherwise how to handle subsequent matches after split(s)?
        if not didReplace:
            newmsg.append(seg)
        elif CropDistinct._debug:
            print()
    return newmsg
def wobbleSegmentInMessage(segment: MessageSegment):
    """
    Produce small positional variations ("wobbles") of the given segment.

    Besides the segment itself, adds a variant whose start is shifted one byte to the
    left (grown by one byte, only if the segment does not start at offset 0) and a
    variant whose start is shifted one byte to the right (shrunk by one byte, only
    if the segment is longer than one byte). Wobbling currently applies to the start
    only; an end-wobble would additionally need, e. g.:
    if segment.nextOffset < len(segment.message.data): segment.nextOffset + 1

    :param segment: The segment to vary.
    :return: List of the segment and its positional variants.
    """
    variants = [segment]
    canGrowLeft = segment.offset > 0
    canShrinkRight = segment.length > 1
    if canGrowLeft:
        variants.append(MessageSegment(segment.analyzer, segment.offset - 1, segment.length + 1))
    if canShrinkRight:
        variants.append(MessageSegment(segment.analyzer, segment.offset + 1, segment.length - 1))
    return variants
def calcHexDist(hexA, hexB):
    """
    Compute the pairwise dissimilarity of two byte strings given as hex strings.

    Each hex string is decoded into a raw message, analyzed by Value, and wrapped in a
    single segment spanning the whole message; the distance between the two resulting
    segments is measured by a DistanceCalculator.

    :param hexA: First byte string, hex-encoded.
    :param hexB: Second byte string, hex-encoded.
    :return: The pairwise distance between the two byte strings' segments.
    """
    from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
    from nemere.inference.analyzers import Value
    from nemere.inference.segments import MessageSegment
    from nemere.inference.templates import DistanceCalculator

    bytedata = [bytes.fromhex(hexA), bytes.fromhex(hexB)]
    messages = [RawMessage(bd) for bd in bytedata]
    analyzers = [Value(msg) for msg in messages]
    segments = list()
    for analyzer in analyzers:
        segments.append(MessageSegment(analyzer, 0, len(analyzer.message.data)))
    dc = DistanceCalculator(segments)
    return dc.pairDistance(*segments)
def split(self):
    """
    Perform the splitting of the segments.

    Walks over consecutive, directly adjacent segment pairs and moves the boundary
    between them by the shift returned from self.lookupLeastFrequent (based on byte-pair
    frequencies): a negative shift moves the boundary into the left segment, a positive
    one into the center segment. The left segment is shrunk/grown accordingly, or removed
    entirely when it is completely consumed by the center.

    :return: List of segments splitted from the input.
    """
    segmentStack = list(reversed(self.segments[1:]))
    mangledSegments = [self.segments[0]]
    if len(self.segments) > 1:
        while segmentStack:
            segc = segmentStack.pop()
            segl = mangledSegments[-1]
            # only shift boundaries between directly adjacent segments
            if segl.offset + segl.length == segc.offset:
                # compare byte pairs' frequency
                splitshift = self.lookupLeastFrequent(segc)
                # the shift must stay within the bounds of the two segments;
                # a shift of 0 leaves the boundary unchanged
                if ( 0 > splitshift >= -segl.length) \
                        or (0 < splitshift <= segc.length):
                    if segl.length != -splitshift:
                        # resize the left segment by the shift amount
                        mangledSegments[-1] = MessageSegment(mangledSegments[-1].analyzer,
                                                             mangledSegments[-1].offset,
                                                             mangledSegments[-1].length + splitshift)
                    else:  # segment to the left completely used up in center
                        del mangledSegments[-1]
                    if self._debug:
                        print("Recombined segments: \n{} and {} into ".format(segl, segc))
                    # move the start of the center segment by the shift amount
                    segc = MessageSegment(segc.analyzer, segc.offset + splitshift,
                                          segc.length - splitshift)
                    if self._debug:
                        print("{} and {}".format(
                            mangledSegments[-1] if mangledSegments else 'Empty', segc))
            mangledSegments.append(segc)
    return mangledSegments
def generateTestSegments():
    """
    Build a fixed set of message segments for testing purposes.

    Wraps each hard-coded byte string into a RawMessage, analyzes it with Value, and
    creates one segment spanning the whole message per test byte string.

    :return: List of MessageSegments, one per hard-coded test byte string.
    """
    from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
    from nemere.inference.analyzers import Value
    from nemere.inference.segments import MessageSegment

    bytedata = [
        bytes([1, 2, 3, 4]),
        bytes([2, 3, 4]),
        bytes([1, 3, 4]),
        bytes([2, 4]),
        bytes([2, 3]),
        bytes([20, 30, 37, 50, 69, 2, 30]),
        bytes([37, 5, 69]),
        bytes([0, 0, 0, 0]),
        bytes([3, 2, 3, 4]),
    ]
    segments = list()
    for bd in bytedata:
        analyzer = Value(RawMessage(bd))
        segments.append(MessageSegment(analyzer, 0, len(analyzer.message.data)))
    return segments
def split(self, segmentID: int, chunkLength: int):
    """
    Split one segment of the message into consecutive chunks of a fixed length.

    :param segmentID: The index of the segment to split within the sequence of segments composing the message
    :param chunkLength: The fixed length of the target segments in bytes; must be at least 1.
    :return: The message segments with the given segment replaced by multiple segments of the given fixed length.
        The last chunk is shorter than chunkLength if the segment length is not an exact multiple of it.
        If the selected segment is not longer than chunkLength, the original segment list is returned unchanged.
    :raises ValueError: If chunkLength is smaller than 1. (Previously a zero length crashed with an
        unrelated range() error and a negative length silently dropped the segment.)
    """
    if chunkLength < 1:
        raise ValueError("chunkLength must be a positive number of bytes, got {}".format(chunkLength))
    selSeg = self.segments[segmentID]
    if chunkLength < selSeg.length:
        newSegs = list()
        for chunkoff in range(selSeg.offset, selSeg.nextOffset, chunkLength):
            # clip the last chunk to the remaining bytes of the segment
            remainLen = selSeg.nextOffset - chunkoff
            newSegs.append(MessageSegment(selSeg.analyzer, chunkoff, min(remainLen, chunkLength)))
        newmsg = self.segments[:segmentID] + newSegs + self.segments[segmentID + 1:]
        return newmsg
    else:
        return self.segments
def merge(self):
    """
    Perform the merging.

    Iterates over adjacent pairs of the input segments and joins a pair into the last
    result segment whenever the two segments are contiguous in the message and
    self.condition approves the merge. Consecutive approved merges accumulate into
    the same, growing result segment.

    :return: a new set of segments after the input has been merged
    """
    mergedSegments = self.segments[0:1]
    if len(self.segments) > 1:
        for segl, segr in zip(self.segments[:-1], self.segments[1:]):
            # TODO check for equal analyzer, requires implementing a suitable equality-check in analyzer
            # from inference.MessageAnalyzer import MessageAnalyzer
            if segl.offset + segl.length == segr.offset and self.condition(segl, segr):
                # grow the last merged segment by the length of the right neighbor;
                # NOTE: the condition is checked on the original pair (segl, segr),
                # not on the already-grown merged segment
                mergedSegments[-1] = MessageSegment(mergedSegments[-1].analyzer,
                                                    mergedSegments[-1].offset,
                                                    mergedSegments[-1].length + segr.length)
                if self._debug:
                    print("Merged segments: \n{} and \n{} into \n{}".format(segl, segr, mergedSegments[-1]))
            else:
                mergedSegments.append(segr)
    return mergedSegments
def symbolsFromSegments(segmentsPerMsg: Iterable[Sequence[MessageSegment]]) -> List[Symbol]:
    """
    Generate a list of Netzob Symbols from the given lists of segments for each message.

    Per message, the segments are sorted by offset and every uncovered byte range —
    before the first segment, between segments, and after the last segment — is filled
    with a newly created segment, so that each Symbol's fields cover the whole message.

    >>> from nemere.inference.segmentHandler import symbolsFromSegments
    >>> from nemere.inference.segments import MessageSegment
    >>> from nemere.inference.analyzers import Value
    >>> from netzob.Model.Vocabulary.Symbol import Symbol
    >>> from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
    >>> # prevent Netzob from producing debug output.
    >>> import logging
    >>> logging.getLogger().setLevel(30)
    >>>
    >>> dummymsg = RawMessage(bytes(list(range(50, 70))))
    >>> dummyana = Value(dummymsg)
    >>> testgapped = [[ MessageSegment(dummyana, 0, 2), MessageSegment(dummyana, 5, 2), MessageSegment(dummyana, 7, 6),
    ...                 MessageSegment(dummyana, 17, 2) ]]
    >>> symbol = symbolsFromSegments(testgapped)[0]
    >>> print(symbol)
    Field | Field | Field | Field    | Field  | Field | Field
    ----- | ----- | ----- | -------- | ------ | ----- | -----
    '23'  | '456' | '78'  | '9:;<=>' | '?@AB' | 'CD'  | 'E'...
    ----- | ----- | ----- | -------- | ------ | ----- | -----

    :param segmentsPerMsg: List of messages, represented by lists of segments.
    :return: list of Symbols, one for each entry in the given iterable of lists.
    """
    filledSegments = list()
    for segSeq in segmentsPerMsg:
        inOrder = sorted(segSeq, key=lambda f: f.offset)
        assert len(inOrder) > 0
        covered = list()
        for segment in inOrder:
            prevEnd = covered[-1].nextOffset if covered else 0
            if segment.offset > prevEnd:
                # fill the gap between the previous and the current segment
                covered.append(MessageSegment(segment.analyzer, prevEnd, segment.offset - prevEnd))
            covered.append(segment)
        # fill a gap trailing the last segment, if any
        prevEnd = covered[-1].nextOffset
        msglen = len(covered[-1].message.data)
        if prevEnd < msglen:
            covered.append(MessageSegment(covered[-1].analyzer, prevEnd, msglen - prevEnd))
        filledSegments.append(covered)

    symbols = list()
    for i, segSeq in enumerate(filledSegments):
        fields = [Field(segment.bytes) for segment in segSeq]
        symbols.append(Symbol(fields, messages=[segSeq[0].message], name=f"nemesys Symbol {i}"))
    return symbols
def fixedlengthSegmenter(length: int, specimens: BaseLoader, analyzerType: type,
                         analysisArgs: Union[Tuple, None], unit=MessageAnalyzer.U_BYTE, padded=False) \
        -> List[Tuple[MessageSegment]]:
    """
    Segment messages into fixed size chunks.

    >>> from nemere.utils.loader import SpecimenLoader
    >>> from nemere.validation.dissectorMatcher import MessageComparator
    >>> from nemere.inference.analyzers import Value
    >>> from nemere.inference.segmentHandler import fixedlengthSegmenter
    >>> specimens = SpecimenLoader("../input/deduped-orig/ntp_SMIA-20111010_deduped-100.pcap", 2, True)
    >>> comparator = MessageComparator(specimens, 2, True, debug=False)
    Wait for tshark output (max 20s)...
    >>> segmentedMessages = fixedlengthSegmenter(4, specimens, Value, None)
    >>> areIdentical = True
    >>> for msgsegs in segmentedMessages:
    ...     msg = msgsegs[0].message
    ...     msgbytes = b"".join([seg.bytes for seg in msgsegs])
    ...     areIdentical = areIdentical and msgbytes == msg.data
    >>> print(areIdentical)
    True

    :param length: Fixed length for all segments. Overhanging segments at the end that are shorter than length
        will be padded with NANs.
    :param specimens: Loader utility class that contains the payload messages.
    :param analyzerType: Type of the analysis. Subclass of inference.analyzers.MessageAnalyzer.
    :param analysisArgs: Arguments for the analysis method.
    :param unit: Base unit for the analysis. Either MessageAnalyzer.U_BYTE or MessageAnalyzer.U_NIBBLE.
    :param padded: Toggle to pad the last segment to the requested fixed length or leave the last segment
        to be shorter than length if the message length is not an exact multiple of the segment length.
    :return: Segments of the analyzer's message according to the true format.
    """
    segments = list()
    for l4msg, rmsg in specimens.messagePool.items():
        if len(l4msg.data) % length == 0:  # exclude the overlap
            lastOffset = len(l4msg.data)
        else:
            # last offset that still starts a full-length chunk
            lastOffset = (len(l4msg.data) // length) * length

        originalAnalyzer = MessageAnalyzer.findExistingAnalysis(analyzerType, unit, l4msg, analysisArgs)
        # full-length chunks covering the message up to lastOffset
        sequence = [MessageSegment(originalAnalyzer, offset, length)
                    for offset in range(0, lastOffset, length)]

        if len(l4msg.data) > lastOffset:  # append the overlap
            if padded:
                # here are nasty hacks!
                # TODO Better define a new subclass of MessageSegment that internally padds values
                #  (and bytes? what are the guarantees?) to a given length that exceeds the message length
                residuepadd = lastOffset + length - len(l4msg.data)
                # pad a copy of the message with zero bytes so the last segment has full length
                newMessage = copy.copy(originalAnalyzer.message)
                newMessage.data = newMessage.data + b'\x00' * residuepadd
                newAnalyzer = type(originalAnalyzer)(newMessage, originalAnalyzer.unit)  # type: MessageAnalyzer
                newAnalyzer.setAnalysisParams(*originalAnalyzer.analysisParams)
                # pad the analysis values with NaNs to match the padded message length
                padd = [numpy.nan] * residuepadd
                newAnalyzer._values = originalAnalyzer.values + padd
                newSegment = MessageSegment(newAnalyzer, lastOffset, length)
                for seg in sequence:  # replace all previous analyzers to make the sequence homogeneous for this message
                    seg.analyzer = newAnalyzer
                sequence.append(newSegment)
            else:
                # keep the trailing segment shorter than length
                newSegment = MessageSegment(originalAnalyzer, lastOffset, len(l4msg.data) - lastOffset)
                sequence.append(newSegment)

        segments.append(tuple(sequence))
    return segments
def merge(self):
    """
    Perform the merging.

    Accumulates consecutive segments on a working stack until at least minLen bytes are
    collected; if the joined bytes form an extended character sequence, the accumulation
    continues. When the character candidate ends, all but the terminating segment are
    merged into one segment (the terminating segment is pushed back for re-examination).
    If no candidate formed, only the first working-stack segment is emitted and the rest
    are pushed back.

    >>> from nemere.utils.loader import SpecimenLoader
    >>> from nemere.inference.segmentHandler import bcDeltaGaussMessageSegmentation
    >>> from nemere.inference.formatRefinement import CumulativeCharMerger
    >>> sl = SpecimenLoader('../input/deduped-orig/dns_ictf2010_deduped-100.pcap', layer=0, relativeToIP=True)
    >>> segmentsPerMsg = bcDeltaGaussMessageSegmentation(sl)
    Segmentation by inflections of sigma-0.6-gauss-filtered bit-variance.
    >>> for messageSegments in segmentsPerMsg:
    ...     ccm = CumulativeCharMerger(messageSegments)
    ...     ccmmsg = ccm.merge()
    ...     if ccmmsg != messageSegments:
    ...         sgms = b''.join([m.bytes for m in ccmmsg])
    ...         sgss = b''.join([m.bytes for m in messageSegments])
    ...         if sgms != sgss:
    ...             print("Mismatch!")

    :return: a new set of segments after the input has been merged
    """
    # minimum number of bytes before a sequence is considered a char candidate
    minLen = 6

    segmentStack = list(reversed(self.segments))
    newmsg = list()
    isCharCand = False
    workingStack = list()
    while segmentStack:
        workingStack.append(segmentStack.pop())
        if sum([len(ws.bytes) for ws in workingStack]) < minLen:
            continue

        # now we have 6 bytes
        # and the merge is a new char candidate
        joinedbytes = b"".join([ws.bytes for ws in workingStack])
        if isExtendedCharSeq(joinedbytes) \
                and b"\x00\x00" not in joinedbytes:
            isCharCand = True
            continue
        # the last segment ended the char candidate
        elif isCharCand:
            isCharCand = False
            if len(workingStack) > 2:
                # merge everything except the segment that broke the candidate
                newlen = sum([ws.length for ws in workingStack[:-1]])
                newseg = MessageSegment(workingStack[0].analyzer,
                                        workingStack[0].offset, newlen)
                newmsg.append(newseg)
            else:
                # retain the original segment (for equality test and to save creating a new object instance)
                newmsg.append(workingStack[0])
            if len(workingStack) > 1:
                # re-examine the segment that broke the candidate
                segmentStack.append(workingStack[-1])
            workingStack = list()
        # there was not a char candidate
        else:
            newmsg.append(workingStack[0])
            # push everything but the first segment back for re-examination
            for ws in reversed(workingStack[1:]):
                segmentStack.append(ws)
            workingStack = list()
    # there are segments in the working stack left
    if len(workingStack) > 1 and isCharCand:
        newlen = sum([ws.length for ws in workingStack])
        newseg = MessageSegment(workingStack[0].analyzer,
                                workingStack[0].offset, newlen)
        newmsg.append(newseg)
    # there was no char sequence and there are segments in the working stack left
    else:
        newmsg.extend(workingStack)
    return newmsg
def split(self):
    """
    Perform the splitting of the segments.

    For every printable ("char") segment, the boundaries to its neighbors are relocated:
    trailing chars of the adjacent left segment (per self.toTheLeft) and leading chars of
    the adjacent right segment (per self.toTheRight) are absorbed into the center segment.
    Non-printable segments are passed through unchanged.

    NOTE(review): an input of a single segment returns an empty list, since the whole
    loop is guarded by len(self.segments) > 1 — confirm this is intended.

    :return: List of segments splitted from the input.
    """
    segmentStack = list(reversed(self.segments))
    mangledSegments = list()
    if len(self.segments) > 1:
        while segmentStack:
            # TODO check for equal analyzer, requires equality-check in analyzer
            # from inference.MessageAnalyzer import MessageAnalyzer
            segc = segmentStack.pop()
            # TODO: this is char specific only!
            if not isPrintable(segc.bytes):
                # cancel split relocation
                mangledSegments.append(segc)
                continue

            if mangledSegments:
                # integrate segment to the left into center
                segl = mangledSegments[-1]
                if segl.offset + segl.length == segc.offset:
                    splitpos = self.toTheLeft(segl)
                    # segment to the left ends with chars, add them to the center segment
                    if splitpos < segl.length:
                        if splitpos > 0:
                            # truncate the left segment at the split position
                            mangledSegments[-1] = MessageSegment(mangledSegments[-1].analyzer,
                                                                 mangledSegments[-1].offset, splitpos)
                        else:  # segment to the left completely used up in center
                            del mangledSegments[-1]
                        # number of bytes moved from the left segment into the center
                        restlen = segl.length - splitpos
                        if self._debug:
                            print("Recombined segments: \n{} and {} into ".format(segl, segc))
                        segc = MessageSegment(segc.analyzer, segc.offset - restlen,
                                              segc.length + restlen)
                        if self._debug:
                            print("{} and {}".format(
                                mangledSegments[-1] if mangledSegments else 'Empty', segc))

            if segmentStack:
                # integrate segment to the right into center
                segr = segmentStack[-1]
                if segc.offset + segc.length == segr.offset:
                    splitpos = self.toTheRight(segr)
                    # segment to the right starts with chars, add them to the center segment
                    if splitpos > 0:
                        if segr.length - splitpos > 0:
                            # shrink the right segment from its start
                            segmentStack[-1] = MessageSegment(segr.analyzer,
                                                              segr.offset + splitpos, segr.length - splitpos)
                        else:  # segment to the right completely used up in center
                            del segmentStack[-1]
                        if self._debug:
                            print("Recombined segments: \n{} and {} into ".format(segc, segr))
                        segc = MessageSegment(segc.analyzer, segc.offset,
                                              segc.length + splitpos)
                        if self._debug:
                            print("{} and {}".format(
                                segc, segmentStack[-1] if segmentStack else 'Empty'))

            mangledSegments.append(segc)
    return mangledSegments