def analyze(self):
    """
    "Amplitude" of the value variance, intended to be compared to a threshold.

    The token delta is applied twice: first to the message data (respecting the
    analyzer's unit), then to the result of that first pass.

    Change of the amplitude

    * intra message
    * TODO across messages
    """
    firstOrderDelta = MessageAnalyzer.tokenDelta(list(self._message.data), self._unit)
    # the second pass uses tokenDelta's default unit, exactly one argument is passed
    self._values = MessageAnalyzer.tokenDelta(firstOrderDelta)
def extrema(self) -> List[Tuple[int, bool]]:
    """
    Collect the local minima and maxima of this analyzer's values (the smoothed bcd).

    :return: all extrema, sorted by index, each described by a tuple of its index
        and a bool that is True for a maximum and False for a minimum.
    """
    smoothed = self.values
    minima = MessageAnalyzer.localMinima(smoothed)
    maxima = MessageAnalyzer.localMaxima(smoothed)

    # element [0] of each result holds the indices of the extrema
    tagged = [(idx, False) for idx in minima[0]]
    tagged.extend((idx, True) for idx in maxima[0])
    # stable sort by index only: for equal indices, minima stay in front of maxima
    tagged.sort(key=lambda entry: entry[0])
    return tagged
def analyze(self):
    """
    Calculate the entropy of each n-gram of the message.

    For the nibble unit, each n-gram is split into nibbles before the entropy
    calculation; otherwise the n-gram itself is used as the token sequence.
    The list of entropies is stored in ``self._values``.
    """
    splitIntoNibbles = self._unit == MessageAnalyzer.U_NIBBLE
    self._values = [
        MessageAnalyzer.calcEntropy(  # should work for bytes
            MessageAnalyzer.nibblesFromBytes(gram) if splitIntoNibbles else gram)
        for gram in self.ngrams(self._n)
    ]
def analyze(self):
    """
    Bit congruence directly between the n-grams of the message.

    Not unit-dependent. The result is stored in ``self._values``.

    :raises ParametersNotSet: if the n-gram size ("n") is not set.
    """
    ngramSize = self._n
    if not ngramSize:
        raise ParametersNotSet('Analysis parameter missing: N-gram size ("n").')

    ngramTokens = list(self.ngrams(ngramSize))
    self._values = BitCongruence.bitCongruenceBetweenTokens(ngramTokens)
    # explicitly run MessageAnalyzer's own analyze() on this instance afterwards
    MessageAnalyzer.analyze(self)
def plotSubfigs(self, analysisResults: List[List[float]], subfigName: List[str]=None,
                compareValue: List[List[float]]=None, fieldEnds: List[List[int]]=None,
                markextrema: bool=False,
                resultsLabel: str=None, compareLabel: str=None, fieldEndMarks: bool=True):
    """
    Plot different aspects about analysis results, one subplot per message.

    :param analysisResults: The results of a message analyzer for each message.
    :param subfigName: Titles for each subplot.
    :param compareValue: Values to plot for comparison for each message.
    :param fieldEnds: True field ends to plot for reference to each message.
    :param markextrema: If set, plot the local extrema of the analysis results.
    :param resultsLabel: Label for the results.
    :param compareLabel: Label for the compare values.
    :param fieldEndMarks: Mark the field ends with dots on the graph.
        Only has an effect if fieldEnds is given.
    """
    # xshift=1  # shift to the right by one, since we want to see the value for x at position x+1

    # main graph: add a label to the line style only if one is requested
    if resultsLabel:
        resultStyle = dict(MessagePlotter.STYLE_ALTMAINLINE, label=resultsLabel)
    else:
        resultStyle = MessagePlotter.STYLE_ALTMAINLINE
    self.plotInEachAx(analysisResults, linestyle=resultStyle)

    if markextrema:
        # minima first, then maxima
        for findExtrema in (MessageAnalyzer.localMinima, MessageAnalyzer.localMaxima):
            self.scatterInEachAx([findExtrema(values) for values in analysisResults])

    if compareValue:
        if compareLabel:
            compareStyle = dict(MessagePlotter.STYLE_COMPARELINE, label=compareLabel)
        else:
            compareStyle = MessagePlotter.STYLE_COMPARELINE
        self.plotInEachAx(compareValue, linestyle=compareStyle)

    if fieldEnds:
        self.fieldmarkersInEachAx(fieldEnds)
        if fieldEndMarks:
            try:
                dots = []
                for ends, results in zip(fieldEnds, analysisResults):
                    # the last field end is left out
                    innerEnds = ends[:-1]
                    dots.append((innerEnds, [results[endbyte] for endbyte in innerEnds]))
                self.scatterInEachAx(dots, marker='.')
            except IndexError:
                print('Error: Dissector field index and message are contradicting. Field ends could not be marked.\n'
                      'Check dissector and message.')

    if subfigName:
        self.nameEachAx(subfigName)

    if resultsLabel or compareLabel:
        plt.legend()
def pinpointMaxima(self):
    """
    Pinpoint the exact positions of local maxima within the scope of each smoothed local maximum.
    The exact position is looked for in self.bitcongruences.
    Only those extrema of the smoothed graph are taken into account which are above the sensitivity threshold
    (``self._sensitivity`` times the overall highest local maximum).

    :return: One exact local maximum m in the interval ( center(m_n-1, m_n), center(m_n, m_n+1) )
        for each n in (0, smoothed local maximum, -1).
        An empty list if the smoothed values have no local maximum at all.
    """
    from itertools import compress

    localmaxima = MessageAnalyzer.localMaxima(self.values)  # List[idx], List[max]
    if len(localmaxima[1]) == 0:
        # max() of an empty sequence raises ValueError. Without any smoothed maximum there is
        # nothing to pinpoint, which is the same result the computation below yields if no
        # maximum exceeds the threshold.
        return []

    allovermax = max(localmaxima[1])
    threshold = self._sensitivity * allovermax
    maxSmsk = [e > threshold for e in localmaxima[1]]

    # frame the selected maxima by message start and end, then take the centers between neighbors
    lmaxAO = [0] + list(compress(localmaxima[0], maxSmsk)) + [len(self._message.data)]
    lmaxMed = (numpy.round(numpy.ediff1d(lmaxAO) / 2) + lmaxAO[:-1]).astype(int)
    # exact maximum of the raw bit congruences between each pair of neighboring centers
    bclmaxs = [medl + numpy.argmax(self.bitcongruences[medl:medr])
               for medl, medr in zip(lmaxMed[:-1], lmaxMed[1:])]
    return bclmaxs
def analyze(self):
    """
    2nd order delta of bitwise congruence. see :func:`MessageAnalyzer.bitCongruence()`

    not unit-dependant, always byte-wise

    Runs the parent analysis first and then replaces ``self._values`` by the token
    delta of the parent's values. One more value is skipped at the start of the message.
    """
    super().analyze()
    secondOrderDelta = MessageAnalyzer.tokenDelta(self._values)
    self._startskip += 1
    self._values = secondOrderDelta
def analyze(self):
    """
    Bit congruence compared to the mean bit congruence of the preceding ``horizon`` tokens.

    The horizon is the first analysis argument. For each bit congruence value from index
    ``horizon`` on, the mean of the ``horizon`` values directly before it is subtracted.
    The differences are stored in ``self._values`` and ``self._startskip`` is increased
    by the horizon.

    :raises ParametersNotSet: if no analysis arguments (horizon) are set.
    """
    if not self._analysisArgs:
        raise ParametersNotSet('Analysis parameter missing: horizon.')
    horizon = self._analysisArgs[0]
    self._startskip += horizon

    tokenlist = self._message.data  # tokenlist could also be list of ngrams.
    bitcongruences = BitCongruence.bitCongruenceBetweenTokens(tokenlist)

    # The window must span the horizon, not a fixed two values: a hard-coded "idx - 2"
    # yields a negative start index (and thus an empty slice) for horizon == 1 and
    # ignores any horizon larger than 2.
    self._values = [
        token - numpy.mean(bitcongruences[idx - horizon:idx])
        for idx, token in enumerate(bitcongruences[horizon:], horizon)
    ]
    # add this object to the cache
    MessageAnalyzer.analyze(self)
def bcHighPlateaus(self):
    """
    Filter the plateau starts of the bit congruences for those at a high level.

    :return: Plateaus in the bit congruence at high level (> 0.8)
        as a tuple of two lists: the indices and the associated values.
    """
    plateauElevation = 0.8
    plateaus = MessageAnalyzer.plateouStart(self.bitcongruences)

    # keep only index/value pairs of plateaus of high bit congruence
    highOnes = [(ix, vl) for ix, vl in zip(plateaus[0], plateaus[1]) if vl > plateauElevation]
    return [ix for ix, _ in highOnes], [vl for _, vl in highOnes]
def analyze(self):
    """
    Delta of bitwise congruence. see :func:`MessageAnalyzer.bitCongruence`

    not unit-dependant, always byte-wise

    Runs the parent analysis, keeps its values in ``self._bcvalues`` and replaces
    ``self._values`` by their token delta. One more value is skipped at the start of the message.
    """
    super().analyze()
    congruences = self._values
    self._bcvalues = congruences
    self._values = MessageAnalyzer.tokenDelta(congruences)
    self._startskip += 1

    # sanity check: skipped values plus computed values must cover the whole message
    assert len(self._message.data) == self._startskip + len(self._values), \
        "{} + {} != {}".format(self._startskip, len(self._values), len(self._message.data))
def annotateFieldTypes(analyzerType: type, analysisArgs: Union[Tuple, None], comparator,
                       unit=MessageAnalyzer.U_BYTE) -> List[Tuple[TypedSegment]]:
    """
    Segment each message of the comparator according to its dissection.

    :param analyzerType: Analyzer type to look up (via MessageAnalyzer.findExistingAnalysis) for each message.
    :param analysisArgs: Arguments for the analyzer.
    :param comparator: Provides the messages (as mapping, iterated by items()) and their dissections.
    :param unit: The unit of the analysis, defaults to bytes.
    :return: list of lists of segments that are annotated with their field type.
    """
    segmentedMessages = []
    for l4msg, rmsg in comparator.messages.items():
        analyzer = MessageAnalyzer.findExistingAnalysis(analyzerType, unit, l4msg, analysisArgs)
        segmentedMessages.append(segmentsFromLabels(analyzer, comparator.dissections[rmsg]))
    return segmentedMessages
def pinpointMinima(self):
    """
    Pinpoint the exact positions of local minima within the scope of each smoothed local minimum.
    The exact position is looked for in self.bitcongruences.

    :return: One exact local minimum m in the interval ( center(m_n-1, m_n), center(m_n, m_n+1) )
        for each n in (0, smoothed local minimum, -1)
    """
    smoothedMinima = MessageAnalyzer.localMinima(self.values)  # List[idx], List[min]

    # frame the minima indices by message start and end
    anchors = [0] + smoothedMinima[0] + [len(self._message.data)]
    # centers between each pair of neighboring anchors
    halfGaps = numpy.round(numpy.ediff1d(anchors) / 2)
    centers = (halfGaps + anchors[:-1]).astype(int)

    exactMinima = []
    for left, right in zip(centers[:-1], centers[1:]):
        # position of the lowest raw bit congruence between two neighboring centers
        exactMinima.append(left + numpy.argmin(self.bitcongruences[left:right]))
    return exactMinima
def messageSegmentation(self) -> List[MessageSegment]:
    """
    Segment message by determining local extrema of sigma-s-gauss-filtered sliding n-byte-mean bit-congruence.

    >>> from netzob.Model.Vocabulary.Messages.L4NetworkMessage import L4NetworkMessage
    >>> tstmsg = '19040aec0000027b000012850a6400c8d23d06a2535ed71ed23d09faa4673315d23d09faa1766325d23d09faa17b4b10'
    >>> l4m = L4NetworkMessage(bytes.fromhex(tstmsg))
    >>> hbg = HorizonBitcongruenceGauss(l4m)
    >>> hbg.setAnalysisParams()
    >>> hbg.analyze()
    >>> spm = hbg.messageSegmentation()
    >>> print(b''.join([seg.bytes for seg in spm]).hex() == spm[0].message.data.hex())
    True

    :return: Segmentation of this message based on this analyzer's type.
    :raises ValueError: if there are neither values nor analysis parameters to calculate them.
    """
    if not self.values:
        if not self._analysisArgs:
            raise ValueError('No values or analysis parameters set.')
        self.analyze()

    bcd = MessageAnalyzer.findExistingAnalysis(BitCongruenceDelta, MessageAnalyzer.U_BYTE, self.message)

    # all local minima
    bclmins = self.pinpointMinima()
    # local maxima, if bc[e] < bc[e+1] or bc[e] > 2*s2mbc[e] for all e in candidate indices
    bclmaxs = self.pinpointMaxima()
    bcdmaxs = [e for e in bclmaxs
               if bcd.values[e + 1] > bcd.values[e] or bcd.values[e] > 2 * self.bitcongruences[e]]

    # Positions are added one by one on purpose: each accepted position
    # influences whether later neighbors are still accepted.
    minmax = bclmins
    for bdm in bcdmaxs:  # only keep bcdmaxs if not in scope of a min
        if bdm + 1 not in minmax and bdm - 1 not in minmax:
            minmax.append(bdm)
    # starts of plateaus of bit congruences
    bcplats = MessageAnalyzer.plateouStart(self.bitcongruences)[0]  # bcd.values
    for bps in bcplats:  # only keep plateau starts if not in scope of a min or max
        if bps + 1 not in minmax and bps - 1 not in minmax:
            minmax.append(bps)

    relevantPositions = sorted(minmax)
    # get candidates to cut segments from message
    cutCandidates = [0] + [int(b) for b in relevantPositions if not numpy.isnan(b)] \
                    + [len(self._message.data)]  # add the message end
    # cut only where a segment is of a length larger than 1
    cutPositions = [0] + [right for left, right in zip(cutCandidates[:-1], cutCandidates[1:])
                          if right - left > 1]
    # make sure the last cut is the end of the message
    if cutPositions[-1] != cutCandidates[-1]:
        if len(cutPositions) > 1:
            # move the last cut to the message end, merging the short tail into the last segment
            cutPositions[-1] = cutCandidates[-1]
        else:
            # Only the message start is left: overwriting it would drop the start and
            # produce no segment at all, so add the end to get one segment for the whole message.
            cutPositions.append(cutCandidates[-1])

    segments = list()
    for lmaxCurr, lmaxNext in zip(cutPositions[:-1], cutPositions[1:]):
        segments.append(MessageSegment(self, lmaxCurr, lmaxNext - lmaxCurr))
    return segments
def analyze(self):
    """
    Relative variance of single message bytes.

    Stores the token delta of the message data, calculated in the analyzer's unit,
    in ``self._values``.
    """
    tokens = list(self._message.data)
    self._values = MessageAnalyzer.tokenDelta(tokens, self._unit)
def setAnalysisParams(self, analysisMethod: Type[MessageAnalyzer], *analysisArgs):
    """
    Look up the byte-wise analysis of the given type for this object's message and keep it in ``self._am``.

    :param analysisMethod: The analyzer type to look up via MessageAnalyzer.findExistingAnalysis.
    :param analysisArgs: Arguments for the analysis, handed over as one tuple.
    """
    analyzer = MessageAnalyzer.findExistingAnalysis(
        analysisMethod, MessageAnalyzer.U_BYTE, self._message, analysisArgs)
    self._am = analyzer
def cacheAndLoadDC(pcapfilename: str, analysisTitle: str, tokenizer: str, debug: bool,
                   analyzerType: type, analysisArgs: Tuple=None, sigma: float=None, filterTrivial=False,
                   refinementCallback:Union[Callable, None] = refinements,
                   disableCache=False) \
        -> Tuple[SpecimenLoader, MessageComparator, List[Tuple[MessageSegment]], DistanceCalculator,
                 float, float]:
    """
    cache or load the DistanceCalculator to or from the filesystem

    If no cache file for the parameter combination exists (or disableCache is set), the messages are loaded
    from the pcap, segmented by the selected tokenizer, and the distances between the segments are calculated.
    The result is then pickled to the cache file. Otherwise, the result is unpickled from the cache file.

    :param pcapfilename: Path of the pcap file to load the messages from.
    :param analysisTitle: Title of the analysis, used as part of the cache file name.
    :param tokenizer: The segmentation method: "tshark", "4bytesfixed", or "nemesys".
    :param debug: Handed over to the MessageComparator.
    :param analyzerType: Type of the analyzer the segments are to be based on.
    :param analysisArgs: Arguments for the analyzer.
    :param sigma: Sigma for the "nemesys" tokenizer. If not set, it is looked up in sigmapertrace by the
        pcap's basename and falls back to 0.9.
    :param filterTrivial: Filter out **one-byte** segments and such just consisting of **zeros**.
    :param refinementCallback: Function to refine the segments with, or None. It is called with the segmented
        messages and, if it accepts more than one argument, a distance calculator for the segments.
    :param disableCache: When experimenting with distances manipulation, deactivate caching!
        The cache is then not read; the cache file is written nevertheless.
    :return: Tuple of specimens, comparator, segmented messages, distance calculator,
        the time needed for segmenting and the time needed for calculating the distances.
        Both times are None if the result was loaded from the cache.
    """
    pcapbasename = os.path.basename(pcapfilename)
    # if refinementCallback == pcaMocoRefinements:
    #     sigma = pcamocoSigmapertrace[pcapbasename] if not sigma and pcapbasename in pcamocoSigmapertrace else \
    #         0.9 if not sigma else sigma
    # else:
    # an explicitly given sigma takes precedence, then the per-trace value, then the default of 0.9
    sigma = sigmapertrace[pcapbasename] if not sigma and pcapbasename in sigmapertrace else \
        0.9 if not sigma else sigma
    pcapName = os.path.splitext(pcapbasename)[0]
    # sigma is only part of the cache file name for the nemesys tokenizer
    # noinspection PyUnboundLocalVariable
    tokenparm = tokenizer if tokenizer != "nemesys" else \
        "{}{:.0f}".format(tokenizer, sigma * 10)
    dccachefn = os.path.join(cacheFolder, 'cache-dc-{}-{}-{}-{}-{}.{}'.format(
        analysisTitle, tokenparm, "filtered" if filterTrivial else "all",
        refinementCallback.__name__ if refinementCallback is not None else "raw",
        pcapName, 'ddc'))
    # dccachefn = 'cache-dc-{}-{}-{}.{}'.format(analysisTitle, tokenizer, pcapName, 'dc')
    if disableCache or not os.path.exists(dccachefn):
        # dissect and label messages
        print("Load messages from {}...".format(pcapName))
        specimens = SpecimenLoader(pcapfilename, 2, True)
        comparator = MessageComparator(specimens, 2, True, debug=debug)

        print("Segmenting messages...", end=' ')
        segmentationTime = time.time()
        # select tokenizer by command line parameter
        # NOTE(review): for any other tokenizer value, segmentedMessages stays unbound and the
        #   function fails further below - consider raising an explicit error here.
        if tokenizer == "tshark":
            # 1. segment messages according to true fields from the labels
            segmentedMessages = annotateFieldTypes(analyzerType, analysisArgs, comparator)
        elif tokenizer == "4bytesfixed":
            # 2. segment messages into fixed size chunks for testing
            segmentedMessages = segmentsFixed(4, comparator, analyzerType, analysisArgs)
        elif tokenizer == "nemesys":
            # 3. segment messages by NEMESYS
            segmentsPerMsg = bcDeltaGaussMessageSegmentation(specimens, sigma)

            # get analyzer requested by analyzerType/analysisArgs
            segmentedMessages = [[
                MessageSegment(MessageAnalyzer.findExistingAnalysis(
                    analyzerType, MessageAnalyzer.U_BYTE, seg.message, analysisArgs), seg.offset, seg.length)
                for seg in msg] for msg in segmentsPerMsg]

            # NOTE(review): the refinement is presumably applied to NEMESYS segments only, i.e., it is
            #   part of this branch - confirm the nesting against the original file layout.
            if refinementCallback is not None:
                if refinementCallback.__code__.co_argcount > 1:
                    # assume the second argument is expected to be a distance calculator
                    chainedSegments = list(chain.from_iterable(segmentedMessages))
                    print("Refinement: Calculate distance for {} segments...".format(len(chainedSegments)))
                    # select the calculator implementation by the size of the resulting matrix
                    if len(chainedSegments)**2 > MemmapDC.maxMemMatrix:
                        refinementDC = MemmapDC(chainedSegments)
                    else:
                        refinementDC = DelegatingDC(chainedSegments)
                    segmentedMessages = refinementCallback(segmentedMessages, refinementDC)
                else:
                    segmentedMessages = refinementCallback(segmentedMessages)

            # segments = list(chain.from_iterable(segmentedMessages))

        segmentationTime = time.time() - segmentationTime
        print("done.")

        if filterTrivial:
            # drop one-byte segments and segments whose values are all zero
            # noinspection PyUnboundLocalVariable
            chainedSegments = [seg for seg in chain.from_iterable(segmentedMessages) if
                               seg.length > 1 and set(seg.values) != {0}]
        else:
            # noinspection PyUnboundLocalVariable
            chainedSegments = list(chain.from_iterable(segmentedMessages))

        print("Calculate distance for {} segments...".format(len(chainedSegments)))
        # dc = DistanceCalculator(chainedSegments, reliefFactor=0.33)  # Pairwise similarity of segments: dc.distanceMatrix
        dist_calc_segmentsTime = time.time()
        # select the calculator implementation by the size of the resulting matrix
        if len(chainedSegments)**2 > MemmapDC.maxMemMatrix:
            dc = MemmapDC(chainedSegments)
        else:
            dc = DelegatingDC(chainedSegments)
        assert chainedSegments == dc.rawSegments
        dist_calc_segmentsTime = time.time() - dist_calc_segmentsTime
        # the cache file is written even if disableCache is set
        try:
            with open(dccachefn, 'wb') as f:
                pickle.dump((segmentedMessages, comparator, dc), f, pickle.HIGHEST_PROTOCOL)
        except MemoryError as e:
            print("DC could not be cached due to a MemoryError. Removing", dccachefn, "and continuing.")
            # remove the cache file that was opened for writing above
            os.remove(dccachefn)
    else:
        print("Load distances from cache file {}".format(dccachefn))
        # NOTE(review): pickle.load executes arbitrary code from the file - the cache folder must be trusted.
        with open(dccachefn, 'rb') as f:
            segmentedMessages, comparator, dc = pickle.load(f)
        # terminate the process if the cache file does not contain the expected object types
        if not (isinstance(comparator, MessageComparator)
                and isinstance(dc, DistanceCalculator)):
            print('Loading of cached distances failed.')
            exit(10)
        specimens = comparator.specimens
        # chainedSegments = list(chain.from_iterable(segmentedMessages))
        # no timing information is available for cached results
        segmentationTime, dist_calc_segmentsTime = None, None

    return specimens, comparator, segmentedMessages, dc, segmentationTime, dist_calc_segmentsTime