Ejemplo n.º 1
0
    def analyze(self):
        """
        Store the second-order token delta of the message data as analysis values.

        The message data is first converted into a delta series using this
        analyzer's unit; the delta of that series is then computed (without an
        explicit unit) and assigned to ``self._values``.

        Idea: the "amplitude" of variance => threshold.

        change of the amplitude
            * intra message
            * TODO across messages
        """
        innerDelta = MessageAnalyzer.tokenDelta(list(self._message.data), self._unit)
        self._values = MessageAnalyzer.tokenDelta(innerDelta)
Ejemplo n.º 2
0
 def extrema(self) -> List[Tuple[int, bool]]:
     """
     Collect all local extrema of this analyzer's values, ordered by index.

     :return: list of tuples (index, isMaximum); minima carry False, maxima True.
         For equal indices, the minimum is listed before the maximum.
     """
     series = self.values
     minima = MessageAnalyzer.localMinima(series)
     maxima = MessageAnalyzer.localMaxima(series)
     tagged = [(position, False) for position in minima[0]]
     tagged.extend((position, True) for position in maxima[0])
     # list.sort is stable, so minima stay ahead of maxima at the same index
     tagged.sort(key=lambda entry: entry[0])
     return tagged
Ejemplo n.º 3
0
    def analyze(self):
        """
        Calculate the entropy of every n-gram of size ``self._n``.

        If the analyzer's unit is MessageAnalyzer.U_NIBBLE, each n-gram is split
        into nibbles before the entropy is calculated; otherwise the n-gram is
        used as is. The list of entropies is stored in ``self._values``.
        """
        grams = list(self.ngrams(self._n))
        entropies = []
        for gram in grams:
            tokens = MessageAnalyzer.nibblesFromBytes(gram) \
                if self._unit == MessageAnalyzer.U_NIBBLE else gram
            # calcEntropy should work for bytes as well as for nibble tokens
            entropies.append(MessageAnalyzer.calcEntropy(tokens))
        self._values = entropies
Ejemplo n.º 4
0
    def analyze(self):
        """
        Bit congruence directly between the n-grams of the message.

        Not unit-dependent. The result is stored in ``self._values`` and the base
        implementation ``MessageAnalyzer.analyze`` is invoked afterwards.

        :raises ParametersNotSet: if the n-gram size ("n") has not been set.
        """
        ngramSize = self._n
        if not ngramSize:
            raise ParametersNotSet('Analysis parameter missing: N-gram size ("n").')
        self._values = BitCongruence.bitCongruenceBetweenTokens(
            list(self.ngrams(ngramSize)))
        MessageAnalyzer.analyze(self)
Ejemplo n.º 5
0
    def plotSubfigs(self, analysisResults: List[List[float]], subfigName: List[str]=None,
                    compareValue: List[List[float]]=None, fieldEnds: List[List[int]]=None,
                    markextrema: bool=False,
                    resultsLabel: str=None, compareLabel: str=None, fieldEndMarks: bool=True):
        """
        Draw analysis results, and optional reference data, into the subplots.

        :param analysisResults: The results of a message analyzer for each message.
        :param subfigName: Titles for each subplot.
        :param compareValue: Values to plot for comparison for each message.
        :param fieldEnds: True field ends to plot for reference to each message.
        :param markextrema: If set, plot the local extrema of the analysis results.
        :param resultsLabel: Label for the results; if given, it is added to the main line style.
        :param compareLabel: Label for the compare values; if given, it is added to the compare line style.
        :param fieldEndMarks: Mark the field ends with dots on the graph
            (only has an effect when fieldEnds is given).
        """
        # main graph: attach the label to a copy of the style only if one was requested
        mainStyle = MessagePlotter.STYLE_ALTMAINLINE
        if resultsLabel:
            mainStyle = dict(MessagePlotter.STYLE_ALTMAINLINE, label=resultsLabel)
        self.plotInEachAx(analysisResults, linestyle=mainStyle)

        if markextrema:
            # minima first, then maxima
            for findExtrema in (MessageAnalyzer.localMinima, MessageAnalyzer.localMaxima):
                self.scatterInEachAx([findExtrema(values) for values in analysisResults])

        if compareValue:
            compareStyle = MessagePlotter.STYLE_COMPARELINE
            if compareLabel:
                compareStyle = dict(MessagePlotter.STYLE_COMPARELINE, label=compareLabel)
            self.plotInEachAx(compareValue, linestyle=compareStyle)

        if fieldEnds:
            self.fieldmarkersInEachAx(fieldEnds)
            if fieldEndMarks:
                try:
                    endPoints = []
                    for ends, results in zip(fieldEnds, analysisResults):
                        # the last field end is left out of the dot markers
                        innerEnds = ends[:-1]
                        endPoints.append((innerEnds, [results[pos] for pos in innerEnds]))
                    self.scatterInEachAx(endPoints, marker='.')
                except IndexError:
                    print('Error: Dissector field index and message are contradicting. Field ends could not be marked.\n'
                          'Check dissector and message.')

        if subfigName:
            self.nameEachAx(subfigName)

        if resultsLabel or compareLabel:
            plt.legend()
Ejemplo n.º 6
0
    def pinpointMaxima(self):
        """
        Pinpoint the exact positions of local maxima within the scope of each smoothed local maximum.
        The exact position is looked for in self.bitcongruences.
        Only those extrema of the smoothed graph (self.values) are taken into account
        which are above the sensitivity threshold, i.e., greater than
        ``self._sensitivity`` times the largest smoothed local maximum.

        :return: One exact local maximum m in the interval ( center(m_n-1, m_n), center(m_n, m_n+1) )
            for each n in (0, smoothed local maximum, -1).
            An empty list if the smoothed values have no local maximum at all.
        """
        from itertools import compress

        localmaxima = MessageAnalyzer.localMaxima(
            self.values)  # List[idx], List[max]
        # default=0 prevents a ValueError if there is no local maximum; the mask below
        # is empty in that case anyway, so the default value itself is never compared.
        allovermax = max(localmaxima[1], default=0)
        threshold = self._sensitivity * allovermax
        maxSmsk = [e > threshold for e in localmaxima[1]]
        # indices of the sufficiently high maxima, framed by message start and end
        lmaxAO = [0] + list(compress(localmaxima[0],
                                     maxSmsk)) + [len(self._message.data)]
        # centers between each pair of neighboring entries of lmaxAO
        lmaxMed = (numpy.round(numpy.ediff1d(lmaxAO) / 2) +
                   lmaxAO[:-1]).astype(int)
        # position of the largest bit congruence between each two neighboring centers
        bclmaxs = [
            medl + numpy.argmax(self.bitcongruences[medl:medr])
            for medl, medr in zip(lmaxMed[:-1], lmaxMed[1:])
        ]
        return bclmaxs
Ejemplo n.º 7
0
    def analyze(self):
        """
        2nd order delta of bitwise congruence. see :func:`MessageAnalyzer.bitCongruence()`

        not unit-dependent, always byte-wise

        Runs the parent class's analysis, replaces ``self._values`` by the token
        delta of the parent's values, and increments ``self._startskip`` by one.
        Nothing is returned; the result is available in ``self._values``.
        """
        super().analyze()
        parentValues = self._values
        self._values = MessageAnalyzer.tokenDelta(parentValues)
        self._startskip = self._startskip + 1
Ejemplo n.º 8
0
    def analyze(self):
        """
        Bit congruence compared to the mean bit congruence of the preceding
        ``horizon`` positions.

        The horizon is taken from the first analysis argument. For each index
        ``idx >= horizon`` of the bit congruences between the message's tokens, the
        stored value is ``bitcongruences[idx] - mean(bitcongruences[idx - horizon:idx])``.
        The result is written to ``self._values`` and ``self._startskip`` is
        increased by ``horizon``.

        :raises ParametersNotSet: if no analysis arguments (the horizon) are set.
        """
        if not self._analysisArgs:
            raise ParametersNotSet('Analysis parameter missing: horizon.')
        horizon = self._analysisArgs[0]
        self._startskip += horizon

        tokenlist = self._message.data  # tokenlist could also be list of ngrams.
        bitcongruences = BitCongruence.bitCongruenceBetweenTokens(tokenlist)

        # The look-back window must span the configured horizon. A fixed window of 2
        # is only correct for horizon == 2 and yields an empty slice (NaN mean) at
        # idx == 1 for horizon == 1.
        self._values = [
            congruence - numpy.mean(bitcongruences[idx - horizon:idx])
            for idx, congruence in enumerate(bitcongruences[horizon:], horizon)
        ]

        # add this object to the cache
        MessageAnalyzer.analyze(self)
Ejemplo n.º 9
0
    def bcHighPlateaus(self):
        """
        Select the plateaus of the bit congruence that lie at a high level.

        :return: Tuple of two lists (indices, values) of those plateau starts in
            self.bitcongruences whose value is strictly greater than 0.8.
        """
        minElevation = 0.8
        plateaus = MessageAnalyzer.plateouStart(self.bitcongruences)

        # keep index/value pairs together while filtering, split them afterwards
        elevated = [
            (index, value)
            for index, value in zip(plateaus[0], plateaus[1])
            if value > minElevation
        ]
        return [index for index, _ in elevated], [value for _, value in elevated]
Ejemplo n.º 10
0
    def analyze(self):
        """
        Delta of bitwise congruence. see :func:`MessageAnalyzer.bitCongruence`

        not unit-dependent, always byte-wise

        Runs the parent class's analysis, keeps the parent's values in
        ``self._bcvalues``, stores their token delta in ``self._values`` and
        increments ``self._startskip`` by one. Afterwards, the start skip plus
        the number of values is asserted to equal the message length.
        """
        super().analyze()
        self._bcvalues = self._values
        self._values = MessageAnalyzer.tokenDelta(self._bcvalues)
        self._startskip += 1

        valueCount = len(self._values)
        messageLength = len(self._message.data)
        assert self._startskip + valueCount == messageLength, \
            "{} + {} != {}".format(self._startskip, valueCount, messageLength)
Ejemplo n.º 11
0
def annotateFieldTypes(
        analyzerType: type,
        analysisArgs: Union[Tuple, None],
        comparator,
        unit=MessageAnalyzer.U_BYTE) -> List[Tuple[TypedSegment]]:
    """
    Segment every message of the comparator according to its dissection.

    For each pair in ``comparator.messages``, an analyzer of the requested type
    is obtained via ``MessageAnalyzer.findExistingAnalysis`` and passed to
    ``segmentsFromLabels`` together with the message's entry in
    ``comparator.dissections``.

    :param analyzerType: The analyzer class to look up for each message.
    :param analysisArgs: Arguments for the analyzer, or None.
    :param comparator: Object providing the ``messages`` mapping and ``dissections``.
    :param unit: The unit handed to the analyzer lookup.
    :return: list of lists of segments that are annotated with their field type.
    """
    annotated = []
    for l4msg, rmsg in comparator.messages.items():
        analyzer = MessageAnalyzer.findExistingAnalysis(
            analyzerType, unit, l4msg, analysisArgs)
        annotated.append(
            segmentsFromLabels(analyzer, comparator.dissections[rmsg]))
    return annotated
Ejemplo n.º 12
0
    def pinpointMinima(self):
        """
        Pinpoint the exact positions of local minima within the scope of each smoothed local minimum.
        The smoothed minima are taken from self.values; the exact position is
        looked for in self.bitcongruences.

        :return: One exact local minimum m in the interval ( center(m_n-1, m_n), center(m_n, m_n+1) )
            for each n in (0, smoothed local minimum, -1)
        """
        smoothedMinima = MessageAnalyzer.localMinima(self.values)  # List[idx], List[min]
        # indices of the smoothed minima, framed by message start and end
        boundaries = [0] + smoothedMinima[0] + [len(self._message.data)]
        # centers between each pair of neighboring boundaries
        halfGaps = numpy.round(numpy.ediff1d(boundaries) / 2)
        centers = (halfGaps + boundaries[:-1]).astype(int)

        exactMinima = []
        for left, right in zip(centers[:-1], centers[1:]):
            # position of the smallest bit congruence between two neighboring centers
            exactMinima.append(left + numpy.argmin(self.bitcongruences[left:right]))
        return exactMinima
Ejemplo n.º 13
0
    def messageSegmentation(self) -> List[MessageSegment]:
        """
        Segment message by determining local extrema of sigma-s-gauss-filtered sliding n-byte-mean bit-congruence.

        Cut candidates are collected from three sources, in this order: the pinpointed
        local minima, those pinpointed local maxima that pass a filter on the bit
        congruence delta, and the starts of plateaus in the bit congruence. Maxima and
        plateau starts are only added if neither of their direct neighbor positions is
        already a candidate. Candidates at most one position after their predecessor
        are dropped when cutting.

        >>> from netzob.Model.Vocabulary.Messages.L4NetworkMessage import L4NetworkMessage
        >>> tstmsg = '19040aec0000027b000012850a6400c8d23d06a2535ed71ed23d09faa4673315d23d09faa1766325d23d09faa17b4b10'
        >>> l4m = L4NetworkMessage(bytes.fromhex(tstmsg))
        >>> hbg = HorizonBitcongruenceGauss(l4m)
        >>> hbg.setAnalysisParams()
        >>> hbg.analyze()
        >>> spm = hbg.messageSegmentation()
        >>> print(b''.join([seg.bytes for seg in spm]).hex() == spm[0].message.data.hex())
        True

        :return: Segmentation of this message based on this analyzer's type.
        :raises ValueError: if there are neither values nor analysis parameters set.
        """
        # run the analysis on demand if there are no values yet
        if not self.values:
            if not self._analysisArgs:
                raise ValueError('No values or analysis parameters set.')
            self.analyze()

        # byte-wise bit congruence delta of the same message, used to filter the maxima
        bcd = MessageAnalyzer.findExistingAnalysis(BitCongruenceDelta,
                                                   MessageAnalyzer.U_BYTE,
                                                   self.message)

        # all local minima
        bclmins = self.pinpointMinima()
        # local maxima e are kept only if bcd[e+1] > bcd[e] or bcd[e] > 2 * bitcongruences[e]
        # for all e in candidate indices
        # NOTE(review): bcd.values[e + 1] is accessed without a bounds check - confirm that
        #   a pinpointed maximum can never be the last index of bcd.values.
        bclmaxs = self.pinpointMaxima()
        bcdmaxs = [
            e for e in bclmaxs
            if bcd.values[e + 1] > bcd.values[e] or bcd.values[e] > 2 *
            self.bitcongruences[e]
        ]
        # minmax is the same list object as bclmins (no copy), it is extended in place below
        minmax = bclmins
        for bdm in bcdmaxs:  # only keep bcdmaxs if not in direct neighborhood of a min
            # only the positions directly left and right are tested, not bdm itself,
            # and the test sees the entries appended in previous iterations
            if bdm + 1 not in minmax and bdm - 1 not in minmax:
                minmax.append(bdm)
        # starts of plateaus of bit congruences
        bcplats = MessageAnalyzer.plateouStart(
            self.bitcongruences)[0]  # bcd.values
        for bps in bcplats:  # only keep plateau starts if not in direct neighborhood of a min or max
            if bps + 1 not in minmax and bps - 1 not in minmax:
                minmax.append(bps)

        # # separate nan-values
        # nansep = MessageAnalyzer.separateNaNs(self.values)
        relevantPositions = list(sorted(minmax))
        # get candidates to cut segments from message: message start, all non-NaN positions, message end
        cutCandidates = [0] + [int(b) for b in relevantPositions if not numpy.isnan(b)] \
                        + [len(self._message.data)]  # add the message end
        # cut only where a segment is of a length larger than 1:
        # a candidate is dropped if it is at most one position after the preceding candidate
        cutPositions = [0] + [
            right for left, right in zip(cutCandidates[:-1], cutCandidates[1:])
            if right - left > 1
        ]
        # cutPositions = list(sorted(cutPositions + nansep[0]))
        # Make the cuts end at the end of the message: if the message end was dropped above,
        # the last cut position is REPLACED by (not followed by) the message end, so the
        # remainder is merged into the preceding segment.
        # NOTE(review): if cutPositions is just [0] here, the start position itself is
        #   overwritten and no segment is produced - confirm whether this can occur.
        if cutPositions[-1] != cutCandidates[-1]:
            cutPositions[-1] = cutCandidates[-1]

        # one segment (offset, length) between each pair of neighboring cut positions
        segments = list()
        for lmaxCurr, lmaxNext in zip(cutPositions[:-1], cutPositions[1:]):
            segments.append(MessageSegment(self, lmaxCurr,
                                           lmaxNext - lmaxCurr))
        return segments
Ejemplo n.º 14
0
 def analyze(self):
     """
     Relative variance of single message bytes.

     Stores the token delta of the message data, computed with this analyzer's
     unit, in ``self._values``.
     """
     messageTokens = list(self._message.data)
     self._values = MessageAnalyzer.tokenDelta(messageTokens, self._unit)
Ejemplo n.º 15
0
 def setAnalysisParams(self, analysisMethod: Type[MessageAnalyzer],
                       *analysisArgs):
     """
     Look up the analyzer to work with and keep it in ``self._am``.

     :param analysisMethod: The analyzer class to look up for this object's message.
     :param analysisArgs: Arguments for the analysis; handed on as one tuple.
     """
     lookupAnalysis = MessageAnalyzer.findExistingAnalysis
     self._am = lookupAnalysis(analysisMethod, MessageAnalyzer.U_BYTE,
                               self._message, analysisArgs)
Ejemplo n.º 16
0
def cacheAndLoadDC(pcapfilename: str, analysisTitle: str, tokenizer: str, debug: bool,
                   analyzerType: type, analysisArgs: Tuple=None, sigma: float=None, filterTrivial=False,
                   refinementCallback:Union[Callable, None] = refinements,
                   disableCache=False) \
        -> Tuple[SpecimenLoader, MessageComparator, List[Tuple[MessageSegment]], DistanceCalculator,
        Union[float, None], Union[float, None]]:
    """
    cache or load the DistanceCalculator to or from the filesystem

    If caching is disabled or no cache file for the given parameters exists, the
    messages are loaded from the pcap, segmented by the selected tokenizer, and a
    distance calculator is created for the segments. The result is then pickled
    to a cache file in ``cacheFolder``. Otherwise, the result is unpickled from
    that cache file.

    :param pcapfilename: Path of the pcap file to load the messages from.
    :param analysisTitle: Title of the analysis; part of the cache file name.
    :param tokenizer: One of "tshark" (true fields from the dissection labels),
        "4bytesfixed" (fixed chunks of 4 bytes), or "nemesys".
    :param debug: Passed on to the MessageComparator.
    :param analyzerType: Analyzer class to use for the segments.
    :param analysisArgs: Arguments for the analyzer.
    :param sigma: Sigma for the "nemesys" tokenizer. If not given (or zero), the value
        for the pcap's base name in ``sigmapertrace`` is used, or 0.9 if there is none.
    :param filterTrivial: Filter out **one-byte** segments and such just consisting of **zeros**.
    :param refinementCallback: Refinement applied to the segments of the "nemesys" tokenizer
        only. If the callback takes more than one argument, a distance calculator for
        the unrefined segments is passed as the second one. None disables refinement.
        The callback's name is part of the cache file name for every tokenizer.
    :param disableCache: When experimenting with distances manipulation, deactivate caching!
        The cache file is not read then, but it is still (over)written.
    :return: Tuple of specimens, comparator, segmented messages, distance calculator,
        segmentation time, and distance calculation time (both in seconds).
        The two durations are None if the result was loaded from the cache.
    :raises ValueError: if the segmentation needs to be computed and the tokenizer is unknown.
    """
    pcapbasename = os.path.basename(pcapfilename)
    if not sigma:
        # no (or zero) sigma given: use the per-trace value if one is known, otherwise 0.9
        sigma = sigmapertrace[pcapbasename] if pcapbasename in sigmapertrace else 0.9
    pcapName = os.path.splitext(pcapbasename)[0]
    # sigma only influences (and thus only tags the cache file of) the nemesys tokenizer
    tokenparm = tokenizer if tokenizer != "nemesys" else \
        "{}{:.0f}".format(tokenizer, sigma * 10)
    dccachefn = os.path.join(
        cacheFolder, 'cache-dc-{}-{}-{}-{}-{}.{}'.format(
            analysisTitle, tokenparm, "filtered" if filterTrivial else "all",
            refinementCallback.__name__
            if refinementCallback is not None else "raw", pcapName, 'ddc'))
    if disableCache or not os.path.exists(dccachefn):
        # fail early and explicitly instead of with an unbound segmentedMessages
        # after the expensive loading and dissection of the trace
        if tokenizer not in ("tshark", "4bytesfixed", "nemesys"):
            raise ValueError("Unknown tokenizer {!r}; expected one of "
                             "'tshark', '4bytesfixed', 'nemesys'.".format(tokenizer))

        # dissect and label messages
        print("Load messages from {}...".format(pcapName))
        specimens = SpecimenLoader(pcapfilename, 2, True)
        comparator = MessageComparator(specimens, 2, True, debug=debug)

        print("Segmenting messages...", end=' ')
        segmentationTime = time.time()
        # select tokenizer by command line parameter
        if tokenizer == "tshark":
            # 1. segment messages according to true fields from the labels
            segmentedMessages = annotateFieldTypes(analyzerType, analysisArgs,
                                                   comparator)
        elif tokenizer == "4bytesfixed":
            # 2. segment messages into fixed size chunks for testing
            segmentedMessages = segmentsFixed(4, comparator, analyzerType,
                                              analysisArgs)
        else:  # tokenizer == "nemesys", ensured by the validation above
            # 3. segment messages by NEMESYS
            segmentsPerMsg = bcDeltaGaussMessageSegmentation(specimens, sigma)

            # get analyzer requested by analyzerType/analysisArgs
            segmentedMessages = [[
                MessageSegment(
                    MessageAnalyzer.findExistingAnalysis(
                        analyzerType, MessageAnalyzer.U_BYTE, seg.message,
                        analysisArgs), seg.offset, seg.length) for seg in msg
            ] for msg in segmentsPerMsg]

            if refinementCallback is not None:
                if refinementCallback.__code__.co_argcount > 1:
                    # assume the second argument is expected to be a distance calculator
                    chainedSegments = list(
                        chain.from_iterable(segmentedMessages))
                    print("Refinement: Calculate distance for {} segments...".
                          format(len(chainedSegments)))
                    if len(chainedSegments)**2 > MemmapDC.maxMemMatrix:
                        refinementDC = MemmapDC(chainedSegments)
                    else:
                        refinementDC = DelegatingDC(chainedSegments)
                    segmentedMessages = refinementCallback(
                        segmentedMessages, refinementDC)
                else:
                    segmentedMessages = refinementCallback(segmentedMessages)

        segmentationTime = time.time() - segmentationTime
        print("done.")

        if filterTrivial:
            chainedSegments = [
                seg for seg in chain.from_iterable(segmentedMessages)
                if seg.length > 1 and set(seg.values) != {0}
            ]
        else:
            chainedSegments = list(chain.from_iterable(segmentedMessages))

        print("Calculate distance for {} segments...".format(
            len(chainedSegments)))
        dist_calc_segmentsTime = time.time()
        # use the memory-mapped calculator if the matrix would exceed the in-memory limit
        if len(chainedSegments)**2 > MemmapDC.maxMemMatrix:
            dc = MemmapDC(chainedSegments)
        else:
            dc = DelegatingDC(chainedSegments)
        assert chainedSegments == dc.rawSegments
        dist_calc_segmentsTime = time.time() - dist_calc_segmentsTime
        try:
            with open(dccachefn, 'wb') as f:
                pickle.dump((segmentedMessages, comparator, dc), f,
                            pickle.HIGHEST_PROTOCOL)
        except MemoryError:
            # best effort: the result is still returned, only the incomplete cache file is dropped
            print("DC could not be cached due to a MemoryError. Removing",
                  dccachefn, "and continuing.")
            os.remove(dccachefn)
    else:
        print("Load distances from cache file {}".format(dccachefn))
        # SECURITY NOTE: unpickling executes code embedded in the file. The cache folder
        # must only contain files written by this function; never load foreign cache files.
        with open(dccachefn, 'rb') as f:
            segmentedMessages, comparator, dc = pickle.load(f)
        if not (isinstance(comparator, MessageComparator)
                and isinstance(dc, DistanceCalculator)):
            print('Loading of cached distances failed.')
            # same exit status as before, without relying on the site-provided exit() helper
            raise SystemExit(10)
        specimens = comparator.specimens
        # nothing was computed, so there are no durations to report
        segmentationTime, dist_calc_segmentsTime = None, None

    return specimens, comparator, segmentedMessages, dc, segmentationTime, dist_calc_segmentsTime