Example no. 1
0
def annotateFieldTypes(
        analyzerType: type,
        analysisArgs: Union[Tuple, None],
        comparator,
        unit=MessageAnalyzer.U_BYTE) -> List[Tuple[TypedSegment]]:
    """
    Annotate the fields of every message known to the comparator with their
    true field types, using the ground truth from the comparator's dissections.

    :return: list of lists of segments that are annotated with their field type.
    """
    annotatedMessages = []
    for l4msg, rmsg in comparator.messages.items():
        # Reuse a cached analyzer for this message if one already exists.
        analyzer = MessageAnalyzer.findExistingAnalysis(
            analyzerType, unit, l4msg, analysisArgs)
        annotatedMessages.append(
            segmentsFromLabels(analyzer, comparator.dissections[rmsg]))
    return annotatedMessages
Example no. 2
0
    def messageSegmentation(self) -> List[MessageSegment]:
        """
        Segment message by determining local extrema of sigma-s-gauss-filtered sliding n-byte-mean bit-congruence.

        The segment boundaries are derived from local minima, selected local
        maxima, and plateau starts of the bit-congruence sequence; boundaries
        that would produce single-byte segments are dropped.

        >>> from netzob.Model.Vocabulary.Messages.L4NetworkMessage import L4NetworkMessage
        >>> tstmsg = '19040aec0000027b000012850a6400c8d23d06a2535ed71ed23d09faa4673315d23d09faa1766325d23d09faa17b4b10'
        >>> l4m = L4NetworkMessage(bytes.fromhex(tstmsg))
        >>> hbg = HorizonBitcongruenceGauss(l4m)
        >>> hbg.setAnalysisParams()
        >>> hbg.analyze()
        >>> spm = hbg.messageSegmentation()
        >>> print(b''.join([seg.bytes for seg in spm]).hex() == spm[0].message.data.hex())
        True

        :return: Segmentation of this message based on this analyzer's type.
        :raises ValueError: If neither analysis values nor analysis parameters are set.
        """
        # Lazily run the analysis if it has not produced values yet.
        if not self.values:
            if not self._analysisArgs:
                raise ValueError('No values or analysis parameters set.')
            self.analyze()

        # Delta of the bit congruence, used below to filter the maxima.
        bcd = MessageAnalyzer.findExistingAnalysis(BitCongruenceDelta,
                                                   MessageAnalyzer.U_BYTE,
                                                   self.message)

        # all local minima
        bclmins = self.pinpointMinima()
        # local maxima, kept only if bc[e] < bc[e+1] or bc[e] > 2*s2mbc[e] for all e in candidate indices
        bclmaxs = self.pinpointMaxima()
        # NOTE(review): bcd.values[e + 1] presumably exists for every candidate
        # maximum — confirm pinpointMaxima never returns the last index.
        bcdmaxs = [
            e for e in bclmaxs
            if bcd.values[e + 1] > bcd.values[e] or bcd.values[e] > 2 *
            self.bitcongruences[e]
        ]
        # minmax aliases bclmins; the appends below extend that same list.
        minmax = bclmins
        for bdm in bcdmaxs:  # only keep bcdmaxs if not in scope of a min
            if bdm + 1 not in minmax and bdm - 1 not in minmax:
                minmax.append(bdm)
        # starts of plateaus of bit congruences
        bcplats = MessageAnalyzer.plateouStart(
            self.bitcongruences)[0]  # bcd.values
        for bps in bcplats:  # only keep plateau starts if not in scope of a min or max
            if bps + 1 not in minmax and bps - 1 not in minmax:
                minmax.append(bps)

        # # separate nan-values
        # nansep = MessageAnalyzer.separateNaNs(self.values)
        relevantPositions = list(sorted(minmax))
        # get candidates to cut segments from message
        cutCandidates = [0] + [int(b) for b in relevantPositions if not numpy.isnan(b)] \
                        + [len(self._message.data)]  # add the message end
        # cut only where a segment is of a length larger than 1
        cutPositions = [0] + [
            right for left, right in zip(cutCandidates[:-1], cutCandidates[1:])
            if right - left > 1
        ]
        # cutPositions = list(sorted(cutPositions + nansep[0]))
        # add the end of the message if its not already there
        # (replacing the last cut keeps the final segment longer than 1 byte)
        if cutPositions[-1] != cutCandidates[-1]:
            cutPositions[-1] = cutCandidates[-1]

        # Materialize consecutive cut positions as segments over this analyzer.
        segments = list()
        for lmaxCurr, lmaxNext in zip(cutPositions[:-1], cutPositions[1:]):
            segments.append(MessageSegment(self, lmaxCurr,
                                           lmaxNext - lmaxCurr))
        return segments
Example no. 3
0
 def setAnalysisParams(self, analysisMethod: Type[MessageAnalyzer],
                       *analysisArgs):
     """
     Set the analysis method and its arguments for this object by looking up
     (or creating) the matching byte-unit analyzer for this message.
     """
     analyzer = MessageAnalyzer.findExistingAnalysis(
         analysisMethod, MessageAnalyzer.U_BYTE, self._message, analysisArgs)
     self._am = analyzer
Example no. 4
0
def cacheAndLoadDC(pcapfilename: str, analysisTitle: str, tokenizer: str, debug: bool,
                   analyzerType: type, analysisArgs: Tuple=None, sigma: float=None, filterTrivial=False,
                   refinementCallback:Union[Callable, None] = refinements,
                   disableCache=False) \
        -> Tuple[SpecimenLoader, MessageComparator, List[Tuple[MessageSegment]], DistanceCalculator,
        float, float]:
    """
    cache or load the DistanceCalculator to or from the filesystem

    Dissects and segments the messages of the given PCAP with the selected
    tokenizer, calculates pairwise segment distances, and pickles the result
    so that later runs with identical parameters can skip the expensive steps.

    :param pcapfilename: Path of the PCAP trace to process.
    :param analysisTitle: Title that discriminates cache files of different analyses.
    :param tokenizer: One of "tshark", "4bytesfixed", or "nemesys".
    :param debug: Passed through to the MessageComparator.
    :param analyzerType: MessageAnalyzer subclass to attach to the segments.
    :param analysisArgs: Arguments for the analyzer, if any.
    :param sigma: Gauss-filter sigma for the NEMESYS tokenizer; when None, a
        per-trace default from sigmapertrace is used, falling back to 0.9.
    :param filterTrivial: Filter out **one-byte** segments and such just consisting of **zeros**.
    :param refinementCallback: Optional refinement of the segmentation; when it
        accepts a second argument, a DistanceCalculator is provided to it.
    :param disableCache: When experimenting with distances manipulation, deactivate caching!
    :raises ValueError: If an unknown tokenizer is given.
    :return: Loader, comparator, segmented messages, distance calculator, and
        the wall-clock durations of segmentation and distance calculation
        (the two durations are None when everything was loaded from cache).
    """
    pcapbasename = os.path.basename(pcapfilename)
    # Resolve sigma: explicit argument wins, then per-trace default, then 0.9.
    if not sigma:
        sigma = sigmapertrace.get(pcapbasename, 0.9)
    pcapName = os.path.splitext(pcapbasename)[0]
    # Encode sigma into the cache name only for the sigma-dependent tokenizer.
    tokenparm = tokenizer if tokenizer != "nemesys" else \
        "{}{:.0f}".format(tokenizer, sigma * 10)
    dccachefn = os.path.join(
        cacheFolder, 'cache-dc-{}-{}-{}-{}-{}.{}'.format(
            analysisTitle, tokenparm, "filtered" if filterTrivial else "all",
            refinementCallback.__name__
            if refinementCallback is not None else "raw", pcapName, 'ddc'))
    if disableCache or not os.path.exists(dccachefn):
        # dissect and label messages
        print("Load messages from {}...".format(pcapName))
        specimens = SpecimenLoader(pcapfilename, 2, True)
        comparator = MessageComparator(specimens, 2, True, debug=debug)

        print("Segmenting messages...", end=' ')
        segmentationTime = time.time()
        # select tokenizer by command line parameter
        if tokenizer == "tshark":
            # 1. segment messages according to true fields from the labels
            segmentedMessages = annotateFieldTypes(analyzerType, analysisArgs,
                                                   comparator)
        elif tokenizer == "4bytesfixed":
            # 2. segment messages into fixed size chunks for testing
            segmentedMessages = segmentsFixed(4, comparator, analyzerType,
                                              analysisArgs)
        elif tokenizer == "nemesys":
            # 3. segment messages by NEMESYS
            segmentsPerMsg = bcDeltaGaussMessageSegmentation(specimens, sigma)

            # get analyzer requested by analyzerType/analysisArgs
            segmentedMessages = [[
                MessageSegment(
                    MessageAnalyzer.findExistingAnalysis(
                        analyzerType, MessageAnalyzer.U_BYTE, seg.message,
                        analysisArgs), seg.offset, seg.length) for seg in msg
            ] for msg in segmentsPerMsg]

            if refinementCallback is not None:
                if refinementCallback.__code__.co_argcount > 1:
                    # assume the second argument is expected to be a distance calculator
                    chainedSegments = list(
                        chain.from_iterable(segmentedMessages))
                    print("Refinement: Calculate distance for {} segments...".
                          format(len(chainedSegments)))
                    # Fall back to a memory-mapped matrix for very many segments.
                    if len(chainedSegments)**2 > MemmapDC.maxMemMatrix:
                        refinementDC = MemmapDC(chainedSegments)
                    else:
                        refinementDC = DelegatingDC(chainedSegments)
                    segmentedMessages = refinementCallback(
                        segmentedMessages, refinementDC)
                else:
                    segmentedMessages = refinementCallback(segmentedMessages)
        else:
            # Fail loudly instead of crashing later with an unbound variable.
            raise ValueError("Unknown tokenizer: " + tokenizer)

        segmentationTime = time.time() - segmentationTime
        print("done.")

        if filterTrivial:
            # Drop one-byte segments and segments consisting only of zeros.
            chainedSegments = [
                seg for seg in chain.from_iterable(segmentedMessages)
                if seg.length > 1 and set(seg.values) != {0}
            ]
        else:
            chainedSegments = list(chain.from_iterable(segmentedMessages))

        print("Calculate distance for {} segments...".format(
            len(chainedSegments)))
        dist_calc_segmentsTime = time.time()
        # Fall back to a memory-mapped matrix for very many segments.
        if len(chainedSegments)**2 > MemmapDC.maxMemMatrix:
            dc = MemmapDC(chainedSegments)
        else:
            dc = DelegatingDC(chainedSegments)
        assert chainedSegments == dc.rawSegments
        dist_calc_segmentsTime = time.time() - dist_calc_segmentsTime
        try:
            with open(dccachefn, 'wb') as f:
                pickle.dump((segmentedMessages, comparator, dc), f,
                            pickle.HIGHEST_PROTOCOL)
        except MemoryError:
            # The partially written cache file would be corrupt: remove it.
            print("DC could not be cached due to a MemoryError. Removing",
                  dccachefn, "and continuing.")
            os.remove(dccachefn)
    else:
        print("Load distances from cache file {}".format(dccachefn))
        with open(dccachefn, 'rb') as f:
            segmentedMessages, comparator, dc = pickle.load(f)
        # Sanity-check the unpickled objects before using them.
        if not (isinstance(comparator, MessageComparator)
                and isinstance(dc, DistanceCalculator)):
            print('Loading of cached distances failed.')
            exit(10)
        specimens = comparator.specimens
        # No segmentation/distance work was done when loading from cache.
        segmentationTime, dist_calc_segmentsTime = None, None

    return specimens, comparator, segmentedMessages, dc, segmentationTime, dist_calc_segmentsTime