コード例 #1
0
ファイル: formatRefinement.py プロジェクト: yigechen1/nemesys
    def split(self):
        """
        Shift the boundary between directly adjacent segments by the amount
        returned from ``self.lookupLeastFrequent``.

        The segments are walked from left to right. For each segment that starts exactly where its
        left neighbor ends, ``lookupLeastFrequent`` is asked for a shift of the common boundary:
        a negative shift moves bytes from the left neighbor into the current segment,
        a positive shift moves bytes from the current segment into the left neighbor.
        Shifts outside of the length of the affected segment are ignored.

        :return: List of segments splitted from the input. Empty if there are no input segments.
        """
        if not self.segments:
            # without this guard, self.segments[0] below raises an IndexError for empty input
            return []

        segmentStack = list(reversed(self.segments[1:]))
        mangledSegments = [self.segments[0]]
        while segmentStack:
            segc = segmentStack.pop()
            segl = mangledSegments[-1]
            if segl.offset + segl.length == segc.offset:
                # compare byte pairs' frequency
                splitshift = self.lookupLeastFrequent(segc)
                if (0 > splitshift >= -segl.length) \
                        or (0 < splitshift <= segc.length):
                    if segl.length != -splitshift:
                        # resize the left neighbor by the shift
                        mangledSegments[-1] = MessageSegment(segl.analyzer, segl.offset,
                                                             segl.length + splitshift)
                    else:  # segment to the left completely used up in center
                        del mangledSegments[-1]
                    if self._debug:
                        print("Recombined segments: \n{} and {} into ".format(segl, segc))
                    # move the start of the current segment by the same shift
                    segc = MessageSegment(segc.analyzer, segc.offset + splitshift,
                                          segc.length - splitshift)
                    if self._debug:
                        print("{} and {}".format(mangledSegments[-1] if mangledSegments else 'Empty', segc))
            mangledSegments.append(segc)
        return mangledSegments
コード例 #2
0
ファイル: formatRefinement.py プロジェクト: yigechen1/nemesys
    def split(self):
        """
        Extend printable segments by the printable bytes at the edges of their direct neighbors.

        The segments are processed from left to right. A segment for which ``isPrintable`` is false
        is kept unchanged. For a printable segment, the directly adjacent left neighbor is cut at the
        position returned by ``self.toTheLeft`` and the directly adjacent right neighbor is cut at the
        position returned by ``self.toTheRight``; the cut-off parts are added to the printable segment.
        A neighbor that is used up completely is removed.

        Messages with a single segment are processed like any other, so the segment is retained
        instead of being dropped from the result.

        :return: List of segments splitted from the input.
        """
        segmentStack = list(reversed(self.segments))
        mangledSegments = list()
        # No guard on the number of segments: the loop handles zero or one segment by itself,
        # while skipping it for a single segment would return an empty list and lose that segment.
        while segmentStack:
            # TODO check for equal analyzer, requires equality-check in analyzer
            # from inference.MessageAnalyzer import MessageAnalyzer

            segc = segmentStack.pop()
            # TODO: this is char specific only!
            if not isPrintable(segc.bytes):
                mangledSegments.append(segc)
                continue

            if mangledSegments:
                segl = mangledSegments[-1]
                if segl.offset + segl.length == segc.offset:
                    splitpos = self.toTheLeft(segl)
                    # segment to the left ends with chars, add them to the center segment
                    if splitpos < segl.length:
                        if splitpos > 0:
                            mangledSegments[-1] = MessageSegment(segl.analyzer, segl.offset, splitpos)
                        else:  # segment to the left completely used up in center
                            del mangledSegments[-1]
                        restlen = segl.length - splitpos
                        if self._debug:
                            print("Recombined segments: \n{} and {} into ".format(segl, segc))
                        segc = MessageSegment(segc.analyzer, segc.offset - restlen,
                                              segc.length + restlen)
                        if self._debug:
                            print("{} and {}".format(mangledSegments[-1] if mangledSegments else 'Empty', segc))

            if segmentStack:
                segr = segmentStack[-1]
                if segc.offset + segc.length == segr.offset:
                    splitpos = self.toTheRight(segr)
                    # segment to the right starts with chars, add them to the center segment
                    if splitpos > 0:
                        if segr.length - splitpos > 0:
                            segmentStack[-1] = MessageSegment(segr.analyzer, segr.offset + splitpos,
                                                              segr.length - splitpos)
                        else:  # segment to the right completely used up in center
                            del segmentStack[-1]
                        if self._debug:
                            print("Recombined segments: \n{} and {} into ".format(segc, segr))
                        segc = MessageSegment(segc.analyzer, segc.offset,
                                              segc.length + splitpos)
                        if self._debug:
                            print("{} and {}".format(segc, segmentStack[-1] if segmentStack else 'Empty'))

            mangledSegments.append(segc)
        return mangledSegments
コード例 #3
0
ファイル: formatRefinement.py プロジェクト: yigechen1/nemesys
    def split(self):
        """
        Isolate the first matching entry of ``self._moco`` within each segment.

        For every segment, the entries of ``self._moco`` are tried in order. The first entry found in the
        segment's bytes is cut out as its own segment, together with the remainder to its left and right
        (if any). A segment that already is exactly as long as the matched entry, or that matches no entry,
        is kept unchanged.

        :return: List of segments after the cropping.
        """
        result = list()
        for seg in self.segments:
            for feature in self._moco:
                featpos = seg.bytes.find(feature)
                if featpos < 0:  # feature not contained in this segment, try the next one
                    continue

                featlen = len(feature)
                if seg.length == featlen:  # the segment already is the concise feature
                    result.append(seg)
                else:
                    if CropDistinct._debug:
                        print("\nReplaced {} by:".format(seg.bytes.hex()), end=" ")

                    # (offset, length) of the parts: optional left rest, the feature, optional right rest
                    featstart = seg.offset + featpos
                    parts = list()
                    if featpos > 0:
                        parts.append((seg.offset, featpos))
                    parts.append((featstart, featlen))
                    tail = seg.length - featpos - featlen
                    if tail > 0:
                        parts.append((featstart + featlen, tail))

                    for partoff, partlen in parts:
                        piece = MessageSegment(seg.analyzer, partoff, partlen)
                        result.append(piece)
                        if CropDistinct._debug:
                            print(piece.bytes.hex(), end=" ")
                # only the first (most common) match is handled per segment
                break
            else:
                # no feature matched: retain the segment as it is
                result.append(seg)
                continue
            if CropDistinct._debug:
                print()

        return result
コード例 #4
0
ファイル: formatRefinement.py プロジェクト: yigechen1/nemesys
 def split(self, segmentID: int, chunkLength: int):
     """
     Split one segment into consecutive chunks of a fixed length.

     The last chunk is shorter if the segment length is no multiple of ``chunkLength``.

     :param segmentID: Index into ``self.segments`` of the segment to split.
     :param chunkLength: Length of each chunk; must be at least 1.
     :return: A new list of segments with the selected one replaced by its chunks, or
         ``self.segments`` itself if the selected segment is not longer than ``chunkLength``.
     :raises ValueError: If ``chunkLength`` is smaller than 1.
     """
     if chunkLength < 1:
         # a negative step yields an empty range below, which would silently delete the segment
         raise ValueError('chunkLength must be at least 1, got {}.'.format(chunkLength))

     selSeg = self.segments[segmentID]
     if chunkLength >= selSeg.length:
         return self.segments

     newSegs = [
         MessageSegment(selSeg.analyzer, chunkoff, min(selSeg.nextOffset - chunkoff, chunkLength))
         for chunkoff in range(selSeg.offset, selSeg.nextOffset, chunkLength)
     ]
     return self.segments[:segmentID] + newSegs + self.segments[segmentID + 1:]
コード例 #5
0
def calcHexDist(hexA, hexB):
    """
    Calculate the pair distance of two byte sequences given as hex strings.

    Each hex string is wrapped into a RawMessage with a Value analyzer and represented by one segment
    spanning the whole message.

    :param hexA: Hex string of the first byte sequence.
    :param hexB: Hex string of the second byte sequence.
    :return: The result of DistanceCalculator.pairDistance for the two segments.
    """
    from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
    from inference.analyzers import Value
    from inference.segments import MessageSegment
    from inference.templates import DistanceCalculator

    rawPair = (bytes.fromhex(hexA), bytes.fromhex(hexB))
    # all messages are created before the first analyzer
    valueAnalyzers = [Value(msg) for msg in [RawMessage(raw) for raw in rawPair]]

    wholeSegments = list()
    for valueAnalyzer in valueAnalyzers:
        wholeSegments.append(MessageSegment(valueAnalyzer, 0, len(valueAnalyzer.message.data)))

    calculator = DistanceCalculator(wholeSegments)
    first, second = wholeSegments
    return calculator.pairDistance(first, second)
コード例 #6
0
ファイル: analyzers.py プロジェクト: yohstone/nemesys
    def messageSegmentation(self) -> List[MessageSegment]:
        """
        Segment message by cutting one byte before each position in the first element of the result
        of ``self.inflectionPoints()``.

        Cuts that would create a segment of only one byte are omitted. The last segment always ends
        at the end of the message, and the first segment always starts at offset 0, even if every
        candidate cut has been omitted.

        :return: Segmentation of this message based on this analyzer's type.
        :raises ValueError: If neither values nor analysis parameters are set.
        """
        if not self.values:
            if not self._analysisArgs:
                raise ValueError('No values or analysis parameters set.')
            self.analyze()

        # cut one byte before the inflection
        inflectionPoints = self.inflectionPoints()
        inflectionCuts = [int(i) - 1 for i in inflectionPoints[0]]

        # # cut one byte before the plateau
        # # | has yielded mixed quality results (was better for dhcp, much worse for ntp and dns)
        # # | TODO probably having some kind of precedence whether inflection or plateau is to be kept
        # # | if both cut positions are near to each other might make this worthwhile.
        # highPlats = self.bcHighPlateaus()
        # highPlatCuts = [ int(i)-1 for i in highPlats[0]]
        # # below: sorted( + highPlatCuts)

        messageEnd = len(self._message.data)
        # get candidates to cut segments from message, including the message end
        cutCandidates = [0] + inflectionCuts + [messageEnd]
        # cut only where a segment is of a length larger than 1
        cutPositions = [0] + [
            right for left, right in zip(cutCandidates[:-1], cutCandidates[1:])
            if right - left > 1
        ]
        # make the segmentation end at the end of the message if it does not already
        if cutPositions[-1] != messageEnd:
            if len(cutPositions) > 1:
                # the omitted short tail is merged into the last segment
                cutPositions[-1] = messageEnd
            else:
                # only the start offset is left: keep it, otherwise no segment would be created at all
                cutPositions.append(messageEnd)

        return [
            MessageSegment(self, cutCurr, cutNext - cutCurr)
            for cutCurr, cutNext in zip(cutPositions[:-1], cutPositions[1:])
        ]
コード例 #7
0
ファイル: formatRefinement.py プロジェクト: yigechen1/nemesys
    def merge(self):
        """
        Merge directly adjacent segments for which ``self.condition`` holds.

        Each pair of consecutive input segments is tested. If the right segment starts where the left one
        ends and the condition is true for the pair, the right segment is appended to the last segment of
        the result; otherwise it starts a new segment in the result.

        :return: a new set of segments after the input has been merged
        """
        inputSegments = self.segments
        merged = inputSegments[0:1]
        for left, right in zip(inputSegments[:-1], inputSegments[1:]):
            # TODO check for equal analyzer, requires implementing a suitable equality-check in analyzer
            # from inference.MessageAnalyzer import MessageAnalyzer
            adjacent = left.offset + left.length == right.offset
            if not (adjacent and self.condition(left, right)):
                merged.append(right)
                continue

            # extend the most recent result segment to also cover the right segment
            tail = merged[-1]
            merged[-1] = MessageSegment(tail.analyzer, tail.offset, tail.length + right.length)
            if self._debug:
                print("Merged segments: \n{} and \n{} into \n{}".format(left, right, merged[-1]))
        return merged
コード例 #8
0
ファイル: analyzers.py プロジェクト: yohstone/nemesys
    def messageSegmentation(self) -> List[MessageSegment]:
        """
        Segment the message by passing one segment that spans the whole message
        to ``self._recursivePivotVar``.

        In debug mode, waits for user input before returning.

        :return: The segments returned by the recursive splitting, ordered by their offset.
        """
        wholeMessage = MessageSegment(BitCongruence(self.message), 0,
                                      len(self._message.data))
        orderedSegments = list(self._recursivePivotVar(wholeMessage))
        orderedSegments.sort(key=lambda seg: seg.offset)

        if self.__debug:
            input('next message: ')
        return orderedSegments
コード例 #9
0
ファイル: analyzers.py プロジェクト: yohstone/nemesys
    def messageSegmentation(self) -> List[MessageSegment]:
        """
        Segment message at the positions returned by ``self.steepChanges(200)`` and ``self.zeroBorders()``.

        All positions are shifted by ``self._startskip``. The start (0) and the end of the message are
        added as cuts if missing, so a message without any detected cut results in one segment
        spanning the whole message.

        :return: Segmentation of this message based on this analyzer's type.
        """
        if not self.values:
            self.analyze()

        # value drop or rise more than 200 (?) in one step, split at highest abs(value)
        sc = self.steepChanges(200)  # TODO iterate best value
        # and value drop to or rise from 0, split at the non-zero value
        zb = self.zeroBorders()

        messageEnd = len(self._message.data)
        cutat = numpy.add(sorted(set(sc + zb)), self._startskip).tolist()
        # cutat is empty if no cut was detected: guard the index access
        if not cutat or cutat[0] != 0:
            cutat = [0] + cutat
        if cutat[-1] != messageEnd:
            cutat = cutat + [messageEnd]  # add the message end

        return [
            MessageSegment(self, cutCurr, cutNext - cutCurr)
            for cutCurr, cutNext in zip(cutat[:-1], cutat[1:])
        ]
コード例 #10
0
ファイル: baseAlgorithms.py プロジェクト: yigechen1/nemesys
def generateTestSegments():
    """
    Create a fixed set of test segments.

    Each of the hard-coded byte sequences is wrapped into a RawMessage with a Value analyzer
    and represented by one segment that spans the whole message.

    :return: List of the segments, one per byte sequence, in the order of definition.
    """
    from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
    from inference.analyzers import Value
    from inference.segments import MessageSegment

    samples = (
        (1, 2, 3, 4),
        (2, 3, 4),
        (1, 3, 4),
        (2, 4),
        (2, 3),
        (20, 30, 37, 50, 69, 2, 30),
        (37, 5, 69),
        (0, 0, 0, 0),
        (3, 2, 3, 4),
    )
    # create all messages first, then all analyzers, then all segments
    rawMessages = [RawMessage(bytes(sample)) for sample in samples]
    valueAnalyzers = [Value(rawMessage) for rawMessage in rawMessages]

    testSegments = list()
    for valueAnalyzer in valueAnalyzers:
        testSegments.append(MessageSegment(valueAnalyzer, 0, len(valueAnalyzer.message.data)))
    return testSegments
コード例 #11
0
ファイル: analyzers.py プロジェクト: yohstone/nemesys
    def messageSegmentation(self) -> List[MessageSegment]:
        """
        Segment message at the positions returned by ``self.steepChanges(.3)``, shifted by
        ``self._startskip``. The start (0) and the end of the message are added as cuts if missing.

        produces very bad/unusable results.

        :return: List of the segments between each two consecutive cuts.
        """
        if not self.values:
            self.analyze()

        # sudden drop (inversion?) in progression delta steepness.
        steep = self.steepChanges(.3)  # TODO iterate best value

        boundaries = numpy.add(sorted(set(steep)), self._startskip).tolist()
        if not boundaries or boundaries[0] != 0:
            boundaries.insert(0, 0)
        messageEnd = len(self._message.data)
        if not boundaries or boundaries[-1] != messageEnd:
            boundaries.append(messageEnd)  # add the message end

        return [
            MessageSegment(self, start, stop - start)
            for start, stop in zip(boundaries, boundaries[1:])
        ]
コード例 #12
0
ファイル: analyzers.py プロジェクト: yohstone/nemesys
    def messageSegmentation(self) -> List[MessageSegment]:
        """
        Segment message at the positions returned by ``self.pinpointMinima()``.

        Cuts that would create a segment of only one byte are omitted. The last segment always ends
        at the end of the message, and the first segment always starts at offset 0, even if every
        candidate cut has been omitted.

        >>> from netzob.Model.Vocabulary.Messages.L4NetworkMessage import L4NetworkMessage
        >>> tstmsg = '19040aec0000027b000012850a6400c8d23d06a2535ed71ed23d09faa4673315d23d09faa1766325d23d09faa17b4b10'
        >>> l4m = L4NetworkMessage(bytes.fromhex(tstmsg))
        >>> hbg = HorizonBitcongruenceGauss(l4m)
        >>> hbg.setAnalysisParams()
        >>> hbg.analyze()
        >>> spm = hbg.messageSegmentation()
        >>> print(b''.join([seg.bytes for seg in spm]).hex() == spm[0].message.data.hex())
        True

        :return: Segmentation of this message based on this analyzer's type.
        :raises ValueError: If neither values nor analysis parameters are set.
        """
        if not self.values:
            if not self._analysisArgs:
                raise ValueError('No values or analysis parameters set.')
            self.analyze()

        bclmins = self.pinpointMinima()

        messageEnd = len(self._message.data)
        # prevent 1 byte segments, since they do not contain usable congruence!
        cutCandidates = [0] + [int(b) for b in bclmins] + [messageEnd]  # add the message end
        cutPositions = [0] + [
            right for left, right in zip(cutCandidates[:-1], cutCandidates[1:])
            if right - left > 1
        ]
        # make the segmentation end at the end of the message if it does not already
        if cutPositions[-1] != messageEnd:
            if len(cutPositions) > 1:
                # the omitted short tail is merged into the last segment
                cutPositions[-1] = messageEnd
            else:
                # only the start offset is left: keep it, otherwise no segment would be created at all
                cutPositions.append(messageEnd)

        return [
            MessageSegment(self, lmaxCurr, lmaxNext - lmaxCurr)
            for lmaxCurr, lmaxNext in zip(cutPositions[:-1], cutPositions[1:])
        ]
コード例 #13
0
ファイル: analyzers.py プロジェクト: yohstone/nemesys
    def _recursivePivotMean(self, segment: MessageSegment):
        """
        Recursively halve the segment as long as the mean of a half deviates from the mean of its parent.

        The segment is split in the middle and the mean of each half is compared to the mean of the
        unsplit segment. A half whose mean differs by more than ``self._meanThreshold`` is split further,
        otherwise it is kept as is. Segments shorter than 4 bytes are never split.

        :param segment: One message segment that should be segmented.
        :return: List of segments after the splitting.
        """
        if not segment.values:
            segment.analyzer.analyze()
        parentMean = segment.mean()

        # we need two bytes for each segment to get a bit congruence of them
        if segment.length < 4:
            return [segment]

        half = segment.length // 2
        halves = (
            MessageSegment(segment.analyzer, segment.offset, half),
            MessageSegment(segment.analyzer, segment.offset + half, segment.length - half),
        )

        # test for recursion conditions, left half first
        splitted = list()
        for part in halves:
            if abs(part.mean() - parentMean) > self._meanThreshold:  # still different
                splitted.extend(self._recursivePivotMean(part))
            else:
                splitted.append(part)
        return splitted
コード例 #14
0
def cacheAndLoadDC(pcapfilename: str, analysisTitle: str, tokenizer: str, debug: bool,
                   analyzerType: type, analysisArgs: Tuple=None, sigma: float=None, filterTrivial=False,
                   refinementCallback:Union[Callable, None] = refinements,
                   disableCache=False) \
        -> Tuple[SpecimenLoader, MessageComparator, List[Tuple[MessageSegment]], DistanceCalculator,
        float, float]:
    """
    cache or load the DistanceCalculator to or from the filesystem

    If no cache file exists for the given parameters (or caching is disabled), the messages are loaded from
    the pcap, segmented by the selected tokenizer, and the distances between the segments are calculated.
    The result is then written to the cache file. Otherwise the segments, comparator, and distance
    calculator are unpickled from the cache file.

    The name of the cache file is built from analysisTitle, the tokenizer (for "nemesys" including the sigma),
    filterTrivial, the name of the refinementCallback, and the base name of the pcap file.

    NOTE(review): the cache is read with pickle.load, which must only be used on trusted files.

    :param pcapfilename: Path of the pcap file to load the messages from.
    :param analysisTitle: Title of the analysis, used as part of the cache file name.
    :param tokenizer: One of "tshark", "4bytesfixed", or "nemesys".
        NOTE(review): any other value leaves segmentedMessages unassigned when the cache is not used,
        which makes the subsequent code fail.
    :param debug: Passed on to the MessageComparator.
    :param analyzerType: Type of the analysis to use for the segments.
    :param analysisArgs: Arguments for the analysis method.
    :param sigma: Sigma for the nemesys tokenizer. If not set, the value is looked up in sigmapertrace by
        the base name of the pcap file and falls back to 0.9.
    :param filterTrivial: Filter out **one-byte** segments and such just consisting of **zeros**.
    :param refinementCallback: Function to refine the segments, only applied for the "nemesys" tokenizer.
        If it accepts more than one argument, a distance calculator for the unrefined segments
        is passed as the second one. None to use the raw segments.
    :param disableCache: When experimenting with distances manipulation, deactivate caching!
        The existing cache file is then ignored, but a new one is still written.
    :return: Tuple of specimens, comparator, segmented messages, distance calculator,
        the duration of the segmentation, and the duration of the distance calculation.
        Both durations are None if the result was loaded from the cache.
    """
    pcapbasename = os.path.basename(pcapfilename)
    # if refinementCallback == pcaMocoRefinements:
    #     sigma = pcamocoSigmapertrace[pcapbasename] if not sigma and pcapbasename in pcamocoSigmapertrace else \
    #         0.9 if not sigma else sigma
    # else:
    # an explicitly given sigma takes precedence, then the per-trace value, then the default of 0.9
    sigma = sigmapertrace[pcapbasename] if not sigma and pcapbasename in sigmapertrace else \
        0.9 if not sigma else sigma
    pcapName = os.path.splitext(pcapbasename)[0]
    # the sigma (times ten) is part of the cache file name only for the nemesys tokenizer
    # noinspection PyUnboundLocalVariable
    tokenparm = tokenizer if tokenizer != "nemesys" else \
        "{}{:.0f}".format(tokenizer, sigma * 10)
    dccachefn = os.path.join(
        cacheFolder, 'cache-dc-{}-{}-{}-{}-{}.{}'.format(
            analysisTitle, tokenparm, "filtered" if filterTrivial else "all",
            refinementCallback.__name__
            if refinementCallback is not None else "raw", pcapName, 'ddc'))
    # dccachefn = 'cache-dc-{}-{}-{}.{}'.format(analysisTitle, tokenizer, pcapName, 'dc')
    if disableCache or not os.path.exists(dccachefn):
        # dissect and label messages
        print("Load messages from {}...".format(pcapName))
        specimens = SpecimenLoader(pcapfilename, 2, True)
        comparator = MessageComparator(specimens, 2, True, debug=debug)

        print("Segmenting messages...", end=' ')
        segmentationTime = time.time()
        # select tokenizer by command line parameter
        if tokenizer == "tshark":
            # 1. segment messages according to true fields from the labels
            segmentedMessages = annotateFieldTypes(analyzerType, analysisArgs,
                                                   comparator)
        elif tokenizer == "4bytesfixed":
            # 2. segment messages into fixed size chunks for testing
            segmentedMessages = segmentsFixed(4, comparator, analyzerType,
                                              analysisArgs)
        elif tokenizer == "nemesys":
            # 3. segment messages by NEMESYS
            segmentsPerMsg = bcDeltaGaussMessageSegmentation(specimens, sigma)

            # get analyzer requested by analyzerType/analysisArgs
            segmentedMessages = [[
                MessageSegment(
                    MessageAnalyzer.findExistingAnalysis(
                        analyzerType, MessageAnalyzer.U_BYTE, seg.message,
                        analysisArgs), seg.offset, seg.length) for seg in msg
            ] for msg in segmentsPerMsg]

            if refinementCallback is not None:
                if refinementCallback.__code__.co_argcount > 1:
                    # assume the second argument is expected to be a distance calculator
                    chainedSegments = list(
                        chain.from_iterable(segmentedMessages))
                    print("Refinement: Calculate distance for {} segments...".
                          format(len(chainedSegments)))
                    # use the memory-mapped calculator if the matrix exceeds the size limit
                    if len(chainedSegments)**2 > MemmapDC.maxMemMatrix:
                        refinementDC = MemmapDC(chainedSegments)
                    else:
                        refinementDC = DelegatingDC(chainedSegments)
                    segmentedMessages = refinementCallback(
                        segmentedMessages, refinementDC)
                else:
                    segmentedMessages = refinementCallback(segmentedMessages)

            # segments = list(chain.from_iterable(segmentedMessages))

        segmentationTime = time.time() - segmentationTime
        print("done.")

        if filterTrivial:
            # drop one-byte segments and segments whose values all are zero
            # noinspection PyUnboundLocalVariable
            chainedSegments = [
                seg for seg in chain.from_iterable(segmentedMessages)
                if seg.length > 1 and set(seg.values) != {0}
            ]
        else:
            # noinspection PyUnboundLocalVariable
            chainedSegments = list(chain.from_iterable(segmentedMessages))

        print("Calculate distance for {} segments...".format(
            len(chainedSegments)))
        # dc = DistanceCalculator(chainedSegments, reliefFactor=0.33)  # Pairwise similarity of segments: dc.distanceMatrix
        dist_calc_segmentsTime = time.time()
        # use the memory-mapped calculator if the matrix exceeds the size limit
        if len(chainedSegments)**2 > MemmapDC.maxMemMatrix:
            dc = MemmapDC(chainedSegments)
        else:
            dc = DelegatingDC(chainedSegments)
        assert chainedSegments == dc.rawSegments
        dist_calc_segmentsTime = time.time() - dist_calc_segmentsTime
        try:
            with open(dccachefn, 'wb') as f:
                pickle.dump((segmentedMessages, comparator, dc), f,
                            pickle.HIGHEST_PROTOCOL)
        except MemoryError as e:
            # remove the incomplete cache file and continue with the result in memory
            print("DC could not be cached due to a MemoryError. Removing",
                  dccachefn, "and continuing.")
            os.remove(dccachefn)
    else:
        print("Load distances from cache file {}".format(dccachefn))
        with open(dccachefn, 'rb') as f:
            segmentedMessages, comparator, dc = pickle.load(f)
        # terminate the program if the cache content is not of the expected types
        if not (isinstance(comparator, MessageComparator)
                and isinstance(dc, DistanceCalculator)):
            print('Loading of cached distances failed.')
            exit(10)
        specimens = comparator.specimens
        # chainedSegments = list(chain.from_iterable(segmentedMessages))
        # no durations are available when the result comes from the cache
        segmentationTime, dist_calc_segmentsTime = None, None

    return specimens, comparator, segmentedMessages, dc, segmentationTime, dist_calc_segmentsTime
コード例 #15
0
ファイル: analyzers.py プロジェクト: yohstone/nemesys
    def messageSegmentation(self) -> List[MessageSegment]:
        """
        Segment message by determining local extrema of sigma-s-gauss-filtered sliding n-byte-mean bit-congruence.

        Cut positions are all minima from ``self.pinpointMinima()``, the filtered maxima from
        ``self.pinpointMaxima()``, and the plateau starts of ``self.bitcongruences``. A maximum or plateau
        start is only used if no already selected position is directly next to it.
        Cuts that would create a segment of only one byte are omitted. The last segment always ends
        at the end of the message, and the first segment always starts at offset 0, even if every
        candidate cut has been omitted.

        >>> from netzob.Model.Vocabulary.Messages.L4NetworkMessage import L4NetworkMessage
        >>> tstmsg = '19040aec0000027b000012850a6400c8d23d06a2535ed71ed23d09faa4673315d23d09faa1766325d23d09faa17b4b10'
        >>> l4m = L4NetworkMessage(bytes.fromhex(tstmsg))
        >>> hbg = HorizonBitcongruenceGauss(l4m)
        >>> hbg.setAnalysisParams()
        >>> hbg.analyze()
        >>> spm = hbg.messageSegmentation()
        >>> print(b''.join([seg.bytes for seg in spm]).hex() == spm[0].message.data.hex())
        True

        :return: Segmentation of this message based on this analyzer's type.
        :raises ValueError: If neither values nor analysis parameters are set.
        """
        if not self.values:
            if not self._analysisArgs:
                raise ValueError('No values or analysis parameters set.')
            self.analyze()

        bcd = MessageAnalyzer.findExistingAnalysis(BitCongruenceDelta,
                                                   MessageAnalyzer.U_BYTE,
                                                   self.message)

        # all local minima
        bclmins = self.pinpointMinima()
        # local maxima, if bc[e] < bc[e+1] or bc[e] > 2*s2mbc[e] for all e in candidate indices
        bclmaxs = self.pinpointMaxima()
        bcdmaxs = [
            e for e in bclmaxs
            if bcd.values[e + 1] > bcd.values[e] or bcd.values[e] > 2 *
            self.bitcongruences[e]
        ]
        # work on a copy to not modify the list returned by pinpointMinima()
        minmax = list(bclmins)
        for bdm in bcdmaxs:  # only keep bcdmaxs if not in scope of min
            if bdm + 1 not in minmax and bdm - 1 not in minmax:
                minmax.append(bdm)
        # starts of plateaus of bit congruences
        bcplats = MessageAnalyzer.plateouStart(
            self.bitcongruences)[0]  # bcd.values
        for bps in bcplats:  # only keep plateau starts if not in scope of min or max
            if bps + 1 not in minmax and bps - 1 not in minmax:
                minmax.append(bps)

        # # separate nan-values
        # nansep = MessageAnalyzer.separateNaNs(self.values)
        relevantPositions = sorted(minmax)
        messageEnd = len(self._message.data)
        # get candidates to cut segments from message, including the message end
        cutCandidates = [0] + [int(b) for b in relevantPositions if not numpy.isnan(b)] \
                        + [messageEnd]
        # cut only where a segment is of a length larger than 1
        cutPositions = [0] + [
            right for left, right in zip(cutCandidates[:-1], cutCandidates[1:])
            if right - left > 1
        ]
        # cutPositions = list(sorted(cutPositions + nansep[0]))
        # make the segmentation end at the end of the message if it does not already
        if cutPositions[-1] != messageEnd:
            if len(cutPositions) > 1:
                # the omitted short tail is merged into the last segment
                cutPositions[-1] = messageEnd
            else:
                # only the start offset is left: keep it, otherwise no segment would be created at all
                cutPositions.append(messageEnd)

        return [
            MessageSegment(self, lmaxCurr, lmaxNext - lmaxCurr)
            for lmaxCurr, lmaxNext in zip(cutPositions[:-1], cutPositions[1:])
        ]
コード例 #16
0
ファイル: analyzers.py プロジェクト: yohstone/nemesys
    def _recursivePivotVar(self, segment: MessageSegment):
        """
        Recursively split the segment at the pivot position that yields the largest length-weighted
        difference between the standard deviations of the two resulting sub-segments.

        Pivots from 2 to n-2 are tested. Each of the two sub-segments of the selected split is compared
        to the standard deviation of the parent segment: if it differs by more than 0.1 times the
        deviation of the parent, the sub-segment is split further, otherwise it is kept as is.
        Segments shorter than 4 bytes are never split.

        :param segment: One message segment that should be segmented.
        :return: List of segments after the splitting.
        """

        if not segment.values:
            segment.analyzer.analyze()
        myvar = segment.stdev()

        if segment.length < 4:  # we need two bytes for each segment to get a bit congruence of them
            return [segment]

        # select a suitable pivot: find the one yielding the highest deviation-difference between the halves
        # splits: varDiff -> (leftSegment, rightSegment); for equal varDiffs the later pivot replaces the earlier
        segmentSplit = dict()
        for pivot in range(2, segment.length - 1):
            leftSegment = MessageSegment(segment.analyzer, segment.offset,
                                         pivot)
            rightSegment = MessageSegment(segment.analyzer,
                                          segment.offset + pivot,
                                          segment.length - pivot)
            # deviation needs to be higher towards the edges to be a probable splitting point
            lenweight = 2 * min(leftSegment.length,
                                rightSegment.length) / segment.length
            segmentSplit[abs(leftSegment.stdev() - rightSegment.stdev()) * lenweight] \
                = (leftSegment, rightSegment)

        if self.__debug:
            from tabulate import tabulate
            print(
                tabulate(
                    sorted([(wlrdiff, ls.offset, ls.stdev(), rs.offset,
                             rs.stdev(), rs.offset + rs.length)
                            for wlrdiff, (ls, rs) in segmentSplit.items()],
                           key=lambda x: x[0]),
                    headers=[
                        'wlrdiff', 'l.o', 'lvar', 'r.o', 'rvar', 'r.b'
                    ]))  #abs(x[3] - x[4])

        # use the segments splitted at selected pivot: search max varDiff in splits
        splitdiffmax = max(segmentSplit.keys())
        leftSegment, rightSegment = segmentSplit[splitdiffmax]
        # weightedThresh = 0.5 * myvar * min(leftSegment.length, rightSegment.length) / segment.length
        weightedThresh = 0.1 * myvar
        if self.__debug:
            print('parent segment stdev:', myvar)
            print('weighted threshold:', weightedThresh)

        # test for recursion conditions: recurse if above weightedThresh
        returnSegments = list()
        if abs(leftSegment.stdev() -
               myvar) > weightedThresh:  # still different
            if self.__debug:
                print('split left', leftSegment.offset)
            returnSegments.extend(self._recursivePivotVar(leftSegment))
        else:
            if self.__debug:
                # report the deviation difference of the left segment (not of the right one)
                print('left finished', abs(leftSegment.stdev() - myvar))
            returnSegments.append(leftSegment)
        if abs(rightSegment.stdev() -
               myvar) > weightedThresh:  # still different
            if self.__debug:
                print('split right', rightSegment.offset)
            returnSegments.extend(self._recursivePivotVar(rightSegment))
        else:
            if self.__debug:
                print('right finished', abs(rightSegment.stdev() - myvar))
            returnSegments.append(rightSegment)

        return returnSegments
コード例 #17
0
ファイル: formatRefinement.py プロジェクト: yigechen1/nemesys
    def merge(self):
        """
        Merge consecutive segments that together form a char sequence.

        Segments are collected on a working stack until they sum up to at least 6 bytes. As long as the
        joined bytes of the working stack pass ``isExtendedCharSeq`` and contain no two consecutive zero
        bytes, further segments are added. The first segment that breaks the char sequence ends the
        candidate: the segments collected before it are merged into one and the breaking segment is
        processed again as the start of a new working stack. If the working stack is no char sequence to
        begin with, only its first segment is emitted unchanged and the others are processed again.

        >>> from utils.loader import SpecimenLoader
        >>> from inference.segmentHandler import bcDeltaGaussMessageSegmentation
        >>> from inference.formatRefinement import CumulativeCharMerger
        >>> sl = SpecimenLoader('../input/dns_ictf2010_deduped-100.pcap', layer=0, relativeToIP=True)
        >>> segmentsPerMsg = bcDeltaGaussMessageSegmentation(sl)
        Segmentation by inflections of sigma-0.6-gauss-filtered bit-variance.
        >>> for messageSegments in segmentsPerMsg:
        ...     ccm = CumulativeCharMerger(messageSegments)
        ...     ccmmsg = ccm.merge()
        ...     if ccmmsg != messageSegments:
        ...         sgms = b''.join([m.bytes for m in ccmmsg])
        ...         sgss = b''.join([m.bytes for m in messageSegments])
        ...         if sgms != sgss:
        ...             print("Mismatch!")

        :return: a new set of segments after the input has been merged
        """
        from inference.segmentHandler import isExtendedCharSeq

        # minimum number of bytes to collect before testing for a char sequence
        minLen = 6

        # reversed, so that pop() yields the segments from left to right
        segmentStack = list(reversed(self.segments))
        newmsg = list()
        # whether the working stack, without its newest segment, was a char sequence
        isCharCand = False
        workingStack = list()
        while segmentStack:
            workingStack.append(segmentStack.pop())
            if sum([len(ws.bytes) for ws in workingStack]) < minLen:
                continue

            # now we have 6 bytes
            # and the merge is a new char candidate
            joinedbytes = b"".join([ws.bytes for ws in workingStack])
            if isExtendedCharSeq(joinedbytes) \
                    and b"\x00\x00" not in joinedbytes:
                isCharCand = True
                continue
            # the last segment ended the char candidate
            elif isCharCand:
                isCharCand = False
                if len(workingStack) > 2:
                    # merge all but the newest segment, which broke the char sequence
                    newlen = sum([ws.length for ws in workingStack[:-1]])
                    newseg = MessageSegment(workingStack[0].analyzer,
                                            workingStack[0].offset, newlen)
                    newmsg.append(newseg)
                else:
                    # retain the original segment (for equality test and to save creating a new object instance)
                    newmsg.append(workingStack[0])
                if len(workingStack) > 1:
                    # process the segment that broke the char sequence again
                    segmentStack.append(workingStack[-1])
                workingStack = list()
            # there was not a char candidate
            else:
                newmsg.append(workingStack[0])
                # push back all but the first segment, so that they are popped in their original order
                for ws in reversed(workingStack[1:]):
                    segmentStack.append(ws)
                workingStack = list()
        # there are segments in the working stack left
        if len(workingStack) > 1 and isCharCand:
            newlen = sum([ws.length for ws in workingStack])
            newseg = MessageSegment(workingStack[0].analyzer,
                                    workingStack[0].offset, newlen)
            newmsg.append(newseg)
        # there was no char sequence and there are segments in the working stack left
        else:
            newmsg.extend(workingStack)
        return newmsg
コード例 #18
0
def segmentsFixed(length: int, comparator,
                  analyzerType: type, analysisArgs: Union[Tuple, None], unit=MessageAnalyzer.U_BYTE, padded=False) \
        -> List[Tuple[MessageSegment, ...]]:
    """
    Segment messages into fixed size chunks.

    >>> from utils.loader import SpecimenLoader
    >>> from validation.dissectorMatcher import MessageComparator
    >>> from inference.analyzers import Value
    >>> from inference.segmentHandler import segmentsFixed
    >>> specimens = SpecimenLoader("../input/ntp_SMIA-20111010_deduped-100.pcap", 2, True)
    >>> comparator = MessageComparator(specimens, 2, True, debug=False)
    >>> segmentedMessages = segmentsFixed(4, comparator, Value, None)
    >>> areIdentical = True
    >>> for msgsegs in segmentedMessages:
    ...     msg = msgsegs[0].message
    ...     msgbytes = b"".join([seg.bytes for seg in msgsegs])
    ...     areIdentical = areIdentical and msgbytes == msg.data
    >>> print(areIdentical)
    True

    :param length: Fixed length for all segments. Must be a positive integer. How an overhanging remainder at the
        end of a message (shorter than length) is treated depends on ``padded``.
    :param comparator: Comparator that contains the payload messages. The keys of ``comparator.messages`` are
        the messages that get segmented.
    :param analyzerType: Type of the analysis. Subclass of inference.analyzers.MessageAnalyzer.
    :param analysisArgs: Arguments for the analysis method.
    :param unit: Base unit for the analysis. Either MessageAnalyzer.U_BYTE or MessageAnalyzer.U_NIBBLE.
    :param padded: Toggle to pad the last segment to the requested fixed length or leave the last segment to be
        shorter than length if the message length is not an exact multiple of the segment length.
        When padding, the message data is extended by zero bytes and the analysis values by NANs.
    :return: One tuple of consecutive fixed-size segments per message of the comparator.
    :raises ValueError: If length is not positive.
    """
    # A zero length would fail with an opaque ZeroDivisionError below and a negative one would silently
    # produce no segments at all (empty range), so reject both up front.
    if length < 1:
        raise ValueError("Segment length must be a positive integer, got {!r}".format(length))

    segments = list()
    for l4msg, _ in comparator.messages.items():
        msglen = len(l4msg.data)
        # Offset just past the last chunk that completely fits into the message.
        # Equals msglen if the message length is an exact multiple of length.
        lastOffset = (msglen // length) * length

        originalAnalyzer = MessageAnalyzer.findExistingAnalysis(
            analyzerType, unit, l4msg, analysisArgs)
        sequence = [
            MessageSegment(originalAnalyzer, offset, length)
            for offset in range(0, lastOffset, length)
        ]

        if msglen > lastOffset:  # append the overlap
            if padded:
                # here are nasty hacks!
                # TODO Better define a new subclass of MessageSegment that internally padds values
                #  (and bytes? what are the guarantees?) to a given length that exceeds the message length
                # number of units missing to fill the last segment up to the full length
                residuepadd = lastOffset + length - msglen
                # work on a shallow copy so that the data of the original message stays untouched
                newMessage = copy.copy(originalAnalyzer.message)
                newMessage.data = newMessage.data + b'\x00' * residuepadd
                newAnalyzer = type(originalAnalyzer)(
                    newMessage, originalAnalyzer.unit)  # type: MessageAnalyzer
                newAnalyzer.setAnalysisParams(*originalAnalyzer.analysisParams)
                # the values are not recomputed for the padded message:
                # the original values are reused and extended by NANs
                padd = [numpy.nan] * residuepadd
                newAnalyzer._values = originalAnalyzer.values + padd
                newSegment = MessageSegment(newAnalyzer, lastOffset, length)
                for seg in sequence:  # replace all previous analyzers to make the sequence homogeneous for this message
                    seg.analyzer = newAnalyzer
                sequence.append(newSegment)
            else:
                # keep the remainder as a final segment that is shorter than length
                newSegment = MessageSegment(originalAnalyzer, lastOffset,
                                            msglen - lastOffset)
                sequence.append(newSegment)

        segments.append(tuple(sequence))

    return segments