def __init__(self, specimens: sl.SpecimenLoader, layer: int = -1, relativeToIP: bool = False,
             failOnUndissectable=True, debug=False):
    """
    Prepare a comparator for all messages of the given specimen loader and
    immediately dissect and label them (result kept in self._dissections).

    :param specimens: Loader providing the message pool and the base layer of the PCAP.
    :param layer: Protocol layer to target for dissection (default -1; exact semantics
        are defined by _dissectAndLabel, which is implemented elsewhere in this class).
    :param relativeToIP: Whether `layer` is counted relative to the IP layer.
    :param failOnUndissectable: Presumably makes dissection raise instead of skipping
        messages tshark cannot dissect -- behavior lives in _dissectAndLabel, TODO confirm.
    :param debug: Enable debug output for this instance.
    """
    self.specimens = specimens
    # Mapping from abstract messages to their raw netzob messages, as supplied by the loader.
    self.messages = specimens.messagePool  # type: OrderedDict[AbstractMessage, netzob.RawMessage]
    """:type messages: OrderedDict[AbstractMessage, RawMessage]"""
    self.baselayer = specimens.getBaseLayerOfPCAP()
    self.debug = debug
    # Cache messages that already have been parsed and labeled
    self._messageCache = dict()  # type: Dict[netzob.RawMessage, ]
    self._targetlayer = layer
    self._relativeToIP = relativeToIP
    self._failOnUndissectable = failOnUndissectable
    # Must run last: _dissectAndLabel presumably reads the attributes set above.
    self._dissections = self._dissectAndLabel(self.messages.values())
# NOTE(review): fragment -- the enclosing parser.add_argument(...) call begins above this chunk.
                    '(typically the payload of a transport protocol).')
parser.add_argument('-r', '--relativeToIP', default=False, action='store_true')
args = parser.parse_args()

if not isfile(args.pcapfilename):
    print('File not found: ' + args.pcapfilename)
    exit(1)

import time
swstart = time.time()
print('\nLoading ...')
# Load and dissect the PCAP once; the comparator reuses the same layer settings.
specimens = SpecimenLoader(args.pcapfilename, layer=args.layer, relativeToIP=args.relativeToIP)
comparator = MessageComparator(specimens, layer=args.layer, relativeToIP=args.relativeToIP,
                               failOnUndissectable=False, debug=debug)
print('Loaded and dissected in {:.3f}s'.format(time.time() - swstart))

print('\nNetzob Inference ...')
# dict ( similaritythreshold : dict ( symbol : (quality, fieldcount, exactf, nearf, uospecific) ) )
if args.smin:
    minThresh = args.smin
    maxThresh = args.smax if args.smax else args.smin
# NOTE(review): nesting is ambiguous in the flattened source; if args.smin is falsy,
# minThresh/maxThresh must be set by code outside this chunk -- confirm against original file.
threshSymbTfmtTime = iterSimilarities(minThresh, maxThresh)
threshSymbTfmt = {t: s for t, (s, r) in threshSymbTfmtTime.items()}
if __name__ == '__main__':
    # Command-line entry point: dissect a PCAP with tshark and parse the result to Python.
    parser = ArgumentParser(
        description='Dissect PCAP with tshark and parse to python.')
    parser.add_argument('pcapfilename', help='pcapfilename')
    parser.add_argument('-l', '--targetlayer', type=int)
    parser.add_argument('-r', '--relativeToIP', default=False, action='store_true')
    args = parser.parse_args()

    if not isfile(args.pcapfilename):
        print('File not found: ' + args.pcapfilename)
        exit(1)

    # BUGFIX: the original tested `if args.targetlayer:`, which treats a valid layer
    # number of 0 the same as an omitted option (0 is falsy). Compare against None,
    # argparse's default for an optional argument that was not given.
    if args.targetlayer is not None:
        specimens = SpecimenLoader(args.pcapfilename, args.targetlayer, args.relativeToIP)
    else:
        specimens = SpecimenLoader(args.pcapfilename)
    pkt = list(specimens.messagePool.values())
    st = time.time()

    ###########################
    # Performance tests, comparing the dissection of one message at a time by the static parseMultiple method,
    # the ParsedMessage constructor and finally the dissection of a batch of all messages.
    ###########################
    #
    # # Single messages with ParsedMessage.parseMultiple test: Dissection ran in 63.48 seconds.
    # pms = dict()
    # for p in pkt:
    #     pms.update(ParsedMessage.parseMultiple([p]))
    # pms = list(pms.values())
def cacheAndLoadDC(pcapfilename: str, analysisTitle: str, tokenizer: str, debug: bool,
                   analyzerType: type, analysisArgs: Tuple=None, sigma: float=None,
                   filterTrivial=False, refinementCallback:Union[Callable, None] = refinements,
                   disableCache=False) \
        -> Tuple[SpecimenLoader, MessageComparator, List[Tuple[MessageSegment]], DistanceCalculator, float, float]:
    """
    cache or load the DistanceCalculator to or from the filesystem

    Loads the PCAP, segments its messages with the selected tokenizer, optionally refines
    the segmentation, and computes pairwise segment distances. The resulting
    (segmentedMessages, comparator, dc) triple is pickled to a per-configuration cache file
    so repeated runs skip the expensive dissection and distance calculation.

    :param pcapfilename: Path of the PCAP trace to analyze.
    :param analysisTitle: Label that becomes part of the cache file name.
    :param tokenizer: One of "tshark", "4bytesfixed", or "nemesys" (any other value leaves
        segmentedMessages unbound -- see the noinspection markers below).
    :param debug: Passed through to the MessageComparator.
    :param analyzerType: Analyzer class handed to the segmenters.
    :param analysisArgs: Extra arguments for the analyzer.
    :param sigma: Gauss sigma for NEMESYS; if not given, looked up per trace or defaulted to 0.9.
    :param filterTrivial: Filter out **one-byte** segments and such just consisting of **zeros**.
    :param refinementCallback: Optional callable to refine the segmentation; if it accepts a
        second argument, a distance calculator is built and passed to it.
    :param disableCache: When experimenting with distances manipulation, deactivate caching!
    :return: Tuple of (specimens, comparator, segmentedMessages, dc, segmentationTime,
        dist_calc_segmentsTime); the two timings are None when loaded from cache.
    """
    pcapbasename = os.path.basename(pcapfilename)
    # if refinementCallback == pcaMocoRefinements:
    #     sigma = pcamocoSigmapertrace[pcapbasename] if not sigma and pcapbasename in pcamocoSigmapertrace else \
    #         0.9 if not sigma else sigma
    # else:
    # Per-trace sigma lookup with a 0.9 fallback; an explicitly passed sigma always wins.
    sigma = sigmapertrace[pcapbasename] if not sigma and pcapbasename in sigmapertrace else \
        0.9 if not sigma else sigma
    pcapName = os.path.splitext(pcapbasename)[0]
    # noinspection PyUnboundLocalVariable
    # Encode the sigma into the cache key only for the nemesys tokenizer, where it matters.
    tokenparm = tokenizer if tokenizer != "nemesys" else \
        "{}{:.0f}".format(tokenizer, sigma * 10)
    # Cache file name encodes every parameter that influences the result.
    dccachefn = os.path.join(
        cacheFolder, 'cache-dc-{}-{}-{}-{}-{}.{}'.format(
            analysisTitle, tokenparm, "filtered" if filterTrivial else "all",
            refinementCallback.__name__ if refinementCallback is not None else "raw",
            pcapName, 'ddc'))
    # dccachefn = 'cache-dc-{}-{}-{}.{}'.format(analysisTitle, tokenizer, pcapName, 'dc')
    if disableCache or not os.path.exists(dccachefn):
        # dissect and label messages
        print("Load messages from {}...".format(pcapName))
        specimens = SpecimenLoader(pcapfilename, 2, True)
        comparator = MessageComparator(specimens, 2, True, debug=debug)

        print("Segmenting messages...", end=' ')
        segmentationTime = time.time()
        # select tokenizer by command line parameter
        if tokenizer == "tshark":
            # 1. segment messages according to true fields from the labels
            segmentedMessages = annotateFieldTypes(analyzerType, analysisArgs, comparator)
        elif tokenizer == "4bytesfixed":
            # 2. segment messages into fixed size chunks for testing
            segmentedMessages = segmentsFixed(4, comparator, analyzerType, analysisArgs)
        elif tokenizer == "nemesys":
            # 3. segment messages by NEMESYS
            segmentsPerMsg = bcDeltaGaussMessageSegmentation(specimens, sigma)

            # get analyzer requested by analyzerType/analysisArgs
            segmentedMessages = [[
                MessageSegment(
                    MessageAnalyzer.findExistingAnalysis(
                        analyzerType, MessageAnalyzer.U_BYTE, seg.message, analysisArgs),
                    seg.offset, seg.length)
                for seg in msg] for msg in segmentsPerMsg]

            # NOTE(review): refinement appears to apply only to the nemesys branch in the
            # flattened source -- confirm nesting against the original file.
            if refinementCallback is not None:
                if refinementCallback.__code__.co_argcount > 1:
                    # assume the second argument is expected to be a distance calculator
                    chainedSegments = list(chain.from_iterable(segmentedMessages))
                    print("Refinement: Calculate distance for {} segments...".format(len(chainedSegments)))
                    # Fall back to a memory-mapped matrix when the full one would not fit in RAM.
                    if len(chainedSegments)**2 > MemmapDC.maxMemMatrix:
                        refinementDC = MemmapDC(chainedSegments)
                    else:
                        refinementDC = DelegatingDC(chainedSegments)
                    segmentedMessages = refinementCallback(segmentedMessages, refinementDC)
                else:
                    segmentedMessages = refinementCallback(segmentedMessages)

        # segments = list(chain.from_iterable(segmentedMessages))
        segmentationTime = time.time() - segmentationTime
        print("done.")

        if filterTrivial:
            # Drop one-byte segments and segments that consist only of zero bytes.
            # noinspection PyUnboundLocalVariable
            chainedSegments = [
                seg for seg in chain.from_iterable(segmentedMessages)
                if seg.length > 1 and set(seg.values) != {0}]
        else:
            # noinspection PyUnboundLocalVariable
            chainedSegments = list(chain.from_iterable(segmentedMessages))

        print("Calculate distance for {} segments...".format(len(chainedSegments)))
        # dc = DistanceCalculator(chainedSegments, reliefFactor=0.33)  # Pairwise similarity of segments: dc.distanceMatrix
        dist_calc_segmentsTime = time.time()
        # Same RAM-size guard as for the refinement distance calculator above.
        if len(chainedSegments)**2 > MemmapDC.maxMemMatrix:
            dc = MemmapDC(chainedSegments)
        else:
            dc = DelegatingDC(chainedSegments)
        assert chainedSegments == dc.rawSegments
        dist_calc_segmentsTime = time.time() - dist_calc_segmentsTime
        try:
            with open(dccachefn, 'wb') as f:
                pickle.dump((segmentedMessages, comparator, dc), f, pickle.HIGHEST_PROTOCOL)
        except MemoryError as e:
            # Pickling huge distance matrices can exhaust memory; remove the partial
            # cache file so a later run does not load a truncated pickle.
            print("DC could not be cached due to a MemoryError. Removing", dccachefn, "and continuing.")
            os.remove(dccachefn)
    else:
        print("Load distances from cache file {}".format(dccachefn))
        with open(dccachefn, 'rb') as f:
            segmentedMessages, comparator, dc = pickle.load(f)
        # Sanity-check the unpickled types before trusting the cache contents.
        if not (isinstance(comparator, MessageComparator)
                and isinstance(dc, DistanceCalculator)):
            print('Loading of cached distances failed.')
            exit(10)
        specimens = comparator.specimens
        # chainedSegments = list(chain.from_iterable(segmentedMessages))
        # Timings are unknown for a cache hit.
        segmentationTime, dist_calc_segmentsTime = None, None

    return specimens, comparator, segmentedMessages, dc, segmentationTime, dist_calc_segmentsTime