def reassemble(protocol, strict=False):
    """Create the reassembly object for fragmented datagrams.

    Arguments:
        protocol (Union[str, Type[Protocol]]): protocol to be reassembled;
            a :class:`Protocol` subclass is first converted by calling its
            ``__index__()`` method
        strict (bool): if return all datagrams (including those not
            implemented) when submit

    Returns:
        Union[IPv4_Reassembly, IPv6_Reassembly, TCP_Reassembly]: a Reassembly
        object from ``pcapkit.reassembly`` matching ``protocol``

    Raises:
        FormatError: If ``protocol`` is none of ``'IPv4'``, ``'IPv6'``
            or ``'TCP'``.

    """
    given_protocol_class = isinstance(protocol, type) and issubclass(protocol, Protocol)
    if given_protocol_class:
        protocol = protocol.__index__()

    # validate argument types before dispatching
    str_check(protocol)
    bool_check(strict)

    # (name, class) pairs, probed in the same order as the supported list
    candidates = (
        ('IPv4', IPv4_Reassembly),
        ('IPv6', IPv6_Reassembly),
        ('TCP', TCP_Reassembly),
    )
    for name, reassembly_class in candidates:
        if protocol == name:
            return reassembly_class(strict=strict)

    raise FormatError(f'Unsupported reassembly protocol: {protocol}')
def reassemble(protocol, strict=False):
    """Reassemble fragmented datagrams.

    Arguments:
        protocol (Union[str, Type[Protocol]]): protocol to be reassembled;
            a :class:`Protocol` subclass is replaced by the value of its
            ``id()`` method
        strict (bool): if return all datagrams (including those not
            implemented) when submit

    Returns:
        Union[IPv4_Reassembly, IPv6_Reassembly, TCP_Reassembly]: a
        :class:`~pcapkit.reassembly.reassembly.Reassembly` object of
        corresponding protocol

    Raises:
        FormatError: If ``protocol`` is **NOT** any of IPv4, IPv6 or TCP.

    """
    if isinstance(protocol, type) and issubclass(protocol, Protocol):
        protocol = protocol.id()

    # validate argument types before picking the implementation
    str_check(protocol)
    bool_check(strict)

    # choose the implementation first, instantiate it in one place below
    if protocol == 'IPv4':
        reassembly_class = IPv4_Reassembly
    elif protocol == 'IPv6':
        reassembly_class = IPv6_Reassembly
    elif protocol == 'TCP':
        reassembly_class = TCP_Reassembly
    else:
        raise FormatError(f'Unsupported reassembly protocol: {protocol}')

    return reassembly_class(strict=strict)
def follow_tcp_stream(fin=None, verbose=False, extension=True, engine=None,      # Extractor options
                      fout=None, format=None, byteorder=None, nanosecond=None):  # TraceFlow options
    """Follow TCP streams.

    The input file is extracted with flow tracing enabled; the frames of each
    traced flow are then fed to a fresh ``TCP_Reassembly(strict=False)`` and
    the reassembled payloads are collected per flow.

    Arguments:
        fin (Optional[str]): file name to be read; if file not exist, raise :exc:`FileNotFound`
        extension (bool): if check and append extensions to output file
        verbose (bool): if print verbose output information
        engine (Optional[Literal['default', 'pcapkit', 'dpkt', 'scapy', 'pyshark', 'server', 'pipeline']]):
            extraction engine to be used; ``'pyshark'`` is not supported here
            and falls back to the default engine with an :exc:`EngineWarning`

        fout (Optional[str]): path name for flow tracer if necessary
        format (Optional[Literal['plist', 'json', 'tree', 'pcap']]): output file format of flow tracer
        byteorder (Optional[Literal['little', 'big']]): output file byte order;
            :data:`None` selects :data:`sys.byteorder`
        nanosecond (Optional[bool]): output nanosecond-resolution file flag;
            :data:`None` is treated as :data:`False`

    Returns:
        Tuple[pcapkit.corekit.infoclass.Info]: Tuple of extracted TCP streams,
        each carrying ``filename``, ``packets`` and ``conversations``.

    """
    import sys  # local import: only needed to resolve the default byte order

    if isinstance(engine, str) and engine.casefold() == 'pyshark':
        warnings.warn(f'unsupported extraction engine: {engine}; fallback to default engine',
                      EngineWarning, stacklevel=stacklevel())
        engine = None

    # ``None`` means "not specified": substitute the defaults that
    # ``Extractor`` itself declares for its trace options, rather than
    # forwarding ``None`` and overriding them.
    if byteorder is None:
        byteorder = sys.byteorder
    if nanosecond is None:
        nanosecond = False

    extraction = Extractor(fin=fin, fout=None, format=None, auto=True, extension=extension,
                           store=True, files=False, nofile=True, verbose=verbose, engine=engine,
                           layer=None, protocol=None, ip=False, ipv4=False, ipv6=False, tcp=False,
                           strict=False, trace=True, trace_fout=fout, trace_format=format,
                           trace_byteorder=byteorder, trace_nanosecond=nanosecond)

    # pick the helper that turns a frame into reassembly input for the
    # engine actually used by the extractor
    fallback = False
    if extraction.engine == 'dpkt':
        from pcapkit.toolkit.dpkt import tcp_reassembly
    elif extraction.engine == 'scapy':
        from pcapkit.toolkit.scapy import tcp_reassembly
    else:
        from pcapkit.toolkit.default import tcp_reassembly
        fallback = True

    streams = []
    frames = extraction.frame
    for stream in extraction.trace:
        reassembly = TCP_Reassembly(strict=False)

        packets = []
        for index in stream.index:
            frame = frames[index-1]  # ``stream.index`` entries are offset by one from list positions
            packets.append(frame)

            if fallback:
                flag, data = tcp_reassembly(frame)
            else:
                # the dpkt/scapy helpers additionally take the frame number
                flag, data = tcp_reassembly(frame, count=index)

            if flag:
                reassembly(data)

        streams.append(Info(
            filename=stream.fpout,
            packets=tuple(packets),
            conversations=tuple(datagram.payload for datagram in sorted(
                reassembly.datagram, key=lambda datagram: datagram.index  # make sure the conversations are in order
            )),
        ))
    return tuple(streams)
def __init__(self, *,
             fin=None, fout=None, format=None,                          # basic settings
             auto=True, extension=True, store=True,                     # internal settings
             files=False, nofile=False, verbose=False,                  # output settings
             engine=None, layer=None, protocol=None,                    # extraction settings
             ip=False, ipv4=False, ipv6=False, tcp=False, strict=True,  # reassembly settings
             trace=False, trace_fout=None, trace_format=None,           # trace settings
             trace_byteorder=sys.byteorder, trace_nanosecond=False):    # trace settings
    """Initialise PCAP Reader.

    Resolves the file names through ``self.make_name``, records all flags,
    prepares the requested reassembly and flow-tracing helpers, opens the
    input file, selects the output dumper and finally calls ``self.check()``
    and ``self.run()``.

    Keyword Arguments:
        fin (Optional[str]): file name to be read; if file not exist, raise an error
        fout (Optional[str]): file name to be written
        format (Optional[str]): file format of output; the code recognises
            ``'plist'``, ``'json'``, ``'tree'``, ``'html'`` and ``'xml'``, any
            other value disables file output with a :exc:`FormatWarning`

        auto (bool): if automatically run till EOF
        extension (bool): if check and append extensions to output file
        store (bool): if store extracted packet info

        files (bool): if split each frame into different files
        nofile (bool): if no output file is to be dumped
        verbose (bool): if print verbose output information

        engine (Optional[str]): extraction engine to be used; stored
            lower-cased, ``'default'`` when not given
        layer (Optional[str]): extract til which layer
            (``'Link'`` / ``'Internet'`` / ``'Transport'`` / ``'Application'``)
        protocol (Optional[str]): extract til which protocol

        ip (bool): if record data for IPv4 & IPv6 reassembly
        ipv4 (bool): if perform IPv4 reassembly
        ipv6 (bool): if perform IPv6 reassembly
        tcp (bool): if perform TCP reassembly
        strict (bool): if set strict flag for reassembly

        trace (bool): if trace TCP traffic flows
        trace_fout (Optional[str]): path name for flow tracer if necessary
        trace_format (Optional[str]): output file format of flow tracer
            (``'plist'`` / ``'json'`` / ``'tree'`` / ``'html'`` / ``'pcap'``);
            ``'pcap'`` is replaced by :data:`None` (with a
            :exc:`FormatWarning`) when the engine is ``'pyshark'``
        trace_byteorder (str): output file byte order (``'little'`` / ``'big'``)
        trace_nanosecond (bool): output nanosecond-resolution file flag

    """
    resolved = self.make_name(fin, fout, format, extension, files=files, nofile=nofile)
    input_name, output_name, output_format, output_ext, split_files = resolved
    # rebind the shadowed local to the ``__fmt__`` name defined outside this
    # method -- NOTE(review): presumably the saved builtin ``format``; confirm
    format = __fmt__

    # file names
    self._ifnm = input_name             # input file name
    self._ofnm = output_name            # output file name
    self._fext = output_ext             # output file extension

    # flags
    self._flag_a = auto                 # run automatically
    self._flag_d = store                # keep extracted data
    self._flag_e = False                # EOF reached
    self._flag_f = split_files          # one output file per frame
    self._flag_m = False                # multiprocessing in use
    self._flag_q = nofile               # suppress file output
    self._flag_t = trace                # trace flows
    self._flag_v = verbose              # verbose output

    # extraction state
    self._frnum = 0                     # frame counter
    self._frame = []                    # recorded frames
    self._proto = None                  # ProtoChain of current frame
    self._reasm = [None, None, None]    # reassembly slots (IPv4 / IPv6 / TCP)
    self._trace = NotImplemented        # flow tracer placeholder

    # reassembly switches
    self._ipv4 = ipv4 or ip             # IPv4 reassembly
    self._ipv6 = ipv6 or ip             # IPv6 reassembly
    self._tcp = tcp                     # TCP reassembly

    # extraction limits & engine
    self._exptl = protocol or 'null'                # stop at protocol
    self._exlyr = (layer or 'none').capitalize()    # stop at layer
    self._exeng = (engine or 'default').lower()     # engine name

    # instantiate only the reassembly helpers that were asked for
    if self._ipv4:
        from pcapkit.reassembly.ipv4 import IPv4_Reassembly
        self._reasm[0] = IPv4_Reassembly(strict=strict)
    if self._ipv6:
        from pcapkit.reassembly.ipv6 import IPv6_Reassembly
        self._reasm[1] = IPv6_Reassembly(strict=strict)
    if self._tcp:
        from pcapkit.reassembly.tcp import TCP_Reassembly
        self._reasm[2] = TCP_Reassembly(strict=strict)

    if trace:
        from pcapkit.foundation.traceflow import TraceFlow

        # the pyshark engine cannot be combined with PCAP trace output
        if self._exeng == 'pyshark':
            if re.fullmatch('pcap', str(trace_format), re.IGNORECASE):
                warnings.warn(
                    f"'Extractor(engine={self._exeng})' does not support 'trace_format={trace_format}'; "
                    "using 'trace_format=None' instead",
                    FormatWarning, stacklevel=stacklevel())
                trace_format = None

        self._trace = TraceFlow(
            fout=trace_fout,
            format=trace_format,
            byteorder=trace_byteorder,
            nanosecond=trace_nanosecond,
        )

    self._ifile = open(input_name, 'rb')    # input file, binary mode

    if not self._flag_q:
        # choose the dumper base class for the requested output format
        if output_format == 'plist':
            from dictdumper import PLIST as output
        elif output_format == 'json':
            from dictdumper import JSON as output
        elif output_format == 'tree':
            from dictdumper import Tree as output
        elif output_format == 'html':
            from dictdumper import JavaScript as output
        elif output_format == 'xml':
            from dictdumper import XML as output
        else:
            from pcapkit.dumpkit import NotImplementedIO as output
            warnings.warn(
                f'unsupported output format: {output_format}; disabled file output feature',
                FormatWarning, stacklevel=stacklevel())

        # dumper specialised for the value types handled in ``object_hook``
        class DictDumper(output):

            @classmethod
            def object_hook(cls, obj):
                """Convert enum, IP address and ``Info`` values; defer the rest to the base class."""
                import enum

                import aenum

                int_enums = (enum.IntEnum, aenum.IntEnum)
                if isinstance(obj, int_enums):
                    return f'No.{obj.value} {obj.name}'
                if isinstance(obj, ipaddress._BaseAddress):
                    return str(obj)
                if isinstance(obj, Info):
                    return dict(obj)
                return super().object_hook(obj)

        # split-file mode keeps the class itself; otherwise one dumper
        # instance is created for the output file name
        if self._flag_f:
            self._ofile = DictDumper
        else:
            self._ofile = DictDumper(output_name)

    self.check()    # check layer & protocol
    self.run()      # start extraction