def _run_pyshark(self, pyshark):
    """Call pyshark.FileCapture to extract PCAP files."""
    # if not self._flag_a:
    #     self._flag_a = True
    #     warnings.warn(f"'Extractor(engine=pyshark)' object is not iterable; "
    #                   "so 'auto=False' will be ignored", AttributeWarning, stacklevel=stacklevel())
    if self._exlyr != 'None' or self._exptl != 'null':
        warnings.warn("'Extractor(engine=pyshark)' does not support protocol and layer threshold; "
                      f"'layer={self._exlyr}' and 'protocol={self._exptl}' ignored",
                      AttributeWarning, stacklevel=stacklevel())

    if self._ipv4 or self._ipv6 or self._tcp:
        # warn *before* resetting the flags, so the message reports the user's original values
        warnings.warn("'Extractor(engine=pyshark)' object does not support reassembly; "
                      f"so 'ipv4={self._ipv4}', 'ipv6={self._ipv6}' and 'tcp={self._tcp}' will be ignored",
                      AttributeWarning, stacklevel=stacklevel())
        self._ipv4 = self._ipv6 = self._tcp = False
        self._reasm = [None] * 3

    # extract & analyse file
    self._expkg = pyshark
    self._extmp = iter(pyshark.FileCapture(self._ifnm, keep_packets=False))

    # start iteration
    self.record_frames()
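# For reference, a minimal sketch of the pyshark path this engine wraps;
# 'in.pcap' is a placeholder file name, not part of the original code.
# FileCapture yields dissected packets lazily, and keep_packets=False stops
# pyshark from caching every parsed packet in memory.
import pyshark

cap = pyshark.FileCapture('in.pcap', keep_packets=False)
for packet in cap:
    print(packet.highest_layer)     # topmost dissected layer, e.g. 'TCP'
cap.close()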
def make_fout(fout='./tmp', fmt='pcap'):
    """Make root path for output.

    Positional arguments:
        * fout -- str, root path for output
        * fmt -- str, output format

    Returns:
        * output -- dumper of specified format

    """
    if fmt == 'pcap':       # output PCAP file
        from pcapkit.dumpkit import PCAP as output
    elif fmt == 'plist':    # output PLIST file
        from dictdumper import PLIST as output
    elif fmt == 'json':     # output JSON file
        from dictdumper import JSON as output
    elif fmt == 'tree':     # output treeview text file
        from dictdumper import Tree as output
        fmt = 'txt'
    elif fmt == 'html':     # output JavaScript file
        from dictdumper import JavaScript as output
        fmt = 'js'
    elif fmt == 'xml':      # output XML file
        from dictdumper import XML as output
    else:                   # no output file
        from pcapkit.dumpkit import NotImplementedIO as output
        if fmt is not None:
            warnings.warn(f'Unsupported output format: {fmt}; disabled file output feature',
                          FormatWarning, stacklevel=stacklevel())
        return output, ''

    try:
        path = pathlib.Path(fout)
        path.mkdir(parents=True)
    except FileExistsError as error:
        if path.is_dir():
            pass
        elif fmt is None:
            warnings.warn(error.strerror, FileWarning, stacklevel=stacklevel())
        else:
            raise FileExists(*error.args) from None
    except OSError:
        if not path.is_dir():
            raise

    return output, fmt
def run(self):
    """Start extraction."""
    flag = True
    if self._exeng == 'dpkt':
        flag, engine = self.import_test('dpkt', name='DPKT')
        if flag:
            return self._run_dpkt(engine)
    elif self._exeng == 'scapy':
        flag, engine = self.import_test('scapy.all', name='Scapy')
        if flag:
            return self._run_scapy(engine)
    elif self._exeng == 'pyshark':
        flag, engine = self.import_test('pyshark', name='PyShark')
        if flag:
            return self._run_pyshark(engine)
    elif self._exeng == 'pipeline':
        flag, engine = self.import_test('multiprocessing', name='Pipeline Multiprocessing')
        self._flag_m = flag = bool(flag and (self._flag_a and CPU_CNT > 1))
        if self._flag_m:
            return self._run_pipeline(engine)
        warnings.warn('extraction engine Pipeline Multiprocessing is not available; '
                      'using default engine instead', EngineWarning, stacklevel=stacklevel())
    elif self._exeng == 'server':
        flag, engine = self.import_test('multiprocessing', name='Server Multiprocessing')
        self._flag_m = flag = bool(flag and (self._flag_a and CPU_CNT > 2))
        if self._flag_m:
            return self._run_server(engine)
        warnings.warn('extraction engine Server Multiprocessing is not available; '
                      'using default engine instead', EngineWarning, stacklevel=stacklevel())
    elif self._exeng not in ('default', 'pcapkit'):
        flag = False
        warnings.warn(f'unsupported extraction engine: {self._exeng}; '
                      'using default engine instead', EngineWarning, stacklevel=stacklevel())

    # fall back to the default/pcapkit engine
    self._exeng = self._exeng if flag else 'default'
    self.record_header()    # read PCAP global header
    self.record_frames()    # read frames
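# Usage sketch for the engine dispatch above; 'in.pcap' is a placeholder.
# The requested engine is used when importable, otherwise run() falls back
# to the default engine, which is observable via the `engine` property.
from pcapkit import extract

extraction = extract(fin='in.pcap', nofile=True, engine='dpkt')
print(extraction.engine)    # 'dpkt' if DPKT is installed, else 'default'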
@staticmethod
def import_test(engine, *, name=None):
    """Test import of extraction engine module."""
    try:
        engine = importlib.import_module(engine)
        return True, engine
    except ImportError:
        warnings.warn(f"extraction engine '{name or engine}' not available; "
                      'using default engine instead', EngineWarning, stacklevel=stacklevel())
    return False, None
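# The same optional-import pattern in isolation; `load_optional` is a
# hypothetical helper for illustration, not part of pcapkit.
import importlib
import warnings

def load_optional(module, fallback=None):
    """Import `module` if available, else warn and return `fallback`."""
    try:
        return importlib.import_module(module)
    except ImportError:
        warnings.warn(f'{module} not available; falling back', RuntimeWarning, stacklevel=2)
        return fallback

scapy_all = load_optional('scapy.all')      # None when Scapy is not installed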
def _run_scapy(self, scapy_all):
    """Call scapy.all.sniff to extract PCAP files."""
    # if not self._flag_a:
    #     self._flag_a = True
    #     warnings.warn(f"'Extractor(engine=scapy)' object is not iterable; "
    #                   "so 'auto=False' will be ignored", AttributeWarning, stacklevel=stacklevel())
    if self._exlyr != 'None' or self._exptl != 'null':
        warnings.warn("'Extractor(engine=scapy)' does not support protocol and layer threshold; "
                      f"'layer={self._exlyr}' and 'protocol={self._exptl}' ignored",
                      AttributeWarning, stacklevel=stacklevel())

    # extract & analyse file
    self._expkg = scapy_all
    self._extmp = iter(scapy_all.sniff(offline=self._ifnm))

    # start iteration
    self.record_frames()
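# A minimal sketch of the underlying Scapy call ('in.pcap' is a placeholder):
# sniff(offline=...) dissects a capture file instead of a live interface.
from scapy.all import sniff

packets = sniff(offline='in.pcap')
for packet in packets:
    print(packet.summary())     # one-line dissection per frame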
def check(self):
    layer = self._exlyr
    if layer is not None:
        if layer not in LAYER_LIST:
            warnings.warn(f'unrecognised layer: {layer}',
                          LayerWarning, stacklevel=stacklevel())

    protocol = self._exptl
    if protocol is not None:
        def check_protocol(*args):
            for arg in args:
                if arg.lower() not in PROTO_LIST:
                    # report the offending item, not the whole tuple
                    warnings.warn(f'unrecognised protocol: {arg}',
                                  ProtocolWarning, stacklevel=stacklevel())

        if isinstance(protocol, tuple):
            check_protocol(*protocol)
        else:
            check_protocol(protocol)
def _run_dpkt(self, dpkt):
    """Call dpkt.pcap.Reader to extract PCAP files."""
    # if not self._flag_a:
    #     self._flag_a = True
    #     warnings.warn(f"'Extractor(engine=dpkt)' object is not iterable; "
    #                   "so 'auto=False' will be ignored", AttributeWarning, stacklevel=stacklevel())
    if self._exlyr != 'None' or self._exptl != 'null':
        warnings.warn("'Extractor(engine=dpkt)' does not support protocol and layer threshold; "
                      f"'layer={self._exlyr}' and 'protocol={self._exptl}' ignored",
                      AttributeWarning, stacklevel=stacklevel())

    # extract global header
    self.record_header()
    self._ifile.seek(0, os.SEEK_SET)

    # extract & analyse file
    self._expkg = dpkt
    self._extmp = iter(dpkt.pcap.Reader(self._ifile))

    # start iteration
    self.record_frames()
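# A minimal sketch of the dpkt path ('in.pcap' is a placeholder):
# dpkt.pcap.Reader yields (timestamp, raw bytes) pairs, and the caller
# decodes each buffer according to the capture's link-layer type.
import dpkt

with open('in.pcap', 'rb') as file:
    for timestamp, buf in dpkt.pcap.Reader(file):
        eth = dpkt.ethernet.Ethernet(buf)   # assuming an Ethernet capture
        print(timestamp, eth.__class__.__name__)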
import warnings

from pcapkit.const.misc.linktype import LinkType as LINKTYPE
from pcapkit.const.misc.transtype import TransType as TP_PROTO
from pcapkit.utilities.exceptions import ModuleNotFound, stacklevel
from pcapkit.utilities.warnings import ScapyWarning

###############################################################################
# import scapy.all
###############################################################################

try:
    import scapy.all as scapy_all
except ImportError:
    scapy_all = None
    warnings.warn("dependency package 'Scapy' not found",
                  ScapyWarning, stacklevel=stacklevel())

__all__ = ['packet2chain', 'packet2dict',
           'ipv4_reassembly', 'ipv6_reassembly', 'tcp_reassembly', 'tcp_traceflow']


def packet2chain(packet):
    """Fetch Scapy packet protocol chain."""
    if scapy_all is None:
        raise ModuleNotFound("No module named 'scapy'", name='scapy')
    chain = [packet.name]
    payload = packet.payload
    while not isinstance(payload, scapy_all.packet.NoPayload):
        chain.append(payload.name)
        payload = payload.payload
    return ':'.join(chain)
import warnings

from pcapkit.const.reg.linktype import LinkType as LINKTYPE
from pcapkit.const.reg.transtype import TransType as TP_PROTO
from pcapkit.utilities.exceptions import ModuleNotFound, stacklevel
from pcapkit.utilities.warnings import ScapyWarning

###############################################################################
# import scapy.all
###############################################################################

try:
    import scapy.all as scapy_all
except ImportError:
    scapy_all = None
    warnings.warn("dependency package 'Scapy' not found",
                  ScapyWarning, stacklevel=stacklevel())

__all__ = ['packet2chain', 'packet2dict',
           'ipv4_reassembly', 'ipv6_reassembly', 'tcp_reassembly', 'tcp_traceflow']


def packet2chain(packet):
    """Fetch Scapy packet protocol chain.

    Args:
        packet (scapy.packet.Packet): Scapy packet.

    Returns:
        str: Colon (``:``) separated list of protocol chain.

    Raises:
        ModuleNotFound: If Scapy is not installed.

    """
    if scapy_all is None:
        raise ModuleNotFound("No module named 'scapy'", name='scapy')
    chain = [packet.name]
    payload = packet.payload
    while not isinstance(payload, scapy_all.packet.NoPayload):
        chain.append(payload.name)
        payload = payload.payload
    return ':'.join(chain)
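# Illustration of the chain format, assuming Scapy is installed;
# the hand-built packet is only for demonstration.
from scapy.all import IP, TCP, Ether

packet = Ether() / IP() / TCP()
print(packet2chain(packet))     # -> 'Ethernet:IP:TCP'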
def make_fout(fout='./tmp', fmt='pcap'):
    """Make root path for output.

    Positional arguments:
        fout (str): root path for output
        fmt (str): output format

    Returns:
        Tuple[Type[dictdumper.dumper.Dumper], str]: dumper of specified
        format and file extension of output file

    Warns:
        FormatWarning: If ``fmt`` is not supported.
        FileWarning: If ``fout`` exists and ``fmt`` is :data:`None`.

    Raises:
        FileExists: If ``fout`` exists and ``fmt`` is **NOT** :data:`None`.

    """
    if fout is None:
        fout = './tmp'

    if fmt == 'pcap':       # output PCAP file
        from pcapkit.dumpkit import PCAPIO as output
    elif fmt == 'plist':    # output PLIST file
        from dictdumper import PLIST as output
    elif fmt == 'json':     # output JSON file
        from dictdumper import JSON as output
    elif fmt == 'tree':     # output treeview text file
        from dictdumper import Tree as output
        fmt = 'txt'
    elif fmt == 'html':     # output JavaScript file
        from dictdumper import VueJS as output
        fmt = 'js'
    elif fmt == 'xml':      # output XML file
        from dictdumper import XML as output
    else:                   # no output file
        from pcapkit.dumpkit import NotImplementedIO as output
        if fmt is not None:
            warnings.warn(f'Unsupported output format: {fmt}; disabled file output feature',
                          FormatWarning, stacklevel=stacklevel())
        return output, ''

    try:
        pathlib.Path(fout).mkdir(parents=True, exist_ok=True)
    except FileExistsError as error:
        if fmt is None:
            warnings.warn(error.strerror, FileWarning, stacklevel=stacklevel())
        else:
            raise FileExists(*error.args).with_traceback(error.__traceback__) from None

    class DictDumper(output):
        """Customised :class:`~dictdumper.dumper.Dumper` object."""

        def object_hook(self, o):
            """Convert content for function call.

            Args:
                o (:obj:`Any`): object to convert

            Returns:
                :obj:`Any`: the converted object

            """
            import enum
            import aenum

            if isinstance(o, (enum.IntEnum, aenum.IntEnum)):
                return dict(
                    enum=type(o).__name__,
                    desc=o.__doc__,
                    name=o.name,
                    value=o.value,
                )
            if isinstance(o, (ipaddress.IPv4Address, ipaddress.IPv6Address)):
                return str(o)
            if isinstance(o, Info):
                return o.info2dict()
            return super().object_hook(o)

        def default(self, o):
            """Check content type for function call."""
            return 'fallback'

        def _append_fallback(self, value, file):
            if hasattr(value, '__slots__'):
                new_value = {key: getattr(value, key) for key in value.__slots__}
            else:
                new_value = vars(value)

            func = self._encode_func(new_value)
            func(new_value, file)

    return DictDumper, fmt
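# Usage sketch ('./out' and the dump contents are placeholders): the returned
# class is instantiated once per output file, and the second element is the
# file extension to append.
dumper_cls, ext = make_fout(fout='./out', fmt='json')
dumper = dumper_cls('./out/frames.json')
dumper({'src': '192.168.0.1', 'dst': '8.8.8.8'}, name='Frame 1')
print(ext)      # 'json'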
def follow_tcp_stream(fin=None, verbose=False, extension=True, engine=None,        # Extractor options
                      fout=None, format=None, byteorder=None, nanosecond=None):    # TraceFlow options
    """Follow TCP streams.

    Arguments:
        fin (Optional[str]): file name to be read; if file not exist, raise :exc:`FileNotFound`
        extension (bool): if check and append extensions to output file
        verbose (bool): if print verbose output information
        engine (Optional[Literal['default', 'pcapkit', 'dpkt', 'scapy', 'pyshark', 'server', 'pipeline']]):
            extraction engine to be used
        fout (Optional[str]): path name for flow tracer if necessary
        format (Optional[Literal['plist', 'json', 'tree', 'pcap']]): output file format of flow tracer
        byteorder (Literal['little', 'big']): output file byte order
        nanosecond (bool): output nanosecond-resolution file flag

    Returns:
        Tuple[pcapkit.corekit.infoclass.Info]: List of extracted TCP streams.

    """
    if isinstance(engine, str) and engine.casefold() == 'pyshark':
        warnings.warn(f'unsupported extraction engine: {engine}; fallback to default engine',
                      EngineWarning, stacklevel=stacklevel())
        engine = None

    extraction = Extractor(fin=fin, fout=None, format=None, auto=True, extension=extension,
                           store=True, files=False, nofile=True, verbose=verbose, engine=engine,
                           layer=None, protocol=None, ip=False, ipv4=False, ipv6=False, tcp=False,
                           strict=False, trace=True, trace_fout=fout, trace_format=format,
                           trace_byteorder=byteorder, trace_nanosecond=nanosecond)

    fallback = False
    if extraction.engine == 'dpkt':
        from pcapkit.toolkit.dpkt import tcp_reassembly
    elif extraction.engine == 'scapy':
        from pcapkit.toolkit.scapy import tcp_reassembly
    else:
        from pcapkit.toolkit.default import tcp_reassembly
        fallback = True

    streams = list()
    frames = extraction.frame
    for stream in extraction.trace:
        reassembly = TCP_Reassembly(strict=False)

        packets = list()
        for index in stream.index:
            frame = frames[index - 1]
            packets.append(frame)

            if fallback:
                flag, data = tcp_reassembly(frame)
            else:
                flag, data = tcp_reassembly(frame, count=index)
            if flag:
                reassembly(data)

        streams.append(Info(
            filename=stream.fpout,
            packets=tuple(packets),
            conversations=tuple(datagram.payload for datagram in sorted(
                reassembly.datagram,
                key=lambda datagram: datagram.index,  # make sure the conversations are in order
            )),
        ))
    return tuple(streams)
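# Usage sketch ('in.pcap' is a placeholder), assuming follow_tcp_stream is
# importable from the package root:
from pcapkit import follow_tcp_stream

for stream in follow_tcp_stream(fin='in.pcap'):
    print(stream.filename, len(stream.packets))
    for payload in stream.conversations:
        print(payload[:32])     # leading bytes of each reassembled payload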
def _dpkt_read_frame(self):
    """Read frames."""
    from pcapkit.toolkit.dpkt import (ipv4_reassembly, ipv6_reassembly, packet2chain,
                                      packet2dict, tcp_reassembly, tcp_traceflow)

    # fetch DPKT packet
    timestamp, packet = next(self._extmp)

    # extract packet
    if self._dlink.value == 1:
        packet = self._expkg.ethernet.Ethernet(packet)
    elif self._dlink.value == 228:
        packet = self._expkg.ip.IP(packet)
    elif self._dlink.value == 229:
        packet = self._expkg.ip6.IP6(packet)
    else:
        warnings.warn('unrecognised link layer protocol; all analysis functions ignored',
                      DPKTWarning, stacklevel=stacklevel())

        self._frnum += 1
        if self._flag_d:
            self._frame.append(packet)
        return packet

    # verbose output
    self._frnum += 1
    self._proto = packet2chain(packet)
    if self._flag_v:
        print(f' - Frame {self._frnum:>3d}: {self._proto}')

    # write plist
    frnum = f'Frame {self._frnum}'
    if not self._flag_q:
        info = packet2dict(packet, timestamp, data_link=self._dlink)
        if self._flag_f:
            ofile = self._ofile(f'{self._ofnm}/{frnum}.{self._fext}')
            ofile(info, name=frnum)
        else:
            self._ofile(info, name=frnum)

    # record frames
    if self._flag_d:
        setattr(packet, 'packet2dict', packet2dict)
        setattr(packet, 'packet2chain', packet2chain)
        self._frame.append(packet)

    # record fragments
    if self._ipv4:
        flag, data = ipv4_reassembly(packet, count=self._frnum)
        if flag:
            self._reasm[0](data)
    if self._ipv6:
        flag, data = ipv6_reassembly(packet, count=self._frnum)
        if flag:
            self._reasm[1](data)
    if self._tcp:
        flag, data = tcp_reassembly(packet, count=self._frnum)
        if flag:
            self._reasm[2](data)

    # trace flows
    if self._flag_t:
        flag, data = tcp_traceflow(packet, timestamp, data_link=self._dlink, count=self._frnum)
        if flag:
            self._trace(data)

    return packet
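# The numeric link types above come from the tcpdump registry: 1 is Ethernet,
# 228 is raw IPv4 and 229 is raw IPv6. A standalone sketch of the dispatch;
# `decode` is a hypothetical helper for illustration.
import dpkt

DECODERS = {
    1: dpkt.ethernet.Ethernet,      # LINKTYPE_ETHERNET
    228: dpkt.ip.IP,                # LINKTYPE_IPV4
    229: dpkt.ip6.IP6,              # LINKTYPE_IPV6
}

def decode(linktype, buf):
    """Decode raw bytes per link-layer type; fall back to raw bytes."""
    try:
        return DECODERS[linktype](buf)
    except KeyError:
        return buf                  # unrecognised link layer: keep raw bytes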
def __init__(self, *,
             fin=None, fout=None, format=None,                          # basic settings
             auto=True, extension=True, store=True,                     # internal settings
             files=False, nofile=False, verbose=False,                  # output settings
             engine=None, layer=None, protocol=None,                    # extraction settings
             ip=False, ipv4=False, ipv6=False, tcp=False, strict=True,  # reassembly settings
             trace=False, trace_fout=None, trace_format=None,           # trace settings
             trace_byteorder=sys.byteorder, trace_nanosecond=False):    # trace settings
    """Initialise PCAP Reader.

    Keyword arguments:
        * fin -- str, file name to be read; if file not exist, raise an error
        * fout -- str, file name to be written
        * format -- str, file format of output
                    <keyword> 'plist' / 'json' / 'tree' / 'html'
        * auto -- bool, if automatically run till EOF (default is True)
                    <keyword> True / False
        * extension -- bool, if check and append extensions to output file (default is True)
                    <keyword> True / False
        * store -- bool, if store extracted packet info (default is True)
                    <keyword> True / False
        * files -- bool, if split each frame into different files (default is False)
                    <keyword> True / False
        * nofile -- bool, if no output file is to be dumped (default is False)
                    <keyword> True / False
        * verbose -- bool, if print verbose output information (default is False)
                    <keyword> True / False
        * engine -- str, extraction engine to be used
                    <keyword> 'default | pcapkit'
        * layer -- str, extract till which layer
                    <keyword> 'Link' / 'Internet' / 'Transport' / 'Application'
        * protocol -- str, extract till which protocol
                    <keyword> available protocol name
        * ip -- bool, if record data for IPv4 & IPv6 reassembly (default is False)
                    <keyword> True / False
        * ipv4 -- bool, if perform IPv4 reassembly (default is False)
                    <keyword> True / False
        * ipv6 -- bool, if perform IPv6 reassembly (default is False)
                    <keyword> True / False
        * tcp -- bool, if perform TCP reassembly (default is False)
                    <keyword> True / False
        * strict -- bool, if set strict flag for reassembly (default is True)
                    <keyword> True / False
        * trace -- bool, if trace TCP traffic flows (default is False)
                    <keyword> True / False
        * trace_fout -- str, path name for flow tracer if necessary
        * trace_format -- str, output file format of flow tracer
                    <keyword> 'plist' / 'json' / 'tree' / 'html' / 'pcap'
        * trace_byteorder -- str, output file byte order
                    <keyword> 'little' / 'big'
        * trace_nanosecond -- bool, output nanosecond-resolution file flag
                    <keyword> True / False

    """
    ifnm, ofnm, fmt, ext, files = self.make_name(fin, fout, format, extension,
                                                 files=files, nofile=nofile)

    self._ifnm = ifnm               # input file name
    self._ofnm = ofnm               # output file name
    self._fext = ext                # output file extension

    self._flag_a = auto             # auto extract flag
    self._flag_d = store            # store data flag
    self._flag_e = False            # EOF flag
    self._flag_f = files            # split file flag
    self._flag_m = False            # multiprocessing flag
    self._flag_q = nofile           # no output flag
    self._flag_t = trace            # trace flag
    self._flag_v = verbose          # verbose output flag

    self._frnum = 0                 # frame number
    self._frame = list()            # frame record
    self._proto = None              # frame ProtoChain

    self._reasm = [None] * 3        # frame record for reassembly (IPv4 / IPv6 / TCP)
    self._trace = NotImplemented    # flow tracer

    self._ipv4 = ipv4 or ip         # IPv4 Reassembly
    self._ipv6 = ipv6 or ip         # IPv6 Reassembly
    self._tcp = tcp                 # TCP Reassembly

    self._exptl = protocol or 'null'                # extract till protocol
    self._exlyr = (layer or 'none').capitalize()    # extract till layer
    self._exeng = (engine or 'default').lower()     # extract using engine

    if self._ipv4:
        from pcapkit.reassembly.ipv4 import IPv4_Reassembly
        self._reasm[0] = IPv4_Reassembly(strict=strict)
    if self._ipv6:
        from pcapkit.reassembly.ipv6 import IPv6_Reassembly
        self._reasm[1] = IPv6_Reassembly(strict=strict)
    if self._tcp:
        from pcapkit.reassembly.tcp import TCP_Reassembly
        self._reasm[2] = TCP_Reassembly(strict=strict)

    if trace:
        from pcapkit.foundation.traceflow import TraceFlow
        if self._exeng in ('pyshark',) and re.fullmatch('pcap', str(trace_format), re.IGNORECASE):
            warnings.warn(f"'Extractor(engine={self._exeng})' does not support 'trace_format={trace_format}'; "
                          "using 'trace_format=None' instead", FormatWarning, stacklevel=stacklevel())
            trace_format = None
        self._trace = TraceFlow(fout=trace_fout, format=trace_format,
                                byteorder=trace_byteorder, nanosecond=trace_nanosecond)

    self._ifile = open(ifnm, 'rb')      # input file

    if not self._flag_q:
        if fmt == 'plist':
            from dictdumper import PLIST as output          # output PLIST file
        elif fmt == 'json':
            from dictdumper import JSON as output           # output JSON file
        elif fmt == 'tree':
            from dictdumper import Tree as output           # output treeview text file
        elif fmt == 'html':
            from dictdumper import JavaScript as output     # output JavaScript file
        elif fmt == 'xml':
            from dictdumper import XML as output            # output XML file
        else:
            from pcapkit.dumpkit import NotImplementedIO as output  # no output file
            warnings.warn(f'unsupported output format: {fmt}; disabled file output feature',
                          FormatWarning, stacklevel=stacklevel())

        class DictDumper(output):
            @classmethod
            def object_hook(cls, obj):
                import enum
                import aenum

                if isinstance(obj, (enum.IntEnum, aenum.IntEnum)):
                    return f'No.{obj.value} {obj.name}'
                if isinstance(obj, ipaddress._BaseAddress):
                    return str(obj)
                if isinstance(obj, Info):
                    return dict(obj)
                return super().object_hook(obj)

        self._ofile = DictDumper if self._flag_f else DictDumper(ofnm)  # output file

    self.check()    # check layer & protocol
    self.run()      # start extraction
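# A standalone sketch of what the object_hook conversions do to values that
# are not JSON-native; `object_hook` here is a stand-in, not pcapkit's dumper.
import enum
import ipaddress

class TransType(enum.IntEnum):
    TCP = 6

def object_hook(obj):
    if isinstance(obj, enum.IntEnum):
        return f'No.{obj.value} {obj.name}'
    if isinstance(obj, ipaddress._BaseAddress):
        return str(obj)
    return obj

print(object_hook(TransType.TCP))                       # 'No.6 TCP'
print(object_hook(ipaddress.ip_address('192.0.2.1')))   # '192.0.2.1'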
def check_protocol(*args):
    for arg in args:
        if arg.lower() not in PROTO_LIST:
            warnings.warn(f'unrecognised protocol: {arg}',
                          ProtocolWarning, stacklevel=stacklevel())
def _run_server(self, multiprocessing):
    """Use server multiprocessing to extract PCAP files."""
    if not self._flag_m:
        raise UnsupportedCall(f"'Extractor(engine={self._exeng})' has no attribute '_run_server'")

    if not self._flag_q:
        self._flag_q = True
        warnings.warn("'Extractor(engine=server)' does not support output; "
                      f"'fout={self._ofnm}' ignored", AttributeWarning, stacklevel=stacklevel())

    self._frnum = 1                                               # frame number (revised)
    self._expkg = multiprocessing                                 # multiprocessing module
    self._mpsvc = NotImplemented                                  # multiprocessing server process
    self._mpprc = list()                                          # multiprocessing process list
    self._mpfdp = collections.defaultdict(multiprocessing.Queue)  # multiprocessing file pointer

    self._mpmng = multiprocessing.Manager()                       # multiprocessing manager
    self._mpbuf = self._mpmng.dict()                              # multiprocessing frame dict
    self._mpfrm = self._mpmng.list()                              # multiprocessing frame storage
    self._mprsm = self._mpmng.list()                              # multiprocessing reassembly buffer

    self._mpkit = self._mpmng.Namespace()                         # multiprocessing work kit
    self._mpkit.counter = 0                                       # work count (on duty)
    self._mpkit.pool = 1                                          # work pool (ready)
    self._mpkit.eof = False                                       # EOF flag
    self._mpkit.trace = None                                      # flow tracer

    # preparation
    self.record_header()
    self._mpfdp[0].put(self._gbhdr.length)
    self._mpsvc = multiprocessing.Process(
        target=self._server_analyse_frame,
        kwargs={'mpfrm': self._mpfrm, 'mprsm': self._mprsm,
                'mpbuf': self._mpbuf, 'mpkit': self._mpkit}
    )
    self._mpsvc.start()

    # extraction
    while True:
        # check EOF
        if self._mpkit.eof:
            self._update_eof()
            break

        # check counter
        if self._mpkit.pool and self._mpkit.counter < CPU_CNT - 1:
            # update file offset
            self._ifile.seek(self._mpfdp.pop(self._frnum - 1).get(), os.SEEK_SET)

            # create worker
            # print(self._frnum, 'start')
            proc = multiprocessing.Process(
                target=self._server_extract_frame,
                kwargs={'mpkit': self._mpkit, 'mpbuf': self._mpbuf,
                        'mpfdp': self._mpfdp[self._frnum]}
            )

            # update status
            self._mpkit.pool -= 1
            self._mpkit.counter += 1

            # start and record
            proc.start()
            self._frnum += 1
            self._mpprc.append(proc)

        # check buffer
        if len(self._mpprc) >= CPU_CNT - 1:
            [proc.join() for proc in self._mpprc[:-4]]
            del self._mpprc[:-4]
def _run_pipeline(self, multiprocessing):
    """Use pipeline multiprocessing to extract PCAP files."""
    if not self._flag_m:
        raise UnsupportedCall(f"'Extractor(engine={self._exeng})' has no attribute '_run_pipeline'")

    if not self._flag_q:
        self._flag_q = True
        warnings.warn("'Extractor(engine=pipeline)' does not support output; "
                      f"'fout={self._ofnm}' ignored", AttributeWarning, stacklevel=stacklevel())

    self._frnum = 1                                               # frame number (revised)
    self._expkg = multiprocessing                                 # multiprocessing module
    self._mpprc = list()                                          # multiprocessing process list
    self._mpfdp = collections.defaultdict(multiprocessing.Queue)  # multiprocessing file pointer

    self._mpmng = multiprocessing.Manager()                       # multiprocessing manager
    self._mpkit = self._mpmng.Namespace()                         # multiprocessing work kit
    self._mpkit.counter = 0                                       # work count (on duty)
    self._mpkit.pool = 1                                          # work pool (ready)
    self._mpkit.current = 1                                       # current frame number
    self._mpkit.eof = False                                       # EOF flag
    self._mpkit.frames = dict()                                   # frame storage
    self._mpkit.trace = self._trace                               # flow tracer
    self._mpkit.reassembly = copy.deepcopy(self._reasm)           # reassembly buffers

    # preparation
    self.record_header()
    self._mpfdp[0].put(self._gbhdr.length)

    # extraction
    while True:
        # check EOF
        if self._mpkit.eof:
            self._update_eof()
            break

        # check counter
        if self._mpkit.pool and self._mpkit.counter < CPU_CNT:
            # update file offset
            self._ifile.seek(self._mpfdp.pop(self._frnum - 1).get(), os.SEEK_SET)

            # create worker
            # print(self._frnum, 'start')
            proc = multiprocessing.Process(
                target=self._pipeline_read_frame,
                kwargs={'mpkit': self._mpkit, 'mpfdp': self._mpfdp[self._frnum]}
            )

            # update status
            self._mpkit.pool -= 1
            self._mpkit.counter += 1

            # start and record
            proc.start()
            self._frnum += 1
            self._mpprc.append(proc)

        # check buffer
        if len(self._mpprc) >= CPU_CNT:
            [proc.join() for proc in self._mpprc[:-4]]  # pylint: disable=expression-not-assigned
            del self._mpprc[:-4]
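# The offset hand-off both multiprocessing engines rely on, sketched in
# isolation: each worker reads one record, then publishes the offset of the
# next record so the parent can seek and spawn the next worker. Fixed-size
# records stand in for PCAP frames; workers run one at a time for clarity.
import multiprocessing
import os

RECORD = 16     # pretend every "frame" is 16 bytes long

def worker(path, offset, queue):
    """Read one record, then hand the next offset back to the parent."""
    with open(path, 'rb') as file:
        file.seek(offset, os.SEEK_SET)
        data = file.read(RECORD)
    queue.put(offset + len(data))       # next frame starts right after
    print(f'offset {offset}: {data!r}')

if __name__ == '__main__':
    path = 'records.bin'                # placeholder data file
    with open(path, 'wb') as file:
        file.write(bytes(range(64)))    # four 16-byte records

    queue = multiprocessing.Queue()
    queue.put(0)                        # like seeding the global-header length
    for _ in range(4):
        proc = multiprocessing.Process(target=worker, args=(path, queue.get(), queue))
        proc.start()
        proc.join()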