Esempio n. 1
0
def reassemble(protocol, strict=False):
    """Create a reassembly object for fragmented datagrams.

    Keyword arguments:
        * protocol -- str, protocol to be reassembled; a ``Protocol`` subclass
                        is also accepted and is converted through its
                        ``__index__()`` hook before validation
        * strict -- bool, flag forwarded as ``strict`` to the reassembly
                        constructor (default is False)
                        <keyword> True / False

    Returns:
        * [if protocol is IPv4] IPv4_Reassembly -- a Reassembly object from `pcapkit.reassembly`
        * [if protocol is IPv6] IPv6_Reassembly -- a Reassembly object from `pcapkit.reassembly`
        * [if protocol is TCP] TCP_Reassembly -- a Reassembly object from `pcapkit.reassembly`

    Raises:
        * FormatError -- if protocol is none of 'IPv4', 'IPv6' or 'TCP'

    """
    # normalise a Protocol subclass before the string validation below
    is_protocol_class = isinstance(protocol, type) and issubclass(protocol, Protocol)
    if is_protocol_class:
        protocol = protocol.__index__()

    # argument validation
    str_check(protocol)
    bool_check(strict)

    # guard clauses: return as soon as a supported protocol matches
    if protocol == 'IPv4':
        return IPv4_Reassembly(strict=strict)
    if protocol == 'IPv6':
        return IPv6_Reassembly(strict=strict)
    if protocol == 'TCP':
        return TCP_Reassembly(strict=strict)

    raise FormatError(f'Unsupported reassembly protocol: {protocol}')
Esempio n. 2
0
def reassemble(protocol, strict=False):
    """Create a reassembly object for fragmented datagrams.

    Arguments:
        protocol (Union[str, Type[Protocol]]): protocol to be reassembled; a
            :class:`Protocol` subclass is replaced by the result of its
            ``id()`` call before validation
        strict (bool): flag forwarded as ``strict`` to the reassembly
            constructor

    Returns:
        Union[IPv4_Reassembly, IPv6_Reassembly, TCP_Reassembly]: a :class:`~pcapkit.reassembly.reassembly.Reassembly`
        object of corresponding protocol

    Raises:
        FormatError: If ``protocol`` is **NOT** any of IPv4, IPv6 or TCP.

    """
    given_protocol_class = isinstance(protocol, type) and issubclass(protocol, Protocol)
    if given_protocol_class:
        protocol = protocol.id()

    # argument validation
    str_check(protocol)
    bool_check(strict)

    # pick the reassembly class first, then build it at a single exit point
    if protocol == 'IPv4':
        reassembly_cls = IPv4_Reassembly
    elif protocol == 'IPv6':
        reassembly_cls = IPv6_Reassembly
    elif protocol == 'TCP':
        reassembly_cls = TCP_Reassembly
    else:
        raise FormatError(f'Unsupported reassembly protocol: {protocol}')

    return reassembly_cls(strict=strict)
Esempio n. 3
0
def follow_tcp_stream(fin=None, verbose=False, extension=True, engine=None,      # Extractor options
                      fout=None, format=None, byteorder=None, nanosecond=None):  # TraceFlow options
    """Follow TCP streams.

    Runs an ``Extractor`` with flow tracing enabled, then reassembles the
    frames of every traced flow with a fresh ``TCP_Reassembly`` instance.

    Arguments:
        fin (Optional[str]): file name to be read (forwarded to ``Extractor``)
        extension (bool): if check and append extensions to output file
        verbose (bool): if print verbose output information
        engine (Optional[Literal['default', 'pcapkit', 'dpkt', 'scapy', 'pyshark', 'server', 'pipeline']]):
            extraction engine to be used; ``'pyshark'`` (case-insensitive) is
            not supported here and falls back to the default engine with an
            :exc:`EngineWarning`

        fout (Optional[str]): path name for flow tracer if necessary
        format (Optional[Literal['plist', 'json', 'tree', 'pcap']]): output file
            format of flow tracer
        byteorder (Literal['little', 'big']): output file byte order
        nanosecond (bool): output nanosecond-resolution file flag

    Returns:
        Tuple[pcapkit.corekit.infoclass.Info]: Tuple of extracted TCP streams;
        each entry carries ``filename``, ``packets`` and ``conversations``.

    """
    pyshark_requested = isinstance(engine, str) and engine.casefold() == 'pyshark'
    if pyshark_requested:
        warnings.warn(f'unsupported extraction engine: {engine}; fallback to default engine',
                      EngineWarning, stacklevel=stacklevel())
        engine = None

    extraction = Extractor(
        fin=fin, fout=None, format=None, auto=True, extension=extension,
        store=True, files=False, nofile=True, verbose=verbose, engine=engine,
        layer=None, protocol=None, ip=False, ipv4=False, ipv6=False, tcp=False,
        strict=False, trace=True, trace_fout=fout, trace_format=format,
        trace_byteorder=byteorder, trace_nanosecond=nanosecond,
    )

    # the dpkt / scapy helpers are called with the frame number as ``count``;
    # the default helper is called with the frame only
    if extraction.engine == 'dpkt':
        from pcapkit.toolkit.dpkt import tcp_reassembly
        takes_count = True
    elif extraction.engine == 'scapy':
        from pcapkit.toolkit.scapy import tcp_reassembly
        takes_count = True
    else:
        from pcapkit.toolkit.default import tcp_reassembly
        takes_count = False

    results = []
    all_frames = extraction.frame
    for flow in extraction.trace:
        reassembler = TCP_Reassembly(strict=False)

        flow_packets = []
        for number in flow.index:
            # flow indices are 1-based frame numbers
            packet = all_frames[number - 1]
            flow_packets.append(packet)

            if takes_count:
                usable, payload = tcp_reassembly(packet, count=number)
            else:
                usable, payload = tcp_reassembly(packet)

            if usable:
                reassembler(payload)

        record = Info(
            filename=flow.fpout,
            packets=tuple(flow_packets),
            conversations=tuple(
                item.payload
                # sort by datagram index so the conversations are in order
                for item in sorted(reassembler.datagram, key=lambda item: item.index)
            ),
        )
        results.append(record)

    return tuple(results)
Esempio n. 4
0
    def __init__(
            self,
            *,
            # basic settings
            fin=None,
            fout=None,
            format=None,
            # internal settings
            auto=True,
            extension=True,
            store=True,
            # output settings
            files=False,
            nofile=False,
            verbose=False,
            # extraction settings
            engine=None,
            layer=None,
            protocol=None,
            # reassembly settings
            ip=False,
            ipv4=False,
            ipv6=False,
            tcp=False,
            strict=True,
            # trace settings
            trace=False,
            trace_fout=None,
            trace_format=None,
            trace_byteorder=sys.byteorder,
            trace_nanosecond=False):
        """Initialise PCAP Reader.

        Resolves the file names through ``self.make_name``, records the
        option flags, creates the requested reassembly and flow-tracing
        helpers, opens the input file, selects the output dumper and finally
        calls ``self.check()`` and ``self.run()``. All arguments are
        keyword-only.

        Keyword arguments:
            * fin  -- str, file name to be read; the name resolved by
                            ``self.make_name`` is opened in binary read mode
            * fout -- str, file name to be written
            * format  -- str, file format of output; any other resolved
                            format disables file output with a FormatWarning
                            <keyword> 'plist' / 'json' / 'tree' / 'html' / 'xml'

            * auto -- bool, if automatically run till EOF (default is True)
                            <keyword> True / False
            * extension -- bool, if check and append extensions to output file (default is True)
                            <keyword> True / False
            * store -- bool, if store extracted packet info (default is True)
                            <keyword> True / False

            * files -- bool, if split each frame into different files (default is False)
                            <keyword> True / False
            * nofile -- bool, if no output file is to be dumped (default is False);
                            when true, no output dumper is set up here
                            <keyword> True / False
            * verbose -- bool, if print verbose output information (default is False)
                            <keyword> True / False

            * engine -- str, extraction engine to be used; lower-cased,
                            falls back to 'default' when not given
            * layer -- str, extract til which layer; capitalised, falls
                            back to 'None' when not given
                            <keyword> 'Link' / 'Internet' / 'Transport' / 'Application'
            * protocol -- str, extract til which protocol; falls back to
                            'null' when not given
                            <keyword> available protocol name

            * ip -- bool, if record data for IPv4 & IPv6 reassembly (default is False);
                            enables both IPv4 and IPv6 reassembly
                            <keyword> True / False
            * ipv4 -- bool, if perform IPv4 reassembly (default is False)
                            <keyword> True / False
            * ipv6 -- bool, if perform IPv6 reassembly (default is False)
                            <keyword> True / False
            * tcp -- bool, if perform TCP reassembly (default is False)
                            <keyword> True / False
            * strict -- bool, strict flag passed to every reassembly object
                            created here (default is True)
                            <keyword> True / False

            * trace -- bool, if trace TCP traffic flows (default is False)
                            <keyword> True / False
            * trace_fout -- str, path name for flow tracer if necessary
            * trace_format -- str, output file format of flow tracer; with the
                            'pyshark' engine, 'pcap' (case-insensitive) is
                            replaced by None with a FormatWarning
                            <keyword> 'plist' / 'json' / 'tree' / 'html' / 'pcap'
            * trace_byteorder -- str, output file byte order (default is sys.byteorder)
                            <keyword> 'little' / 'big'
            * trace_nanosecond -- bool, output nanosecond-resolution file flag (default is False)
                            <keyword> True / False

        """
        # note: ``files`` is rebound to the value returned by make_name
        ifnm, ofnm, fmt, ext, files = \
            self.make_name(fin, fout, format, extension, files=files, nofile=nofile)
        # rebinds the local name ``format`` to the module-level ``__fmt__``
        # (defined outside this view); presumably restores the builtin that
        # the ``format`` parameter shadows -- TODO confirm
        format = __fmt__

        self._ifnm = ifnm  # input file name
        self._ofnm = ofnm  # output file name
        self._fext = ext  # output file extension

        self._flag_a = auto  # auto extract flag
        self._flag_d = store  # store data flag
        self._flag_e = False  # EOF flag
        self._flag_f = files  # split file flag
        self._flag_m = False  # multiprocessing flag
        self._flag_q = nofile  # no output flag
        self._flag_t = trace  # trace flag
        self._flag_v = verbose  # verbose output flag

        self._frnum = 0  # frame number
        self._frame = list()  # frame record
        self._proto = None  # frame ProtoChain

        self._reasm = [None
                       ] * 3  # reassembly slots, in order: IPv4 / IPv6 / TCP
        self._trace = NotImplemented  # flow tracer; replaced below when ``trace`` is set

        self._ipv4 = ipv4 or ip  # IPv4 Reassembly (``ip`` enables it too)
        self._ipv6 = ipv6 or ip  # IPv6 Reassembly (``ip`` enables it too)
        self._tcp = tcp  # TCP Reassembly

        self._exptl = protocol or 'null'  # extract til protocol
        self._exlyr = (layer or 'none').capitalize()  # extract til layer
        self._exeng = (engine or 'default').lower()  # extract using engine

        # reassembly classes are imported only when the matching flag is set
        if self._ipv4:
            from pcapkit.reassembly.ipv4 import IPv4_Reassembly
            self._reasm[0] = IPv4_Reassembly(strict=strict)
        if self._ipv6:
            from pcapkit.reassembly.ipv6 import IPv6_Reassembly
            self._reasm[1] = IPv6_Reassembly(strict=strict)
        if self._tcp:
            from pcapkit.reassembly.tcp import TCP_Reassembly
            self._reasm[2] = TCP_Reassembly(strict=strict)

        if trace:
            from pcapkit.foundation.traceflow import TraceFlow
            # with the pyshark engine a 'pcap' trace format (any letter case)
            # is rejected: warn and continue with no trace format
            if self._exeng in ('pyshark', ) and re.fullmatch(
                    'pcap', str(trace_format), re.IGNORECASE):
                warnings.warn(
                    f"'Extractor(engine={self._exeng})' does not support 'trace_format={trace_format}'; "
                    "using 'trace_format=None' instead",
                    FormatWarning,
                    stacklevel=stacklevel())
                trace_format = None
            self._trace = TraceFlow(fout=trace_fout,
                                    format=trace_format,
                                    byteorder=trace_byteorder,
                                    nanosecond=trace_nanosecond)

        self._ifile = open(ifnm, 'rb')  # input file; not closed in this method
        if not self._flag_q:
            # choose the dumper base class from the format resolved by make_name
            if fmt == 'plist':
                from dictdumper import PLIST as output  # output PLIST file
            elif fmt == 'json':
                from dictdumper import JSON as output  # output JSON file
            elif fmt == 'tree':
                from dictdumper import Tree as output  # output treeview text file
            elif fmt == 'html':
                from dictdumper import JavaScript as output  # output JavaScript file
            elif fmt == 'xml':
                from dictdumper import XML as output  # output XML file
            else:
                from pcapkit.dumpkit import NotImplementedIO as output  # no output file
                warnings.warn(
                    f'unsupported output format: {fmt}; disabled file output feature',
                    FormatWarning,
                    stacklevel=stacklevel())

            # dumper subclass whose hook converts a few object types before
            # deferring to the selected base class
            class DictDumper(output):
                @classmethod
                def object_hook(cls, obj):
                    import enum
                    import aenum
                    # integer enums -> 'No.<value> <name>'
                    if isinstance(obj, (enum.IntEnum, aenum.IntEnum)):
                        return f'No.{obj.value} {obj.name}'
                    # IP address objects -> their string form
                    if isinstance(obj, ipaddress._BaseAddress):
                        return str(obj)
                    # Info objects -> plain dict
                    if isinstance(obj, Info):
                        return dict(obj)
                    return super().object_hook(obj)

            # when splitting into files, keep the class itself; otherwise
            # keep one dumper instance created for ``ofnm``
            self._ofile = DictDumper if self._flag_f else DictDumper(
                ofnm)  # output file

        self.check()  # check layer & protocol
        self.run()  # start extraction