Beispiel #1
0
def parse_combined(path):
    """Split a combined file into its inbound and outbound streams.

    The file is read as bytes and cut at two marker lines: everything before
    the inbound marker is the preamble (decoded as UTF-8), everything between
    the two markers is the inbound data, and everything after the outbound
    marker is the outbound data.

    The scheme comes from the file name: a ``.https`` suffix gives
    ``u'https'``, a ``.noscheme`` suffix gives ``None``, and anything else
    gives ``u'http'``.

    Returns a tuple ``(inbound, outbound, scheme, preamble)`` in which the
    first two items are `Stream` objects wrapping in-memory buffered readers.

    Raises `InputError` if either marker is missing or if the preamble is
    not valid UTF-8.
    """
    path = decode_path(path)

    # Start from the default and override it for the two special suffixes.
    scheme = u'http'
    if path.endswith(u'.https'):
        scheme = u'https'
    elif path.endswith(u'.noscheme'):
        scheme = None

    with io.open(path, 'rb') as f:
        contents = f.read()

    # `partition` yields an empty separator when the marker is absent.
    (raw_preamble, marker, remainder) = contents.partition(
        b'======== BEGIN INBOUND STREAM ========\r\n')
    if not marker:
        raise InputError(u'%s: bad combined file: no inbound marker' % path)

    # The preamble is decoded before looking for the outbound marker, so a
    # bad preamble is reported ahead of a missing outbound marker.
    try:
        preamble = raw_preamble.decode('utf-8')
    except UnicodeError as exc:  # pragma: no cover
        raise InputError(u'%s: invalid UTF-8 in preamble' % path) from exc

    (inbound_data, marker, outbound_data) = remainder.partition(
        b'======== BEGIN OUTBOUND STREAM ========\r\n')
    if not marker:  # pragma: no cover
        raise InputError(u'%s: bad combined file: no outbound marker' % path)

    inbound_reader = io.BufferedReader(io.BytesIO(inbound_data))
    outbound_reader = io.BufferedReader(io.BytesIO(outbound_data))
    inbound = Stream(inbound_reader, name=path + u' (inbound)')
    outbound = Stream(outbound_reader, name=path + u' (outbound)')

    return (inbound, outbound, scheme, preamble)
Beispiel #2
0
def tcpflow_input(paths):
    """Yield everything `_directory_input` produces for tcpflow directories.

    Each item of `paths` is a directory of tcpflow output files. The file
    names are parsed into tuples ``(timestamp, None, src, dest, name)`` and
    handed, one directory at a time, to `_directory_input`.

    Files named ``report.xml`` and ``alerts.txt`` are skipped.

    Raises `InputError` if a file name does not have the expected shape, or
    if two files in one directory share the same source and destination.
    """
    skipped = ('report.xml', 'alerts.txt')
    name_re = re.compile(r'^(\d+)-([^-]+-\d+)-([^-]+-\d+)-\d+$')

    for directory in paths:
        # Streams are paired up by the 4-tuple of "source address, source
        # port, destination address, destination port", so that tuple must be
        # unique within the directory.
        # See https://github.com/simsong/tcpflow/issues/128 .
        # The timestamp from the file name is what the streams get sorted by.
        first_name = {}
        streams_info = []
        for name in os.listdir(directory):
            if name in skipped:
                continue
            found = name_re.match(name)
            if found is None:
                raise InputError('wrong tcpflow filename %s '
                                 '(did you use the right -T option?)' % name)
            timestamp = int(found.group(1))
            endpoints = found.group(2, 3)
            if endpoints in first_name:
                raise InputError('duplicate source+destination address+port: '
                                 '%s vs. %s' % (name, first_name[endpoints]))
            first_name[endpoints] = name
            (src, dest) = endpoints
            streams_info.append((timestamp, None, src, dest, name))

        for exch in _directory_input(directory, streams_info):
            yield exch
Beispiel #3
0
def tcpflow_input(dir_paths):
    """Build path pairs from tcpflow directories and pass them on.

    For every directory in `dir_paths`, each file name is turned into a
    `_StreamInfo`; the infos of one directory are recombined into pairs with
    `_recombine_streams`, and the pairs of all directories are collected.

    Files named ``report.xml`` and ``alerts.txt`` are skipped.

    Returns the result of `_path_pairs_input` called on the collected pairs
    with ``sniff_direction=True`` and ``complain_on_one_sided=True``.

    Raises `InputError` if a file name does not have the expected shape, or
    if two files in one directory share the same source and destination.
    """
    skipped = ('report.xml', 'alerts.txt')
    name_re = re.compile(r'^(\d+)-([^-]+-\d+)-([^-]+-\d+)-\d+$')
    path_pairs = []

    for directory in dir_paths:
        # Recombination relies on the 4-tuple of "source address, source
        # port, destination address, destination port", so that tuple must be
        # unique within a given directory.
        # See https://github.com/simsong/tcpflow/issues/128 .
        first_path = {}
        streams_info = []
        for name in os.listdir(directory):
            if name in skipped:
                continue
            path = os.path.join(directory, name)
            found = name_re.match(name)
            if found is None:
                raise InputError('wrong tcpflow filename %s '
                                 '(did you use the right -T option?)' % name)
            timestamp = int(found.group(1))
            endpoints = found.group(2, 3)
            if endpoints in first_path:
                raise InputError('duplicate source+destination address+port: '
                                 '%s vs. %s' % (path, first_path[endpoints]))
            first_path[endpoints] = path
            (src, dest) = endpoints
            # NOTE(review): `utcfromtimestamp` is deprecated since Python
            # 3.12; it is kept because it yields a naive datetime, and
            # switching to an aware one would change what callers receive.
            info = _StreamInfo(
                path, source=src, destination=dest, connection_hint=None,
                time_hint=datetime.utcfromtimestamp(timestamp),
                sort_hint=timestamp)
            streams_info.append(info)
        path_pairs.extend(_recombine_streams(streams_info))

    return _path_pairs_input(path_pairs, sniff_direction=True,
                             complain_on_one_sided=True)
Beispiel #4
0
def tcpick_input(dir_paths):
    """Build path pairs from tcpick directories and pass them on.

    For every directory in `dir_paths`, each file name is turned into a
    `_StreamInfo`; the infos of one directory are recombined into pairs with
    `_recombine_streams`, and the pairs of all directories are collected.

    Returns the result of `_path_pairs_input` called on the collected pairs
    with ``sniff_direction=True`` and ``complain_on_one_sided=True``.

    Raises `InputError` if a file name does not look like
    ``tcpick_<counter>_<addr>_<addr>_<something>.(serv|clnt).dat``.
    """
    # The dot before ``serv``/``clnt`` is escaped so that only a literal dot
    # is accepted there; unescaped, it would match any character and let
    # malformed names through.
    name_re = re.compile(
        r'^tcpick_(\d+)_([^_]+)_([^_]+)_[^.]+\.(serv|clnt)\.dat$')
    path_pairs = []

    for dir_path in dir_paths:
        # Extract `_StreamInfo` from tcpick filenames so they can be
        # recombined into pairs. This relies on the counter produced by
        # tcpick's ``-F2`` option.
        dir_path = decode_path(dir_path)
        streams_info = []
        for name in os.listdir(dir_path):
            path = os.path.join(dir_path, name)
            match = name_re.match(name)
            if not match:
                raise InputError(u'wrong tcpick filename %s '
                                 u'(did you use the -F2 option?)' % name)
            (counter, src, dest, direction) = match.groups()
            counter = int(counter)
            if direction == 'serv':
                # For "serv" files the two addresses are swapped before
                # being recorded as source and destination.
                (src, dest) = (dest, src)
            streams_info.append(
                _StreamInfo(path,
                            source=src,
                            destination=dest,
                            connection_hint=counter,
                            time_hint=None,
                            sort_hint=counter))
        path_pairs.extend(_recombine_streams(streams_info))

    return _path_pairs_input(path_pairs,
                             sniff_direction=True,
                             complain_on_one_sided=True)
Beispiel #5
0
def har_input(paths):
    """Yield the result of `_process_entry` for every entry of every HAR file.

    Each item of `paths` is passed through `decode_path`, opened as UTF-8
    text (a leading BOM is tolerated) and parsed as JSON. The ``creator``
    object under ``log`` is wrapped in `CreatorInfo`, and each item of
    ``log.entries`` is handed to `_process_entry` together with that creator
    and the path.

    Raises `InputError` if the file is not valid JSON, or if a `TypeError`
    or `KeyError` occurs while reading the parsed structure or processing
    an entry.
    """
    for path in paths:
        # According to the spec, HAR files are UTF-8 with an optional BOM.
        path = decode_path(path)
        with io.open(path, 'rt', encoding='utf-8-sig') as f:
            try:
                data = json.load(f)
            except ValueError as exc:
                raise InputError('%s: bad HAR file: %s' % (path, exc)) from exc
        # The file is fully consumed by `json.load`, so it is closed here,
        # before the first `yield`. Otherwise the handle would stay open for
        # as long as the consumer keeps this generator suspended.
        try:
            creator = CreatorInfo(data['log']['creator'])
            for entry in data['log']['entries']:
                yield _process_entry(entry, creator, path)
        except (TypeError, KeyError) as exc:
            raise InputError('%s: cannot understand HAR file: %r' %
                             (path, exc)) from exc
Beispiel #6
0
def tcpick_input(paths):
    """Yield everything `_directory_input` produces for tcpick directories.

    Each item of `paths` is a directory of tcpick output files. The file
    names are parsed into tuples ``(counter, counter, src, dest, name)`` and
    handed, one directory at a time, to `_directory_input`.

    Raises `InputError` if a file name does not look like
    ``tcpick_<counter>_<addr>_<addr>_<something>.(serv|clnt).dat``.
    """
    # The dot before ``serv``/``clnt`` is escaped so that only a literal dot
    # is accepted there; unescaped, it would match any character and let
    # malformed names through.
    name_re = re.compile(
        r'^tcpick_(\d+)_([^_]+)_([^_]+)_[^.]+\.(serv|clnt)\.dat$')

    for dir_path in paths:
        # Parse tcpick filenames in order to combine them into pairs.
        # We rely on the counter produced by the ``-F2`` option.
        streams_info = []
        for name in os.listdir(dir_path):
            match = name_re.match(name)
            if not match:
                raise InputError('wrong tcpick filename %s '
                                 '(did you use the -F2 option?)' % name)
            (counter, src, dest, direction) = match.groups()
            counter = int(counter)
            if direction == 'serv':
                # For "serv" files the two addresses are swapped before
                # being recorded as source and destination.
                (src, dest) = (dest, src)
            streams_info.append((counter, counter, src, dest, name))
        for exch in _directory_input(dir_path, streams_info):
            yield exch
Beispiel #7
0
def streams_input(paths):
    """Group `paths` two by two and pass the pairs to `_path_pairs_input`.

    Consecutive items of `paths` form one pair; each pair is extended with a
    trailing ``None`` before being passed on.

    Returns the result of `_path_pairs_input` called with
    ``sniff_direction=False``.

    Raises `InputError` if the number of paths is odd.
    """
    count = len(paths)
    if count % 2:
        raise InputError(u'even number of input streams required')

    pairs = []
    for first in range(0, count, 2):
        pairs.append((paths[first], paths[first + 1], None))
    return _path_pairs_input(pairs, sniff_direction=False)
Beispiel #8
0
def streams_input(paths):
    """Yield everything `_path_pairs_input` produces for paired-up `paths`.

    Consecutive items of `paths` form one pair. The pairs are passed on as a
    lazy generator.

    Raises `InputError` if the number of paths is odd. As this function is
    itself a generator, the error surfaces on first iteration, not on call.
    """
    if len(paths) % 2:
        raise InputError('even number of input streams required')

    path_pairs = ((paths[first], paths[first + 1])
                  for first in range(0, len(paths), 2))
    for exch in _path_pairs_input(path_pairs):
        yield exch