def parse_combined(path):
    """Parse a "combined" capture file into its constituent parts.

    The file consists of a UTF-8 preamble, then the raw inbound byte
    stream, then the raw outbound byte stream, separated by markers.
    The URL scheme is inferred from the filename suffix.

    Returns a tuple ``(inbound, outbound, scheme, preamble)`` where
    `inbound` and `outbound` are `Stream` objects and `scheme` is
    ``u'https'``, ``u'http'``, or ``None``.

    Raises `InputError` if the file is malformed.
    """
    path = decode_path(path)

    # The suffix encodes the scheme; checks don't overlap, so order is free.
    if path.endswith(u'.noscheme'):
        scheme = None
    elif path.endswith(u'.https'):
        scheme = u'https'
    else:
        scheme = u'http'

    with io.open(path, 'rb') as f:
        blob = f.read()

    # ``partition`` yields an empty separator when the marker is absent.
    (raw_preamble, sep, remainder) = \
        blob.partition(b'======== BEGIN INBOUND STREAM ========\r\n')
    if not sep:
        raise InputError(u'%s: bad combined file: no inbound marker' % path)
    try:
        preamble = raw_preamble.decode('utf-8')
    except UnicodeError as exc:         # pragma: no cover
        raise InputError(u'%s: invalid UTF-8 in preamble' % path) from exc

    (inbound_bytes, sep, outbound_bytes) = \
        remainder.partition(b'======== BEGIN OUTBOUND STREAM ========\r\n')
    if not sep:                         # pragma: no cover
        raise InputError(u'%s: bad combined file: no outbound marker' % path)

    inbound = Stream(io.BufferedReader(io.BytesIO(inbound_bytes)),
                     name=path + u' (inbound)')
    outbound = Stream(io.BufferedReader(io.BytesIO(outbound_bytes)),
                      name=path + u' (outbound)')
    return (inbound, outbound, scheme, preamble)
def tcpflow_input(paths):
    """Yield exchanges reconstructed from directories of tcpflow output.

    tcpflow filenames are parsed so the streams can be combined into
    pairs.  We rely on the 4-tuple of "source address, source port,
    destination address, destination port", keeping track of its
    uniqueness (see https://github.com/simsong/tcpflow/issues/128).
    Sorting relies on the timestamp.

    Raises `InputError` on unparseable or duplicate filenames.
    """
    for dir_path in paths:
        streams_info = []
        seen = {}
        for name in os.listdir(dir_path):
            # tcpflow's own report files are not stream data.
            if name in ('report.xml', 'alerts.txt'):
                continue
            match = re.match(r'^(\d+)-([^-]+-\d+)-([^-]+-\d+)-\d+$', name)
            if match is None:
                raise InputError('wrong tcpflow filename %s '
                                 '(did you use the right -T option?)' % name)
            (timestamp, src, dest) = match.groups()
            timestamp = int(timestamp)
            key = (src, dest)
            if key in seen:
                raise InputError('duplicate source+destination address+port: '
                                 '%s vs. %s' % (name, seen[key]))
            seen[key] = name
            streams_info.append((timestamp, None, src, dest, name))
        yield from _directory_input(dir_path, streams_info)
def tcpflow_input(dir_paths):
    """Build exchanges from directories of tcpflow output files.

    Extract `_StreamInfo` from tcpflow filenames so they can be
    recombined into pairs.  This relies on the 4-tuple of "source address,
    source port, destination address, destination port", keeping track
    of its uniqueness within a given directory.
    See https://github.com/simsong/tcpflow/issues/128 .

    Raises `InputError` on unparseable or duplicate filenames.
    """
    path_pairs = []
    for dir_path in dir_paths:
        # Decode the directory path up front, consistently with
        # `tcpick_input`; otherwise `os.path.join` would mix a bytes
        # directory with str entry names.
        dir_path = decode_path(dir_path)
        streams_info = []
        seen = {}
        for name in os.listdir(dir_path):
            # tcpflow's own report files are not stream data.
            if name in ['report.xml', 'alerts.txt']:
                continue
            path = os.path.join(dir_path, name)
            match = re.match(r'^(\d+)-([^-]+-\d+)-([^-]+-\d+)-\d+$', name)
            if not match:
                raise InputError('wrong tcpflow filename %s '
                                 '(did you use the right -T option?)' % name)
            (timestamp, src, dest) = match.groups()
            timestamp = int(timestamp)
            if (src, dest) in seen:
                raise InputError('duplicate source+destination address+port: '
                                 '%s vs. %s' % (path, seen[(src, dest)]))
            seen[(src, dest)] = path
            streams_info.append(_StreamInfo(
                path, source=src, destination=dest, connection_hint=None,
                # NOTE(review): `utcfromtimestamp` is deprecated since
                # Python 3.12; switching to an aware datetime would change
                # the hint's type, so it is kept as-is for now.
                time_hint=datetime.utcfromtimestamp(timestamp),
                sort_hint=timestamp))
        path_pairs.extend(_recombine_streams(streams_info))
    return _path_pairs_input(path_pairs, sniff_direction=True,
                             complain_on_one_sided=True)
def tcpick_input(dir_paths):
    """Build exchanges from directories of tcpick output files.

    Extract `_StreamInfo` from tcpick filenames so they can be
    recombined into pairs.  This relies on the counter produced by
    tcpick's ``-F2`` option.

    Raises `InputError` on filenames that don't look like ``-F2`` output.
    """
    path_pairs = []
    for dir_path in dir_paths:
        dir_path = decode_path(dir_path)
        streams_info = []
        for name in os.listdir(dir_path):
            path = os.path.join(dir_path, name)
            # The dot before the serv/clnt suffix must be escaped:
            # an unescaped ``.`` would let malformed names (with no
            # real separator) slip past the filename check.
            match = re.match(
                r'^tcpick_(\d+)_([^_]+)_([^_]+)_[^.]+\.(serv|clnt)\.dat$',
                name)
            if not match:
                raise InputError(u'wrong tcpick filename %s '
                                 u'(did you use the -F2 option?)' % name)
            (counter, src, dest, direction) = match.groups()
            counter = int(counter)
            # "serv" files are the server's side of the connection, so
            # swap to keep (source, destination) in client->server order.
            if direction == 'serv':
                (src, dest) = (dest, src)
            streams_info.append(
                _StreamInfo(path, source=src, destination=dest,
                            connection_hint=counter, time_hint=None,
                            sort_hint=counter))
        path_pairs.extend(_recombine_streams(streams_info))
    return _path_pairs_input(path_pairs, sniff_direction=True,
                             complain_on_one_sided=True)
def har_input(paths):
    """Yield exchanges parsed from HAR (HTTP Archive) files.

    Raises `InputError` when a file is not valid JSON or does not have
    the structure of a HAR log.
    """
    for path in paths:
        path = decode_path(path)
        # Per the HAR spec, files are UTF-8 with an optional BOM.
        with io.open(path, 'rt', encoding='utf-8-sig') as fh:
            try:
                har = json.load(fh)
            except ValueError as err:
                raise InputError('%s: bad HAR file: %s' % (path, err)) from err
        try:
            creator = CreatorInfo(har['log']['creator'])
            for entry in har['log']['entries']:
                yield _process_entry(entry, creator, path)
        except (TypeError, KeyError) as err:
            raise InputError('%s: cannot understand HAR file: %r' %
                             (path, err)) from err
def tcpick_input(paths):
    """Yield exchanges reconstructed from directories of tcpick output.

    Parse tcpick filenames in order to combine them into pairs.
    We rely on the counter produced by the ``-F2`` option.

    Raises `InputError` on filenames that don't look like ``-F2`` output.
    """
    for dir_path in paths:
        streams_info = []
        for name in os.listdir(dir_path):
            # The dot before the serv/clnt suffix must be escaped:
            # an unescaped ``.`` would let malformed names (with no
            # real separator) slip past the filename check.
            match = re.match(
                r'^tcpick_(\d+)_([^_]+)_([^_]+)_[^.]+\.(serv|clnt)\.dat$',
                name)
            if not match:
                raise InputError('wrong tcpick filename %s '
                                 '(did you use the -F2 option?)' % name)
            (counter, src, dest, direction) = match.groups()
            counter = int(counter)
            # "serv" files are the server's side of the connection, so
            # swap to keep (source, destination) in client->server order.
            if direction == 'serv':
                (src, dest) = (dest, src)
            streams_info.append((counter, counter, src, dest, name))
        yield from _directory_input(dir_path, streams_info)
def streams_input(paths):
    """Build exchanges from explicitly paired stream files.

    `paths` must contain an even number of entries: each consecutive
    (inbound, outbound) couple becomes one pair, with no scheme hint.

    Raises `InputError` when the number of paths is odd.
    """
    if len(paths) % 2:
        raise InputError(u'even number of input streams required')
    # Stride slices walk the even and odd positions in lockstep.
    pairs = [(inbound, outbound, None)
             for (inbound, outbound) in zip(paths[::2], paths[1::2])]
    return _path_pairs_input(pairs, sniff_direction=False)
def streams_input(paths):
    """Build exchanges from explicitly paired stream files.

    `paths` must contain an even number of entries: each consecutive
    (inbound, outbound) couple becomes one pair.

    Raises `InputError` immediately when the number of paths is odd.
    """
    # Validate eagerly and return the delegate's iterator.  The original
    # was itself a generator function, so this check was deferred until
    # the first iteration and an odd path count went unreported until then.
    if len(paths) % 2 != 0:
        raise InputError('even number of input streams required')
    return _path_pairs_input(
        (paths[i], paths[i + 1]) for i in range(0, len(paths), 2))