def read_request(self, reader, context):
    """Read one HTTP request from ``reader`` and pass it to the processor.

    A request carrying an Expect header is handled in two calls: the first
    call stores the header on ``context.expect_header`` and returns; the
    next call picks that header up again and reads the body for it.

    :param reader: data source; must provide ``read(n)`` and ``skip_all()``.
    :param context: per-connection state; ``expect_header`` is read and
        written here, and ``filtered`` is set once a request has been read.
    :return: ``False`` when no request header could be read (the remaining
        data is skipped), ``True`` otherwise.
    """
    pending_header = context.expect_header
    if pending_header:
        # The header was stored by an earlier call; only the body is left.
        context.expect_header = None
        req_header = pending_header
    else:
        req_header = self.read_http_req_header(reader)
        if req_header is None:
            # Reader finished or an error occurred: drop whatever remains.
            reader.skip_all()
            return False
        if req_header.expect:
            # Expect request: keep the header so the next call reads the body.
            context.expect_header = req_header
            return True

    # Read the body, either chunk by chunk or by its declared length.
    if req_header.chunked:
        content = self.read_chunked_body(reader)
    else:
        content = reader.read(req_header.content_len)

    _filter = config.get_filter()
    # Every filter is evaluated; the request is shown only if all accept it.
    verdicts = (
        _filter.by_domain(req_header.host),
        _filter.by_method(req_header.method),
        _filter.by_keyword(content),
        _filter.by_uri(req_header.uri),
    )
    show = all(verdicts)
    context.filtered = not show
    if show:
        self.processor.on_http_req(req_header, content)
    return True
def read_request(self, reader, context):
    """Read one HTTP request from ``reader`` and pass it to the processor.

    A request carrying an Expect header is handled in two calls: the first
    call stores the header on ``context.expect_header`` and returns; the
    next call picks that header up again and reads the body for it.

    :param reader: data source; must provide ``read(n)`` and ``skip_all()``.
    :param context: per-connection state; ``expect_header`` is read and
        written here, and ``filtered`` is set once a request has been read.
    :return: ``False`` when no request header could be read (the remaining
        data is skipped), ``True`` otherwise.
    """
    stored_header = context.expect_header
    if stored_header:
        # Resume an Expect request whose header was saved by an earlier call.
        context.expect_header = None
        req_header = stored_header
    else:
        req_header = self.read_http_req_header(reader)
        if req_header is None:
            # Reader finished or an error occurred: drop whatever remains.
            reader.skip_all()
            return False
        if req_header.expect:
            # Expect request: keep the header so the next call reads the body.
            context.expect_header = req_header
            return True

    # Read the body, either chunk by chunk or by its declared length.
    if req_header.chunked:
        content = self.read_chunked_body(reader)
    else:
        content = reader.read(req_header.content_len)

    _filter = config.get_filter()
    # The uri filter is consulted only when the domain filter accepts.
    show = (
        _filter.by_domain(req_header.host)
        and _filter.by_uri(req_header.uri)
    )
    context.filtered = not show
    if show:
        self.processor.on_http_req(req_header, content)
    return True
def run_parser(produce_packet):
    """Dispatch every TCP packet from ``produce_packet`` to its connection.

    Packets are read through ``packet_parser.read_tcp_packet``, filtered by
    ip and port, and routed to a ``TcpConnection`` looked up by the packet's
    key.  Connections are finished when they report being closed, when they
    have been idle too long, or when the packet stream ends.

    :param produce_packet: packet source handed to
        ``packet_parser.read_tcp_packet``.
    """
    connections = OrderedDict()

    def finish_open_connections():
        # Finish every connection that is still tracked (not closed yet).
        for open_conn in connections.values():
            open_conn.finish()

    cleanups.register(finish_open_connections)

    _filter = config.get_filter()
    # Idle threshold, compared against differences of packet timestamps.
    idle_limit = 100 * 1000 * 100
    accepted = 0
    for tcp_pac in packet_parser.read_tcp_packet(produce_packet):
        # Drop packets where neither endpoint passes the ip / port filter.
        if not _filter.by_ip(tcp_pac.source) and not _filter.by_ip(tcp_pac.dest):
            continue
        if (not _filter.by_port(tcp_pac.source_port)
                and not _filter.by_port(tcp_pac.dest_port)):
            continue

        key = tcp_pac.gen_key()
        if key in connections:
            # Known connection: feed the packet, retire it once closed.
            current = connections[key]
            current.on_packet(tcp_pac)
            if current.closed():
                current.finish()
                del connections[key]
        elif ((tcp_pac.syn and not tcp_pac.ack)
              or content_utils.is_request(tcp_pac.body)):
            # Either the start of a tcp connection, or a connection set up
            # before the capture began whose payload looks like an http
            # request header.
            connections[key] = TcpConnection(tcp_pac)

        accepted += 1
        if accepted % 100 == 0:
            # Every 100 accepted packets, finish and drop timed-out connections.
            expired = []
            for conn_key, conn in connections.items():
                idle_for = tcp_pac.timestamp - conn.last_timestamp
                if idle_for > idle_limit:
                    conn.finish()
                    expired.append(conn_key)
            for conn_key in expired:
                del connections[conn_key]

    finish_open_connections()
def do_parse(source):
    """Parse the command line, configure filters and output, run the parser.

    Command-line options are read from ``sys.argv`` and stored on the shared
    filter and config objects obtained from ``config``.  Output goes to the
    file given with ``-o`` or to stdout.

    :param source: ``'file'`` to parse a pcap file (``-`` means stdin) or
        ``'device'`` to capture from a network device through libpcap.  With
        any other value no positional argument is added and nothing is
        parsed.
    :raises SystemExit: on argparse errors, on an empty file or device name,
        or when a device capture is requested and libpcap is not available.
    """
    parser = argparse.ArgumentParser()
    if source == 'file':
        parser.add_argument("infile", nargs='?', default='-',
                            help="the pcap file to parse, -(default value) means stdin")
    elif source == 'device':
        parser.add_argument("device", nargs='?', default="any",
                            help="the network device to capture, any(default value) mean all device")
    parser.add_argument("-i", "--ip",
                        help="only parse packages with specified source OR dest ip")
    parser.add_argument("-p", "--port", type=int,
                        help="only parse packages with specified source OR dest port")
    parser.add_argument("-v", "--verbosity",
                        help="increase output verbosity(-vv is recommended)",
                        action="count")
    parser.add_argument("-g", "--group",
                        help="group http request/response by connection",
                        action="store_true")
    parser.add_argument("-o", "--output", help="output to file instead of stdout")
    parser.add_argument("-e", "--encoding",
                        help="decode the data use specified encodings.")
    parser.add_argument("-b", "--beauty", help="output json in a pretty way.",
                        action="store_true")
    parser.add_argument("-d", "--domain", help="filter http data by request domain")
    parser.add_argument("-u", "--uri", help="filter http data by request uri pattern")
    parser.add_argument("-m", "--method", help="filter http data by request method")
    parser.add_argument("-k", "--keyword", help="filter http data by body content")

    args = parser.parse_args()

    _filter = config.get_filter()
    _filter.ip = args.ip
    _filter.port = args.port
    # Domain and uri pattern are stored encoded (bytes) when given as text.
    _filter.domain = args.domain
    if isinstance(_filter.domain, six.text_type):
        _filter.domain = _filter.domain.encode()
    _filter.uri_pattern = args.uri
    if isinstance(_filter.uri_pattern, six.text_type):
        _filter.uri_pattern = _filter.uri_pattern.encode()
    # NOTE(review): method and keyword are stored exactly as parsed, unlike
    # domain/uri above -- confirm the filter compares them against the
    # matching type.
    _filter.method = args.method
    _filter.keyword = args.keyword

    # Capture filter expression, only used on the libpcap code paths.
    filter_exp = 'tcp'
    if args.port:
        filter_exp += " port " + str(args.port)
    if args.ip:
        filter_exp = "host " + args.ip + " and " + filter_exp

    # deal with configs
    parse_config = config.get_config()
    if args.verbosity:
        parse_config.level = args.verbosity
    if args.encoding:
        parse_config.encoding = args.encoding
    parse_config.pretty = args.beauty
    parse_config.group = args.group

    if args.output:
        output_file = open(args.output, "w+")
    else:
        output_file = sys.stdout

    config.out = output_file

    try:
        if source == 'file':
            file_path = args.infile
            if not file_path:
                print("file name empty", file=sys.stderr)
                sys.exit(-1)

            infile = None
            try:
                # The libpcap file reader is deliberately disabled by the
                # trailing ``and False``: it is slower than the pure python
                # version.
                if live_cap.has_pcap() and file_path != '-' and False:
                    print("Use libpcap to pcap file, filter: {}".format(filter_exp),
                          file=sys.stderr)
                    producer = live_cap.libpcap_produce(filename=file_path,
                                                        filter_exp=filter_exp)
                else:
                    if file_path != '-':
                        infile = io.open(file_path, "rb")
                    else:
                        # Regular files are opened in binary mode, so stdin
                        # must be read as bytes too: use the underlying
                        # binary stream where the text wrapper exposes one.
                        infile = getattr(sys.stdin, 'buffer', sys.stdin)
                    producer = parse_pcap_file(infile)
                run_parser(producer)
            finally:
                if infile is not None:
                    infile.close()
        elif source == 'device':
            device = args.device
            if not device:
                print("device name empty", file=sys.stderr)
                sys.exit(-1)
            if not live_cap.has_pcap():
                print("Libpcap not found, install it first", file=sys.stderr)
                # Capturing cannot work without libpcap: stop here instead of
                # going on to start the capture anyway.
                sys.exit(-1)

            print("Capture device: {}, filter: {}".format(device, filter_exp),
                  file=sys.stderr)
            producer = live_cap.libpcap_produce(device=device, filter_exp=filter_exp)
            run_parser(producer)
    finally:
        # Only close what was opened here; stdout is left alone.
        if args.output:
            output_file.close()
def parse_(source):
    """Parse the command line, configure filters and output, run the parser.

    Command-line options are read from ``sys.argv`` and stored on the shared
    filter and config objects obtained from ``config``.  Output goes to the
    file given with ``-o`` or to stdout.

    :param source: ``'file'`` to parse a pcap file (``-`` means stdin) or
        ``'device'`` to capture from a network device through libpcap.  With
        any other value no positional argument is added and nothing is
        parsed.
    :raises SystemExit: on argparse errors, on an empty file or device name,
        or when a device capture is requested and libpcap is not available.
    """
    parser = argparse.ArgumentParser()
    if source == 'file':
        parser.add_argument("infile", nargs='?', default='-',
                            help="the pcap file to parse, -(default value) means stdin")
    elif source == 'device':
        parser.add_argument("device", nargs='?', default="any",
                            help="the network device to capture, any(default value) mean all device")
    parser.add_argument("-i", "--ip",
                        help="only parse packages with specified source OR dest ip")
    parser.add_argument("-p", "--port", type=int,
                        help="only parse packages with specified source OR dest port")
    parser.add_argument("-v", "--verbosity",
                        help="increase output verbosity(-vv is recommended)",
                        action="count")
    parser.add_argument("-g", "--group",
                        help="group http request/response by connection",
                        action="store_true")
    parser.add_argument("-o", "--output", help="output to file instead of stdout")
    parser.add_argument("-e", "--encoding",
                        help="decode the data use specified encodings.")
    parser.add_argument("-b", "--beauty", help="output json in a pretty way.",
                        action="store_true")
    parser.add_argument("-d", "--domain", help="filter http data by request domain")
    parser.add_argument("-u", "--uri", help="filter http data by request uri pattern")

    args = parser.parse_args()

    _filter = config.get_filter()
    _filter.ip = args.ip
    _filter.port = args.port
    # Domain and uri pattern are stored encoded (bytes) when given as text.
    _filter.domain = args.domain
    if isinstance(_filter.domain, six.text_type):
        _filter.domain = _filter.domain.encode()
    _filter.uri_pattern = args.uri
    if isinstance(_filter.uri_pattern, six.text_type):
        _filter.uri_pattern = _filter.uri_pattern.encode()

    # Capture filter expression, only used on the libpcap code paths.
    filter_exp = 'tcp'
    if args.port:
        filter_exp += " port " + str(args.port)
    if args.ip:
        filter_exp = "host " + args.ip + " and " + filter_exp

    # deal with configs
    parse_config = config.get_config()
    if args.verbosity:
        parse_config.level = args.verbosity
    if args.encoding:
        parse_config.encoding = args.encoding
    parse_config.pretty = args.beauty
    parse_config.group = args.group

    if args.output:
        output_file = open(args.output, "w+")
    else:
        output_file = sys.stdout

    config.out = output_file

    try:
        if source == 'file':
            file_path = args.infile
            if not file_path:
                print("file name empty", file=sys.stderr)
                sys.exit(-1)

            infile = None
            try:
                # The libpcap file reader is deliberately disabled by the
                # trailing ``and False``: it is slower than the pure python
                # version.
                if live_cap.has_pcap() and file_path != '-' and False:
                    print("Use libpcap to pcap file, filter: {}".format(filter_exp),
                          file=sys.stderr)
                    producer = live_cap.libpcap_produce(filename=file_path,
                                                        filter_exp=filter_exp)
                else:
                    if file_path != '-':
                        infile = io.open(file_path, "rb")
                    else:
                        # Regular files are opened in binary mode, so stdin
                        # must be read as bytes too: use the underlying
                        # binary stream where the text wrapper exposes one.
                        infile = getattr(sys.stdin, 'buffer', sys.stdin)
                    producer = parse_pcap_file(infile)
                run_parser(producer)
            finally:
                if infile is not None:
                    infile.close()
        elif source == 'device':
            device = args.device
            if not device:
                print("device name empty", file=sys.stderr)
                sys.exit(-1)
            if not live_cap.has_pcap():
                print("Libpcap not found, install it first", file=sys.stderr)
                # Capturing cannot work without libpcap: stop here instead of
                # going on to start the capture anyway.
                sys.exit(-1)

            print("Capture device: {}, filter: {}".format(device, filter_exp),
                  file=sys.stderr)
            producer = live_cap.libpcap_produce(device=device, filter_exp=filter_exp)
            run_parser(producer)
    finally:
        # Only close what was opened here; stdout is left alone.
        if args.output:
            output_file.close()