Esempio n. 1
0
    def read_request(self, reader, context):
        """Read one HTTP request from ``reader`` and pass it to the processor.

        A request whose header has ``expect`` set is handled over two calls:
        the first call stores the header on ``context.expect_header`` and
        returns True; the next call picks that header up and reads the body.

        ``context.filtered`` is set to whether the request was rejected by the
        configured filters; only accepted requests reach
        ``self.processor.on_http_req``.

        Returns False when no request header could be read (the remaining
        reader data is skipped), True otherwise.
        """
        pending_header = context.expect_header
        if pending_header:
            # Header was stored by the previous call; only the body is left.
            context.expect_header = None
            req_header = pending_header
        else:
            req_header = self.read_http_req_header(reader)
            if req_header is None:
                # Reader finished or an error occurred: drop whatever is left.
                reader.skip_all()
                return False
            if req_header.expect:
                # Expect/continue request: park the header, the body is read
                # on the next call.
                context.expect_header = req_header
                return True

        # Read the body, either chunked or by the declared content length.
        if req_header.chunked:
            content = self.read_chunked_body(reader)
        else:
            content = reader.read(req_header.content_len)

        # Every filter is evaluated (no short-circuit); the request is shown
        # only when all of them accept it.
        request_filter = config.get_filter()
        verdicts = (
            request_filter.by_domain(req_header.host),
            request_filter.by_method(req_header.method),
            request_filter.by_keyword(content),
            request_filter.by_uri(req_header.uri),
        )
        show = all(verdicts)
        context.filtered = not show
        if show:
            self.processor.on_http_req(req_header, content)
        return True
Esempio n. 2
0
    def read_request(self, reader, context):
        """Read one HTTP request and emit it if the domain/uri filters accept it.

        When ``context.expect_header`` holds a header saved by an earlier call,
        that header is consumed and only the body is read. Otherwise a fresh
        header is read; a header with ``expect`` set is saved on the context
        and the call returns True without reading a body.

        Returns False if no header could be read (the rest of the reader is
        skipped), True in every other case. ``context.filtered`` records
        whether the request was rejected by the filters.
        """
        if not context.expect_header:
            req_header = self.read_http_req_header(reader)
            if req_header is None:
                # Nothing more to parse (end of data or an error): discard.
                reader.skip_all()
                return False
            if req_header.expect:
                # Keep the header around; its body arrives on the next call.
                context.expect_header = req_header
                return True
        else:
            # Body of a previously seen expect/continue request.
            req_header = context.expect_header
            context.expect_header = None

        # Body: chunked transfer or fixed content length.
        content = (
            self.read_chunked_body(reader)
            if req_header.chunked
            else reader.read(req_header.content_len)
        )

        request_filter = config.get_filter()
        # The uri filter is consulted only when the domain filter accepts.
        if request_filter.by_domain(req_header.host) and request_filter.by_uri(req_header.uri):
            context.filtered = False
            self.processor.on_http_req(req_header, content)
        else:
            context.filtered = True
        return True
Esempio n. 3
0
def run_parser(produce_packet):
    """Group TCP packets into connections and feed each connection its packets.

    Packets are read from ``packet_parser.read_tcp_packet(produce_packet)``.
    A packet is dropped when neither endpoint passes the configured ip filter
    or neither port passes the port filter. A new connection is started by a
    SYN-without-ACK packet, or by a packet whose body looks like an HTTP
    request (capture started after the TCP handshake). Connections that
    report closed are finished and forgotten; every 100 accepted packets,
    idle connections are finished and forgotten as well. Connections still
    open at the end are finished.
    """
    sweep_every = 100
    # Idle threshold, in the same unit as the packet timestamps (unit is not
    # visible from this function).
    idle_limit = 100 * 1000 * 100

    connections = OrderedDict()

    def clear_connection():
        # Finish every connection that has not been closed yet.
        for open_conn in connections.values():
            open_conn.finish()

    cleanups.register(clear_connection)
    packet_filter = config.get_filter()
    accepted = 0
    for tcp_pac in packet_parser.read_tcp_packet(produce_packet):
        # Drop the packet unless one endpoint matches the ip filter ...
        if not packet_filter.by_ip(tcp_pac.source) and not packet_filter.by_ip(tcp_pac.dest):
            continue
        # ... and one port matches the port filter.
        if (not packet_filter.by_port(tcp_pac.source_port)
                and not packet_filter.by_port(tcp_pac.dest_port)):
            continue

        key = tcp_pac.gen_key()
        if key in connections:
            # Known connection: deliver the packet, retire it once closed.
            current = connections[key]
            current.on_packet(tcp_pac)
            if current.closed():
                current.finish()
                del connections[key]
        elif tcp_pac.syn and not tcp_pac.ack:
            # Start of a TCP connection.
            connections[key] = TcpConnection(tcp_pac)
        elif content_utils.is_request(tcp_pac.body):
            # Handshake happened before capture began; start from what looks
            # like an HTTP request header.
            connections[key] = TcpConnection(tcp_pac)

        accepted += 1
        if accepted % sweep_every:
            continue

        # Periodic sweep: finish connections idle for longer than the limit,
        # then remove them once iteration over the dict is done.
        expired = []
        for conn_key, candidate in connections.items():
            idle_for = tcp_pac.timestamp - candidate.last_timestamp
            if idle_for > idle_limit:
                candidate.finish()
                expired.append(conn_key)
        for conn_key in expired:
            del connections[conn_key]

    clear_connection()
Esempio n. 4
0
def do_parse(source):
    """Parse command-line options, configure filters/output, and run the parser.

    ``source`` selects the input: ``'file'`` reads a pcap file (stdin when the
    path is ``-``), ``'device'`` captures from a network device through
    libpcap. For any other value only the configuration steps run.

    Calls ``sys.exit(-1)`` when the file or device name is empty, or when
    device capture is requested but libpcap is not available. An output file
    opened for ``--output`` is always closed before returning or exiting.
    """
    parser = argparse.ArgumentParser()
    if source == 'file':
        parser.add_argument("infile", nargs='?', default='-',
                            help="the pcap file to parse, -(default value) means stdin")
    elif source == 'device':
        parser.add_argument("device", nargs='?', default="any",
                            help="the network device to capture, any(default value) mean all device")
    parser.add_argument("-i", "--ip", help="only parse packages with specified source OR dest ip")
    parser.add_argument("-p", "--port", type=int,
                        help="only parse packages with specified source OR dest port")
    parser.add_argument("-v", "--verbosity", help="increase output verbosity(-vv is recommended)",
                        action="count")
    parser.add_argument("-g", "--group", help="group http request/response by connection",
                        action="store_true")
    parser.add_argument("-o", "--output", help="output to file instead of stdout")
    parser.add_argument("-e", "--encoding", help="decode the data use specified encodings.")
    parser.add_argument("-b", "--beauty", help="output json in a pretty way.", action="store_true")
    parser.add_argument("-d", "--domain", help="filter http data by request domain")
    parser.add_argument("-u", "--uri", help="filter http data by request uri pattern")
    parser.add_argument("-m", "--method", help="filter http data by request method")
    parser.add_argument("-k", "--keyword", help="filter http data by body content")

    args = parser.parse_args()

    # Push the command-line filters into the shared filter object.
    _filter = config.get_filter()
    _filter.ip = args.ip
    _filter.port = args.port
    # Domain and uri pattern are stored as bytes.
    _filter.domain = args.domain
    if isinstance(_filter.domain, six.text_type):
        _filter.domain = _filter.domain.encode()
    _filter.uri_pattern = args.uri
    if isinstance(_filter.uri_pattern, six.text_type):
        _filter.uri_pattern = _filter.uri_pattern.encode()
    # NOTE(review): method and keyword are stored as given (not encoded like
    # domain/uri_pattern) -- confirm the filter compares them against values
    # of the same type.
    _filter.method = args.method
    _filter.keyword = args.keyword

    # Capture filter expression handed to libpcap.
    filter_exp = 'tcp'
    if args.port:
        filter_exp += " port " + str(args.port)
    if args.ip:
        filter_exp = "host " + args.ip + " and " + filter_exp

    # deal with configs
    parse_config = config.get_config()
    if args.verbosity:
        parse_config.level = args.verbosity
    if args.encoding:
        parse_config.encoding = args.encoding
    parse_config.pretty = args.beauty
    parse_config.group = args.group

    if args.output:
        output_file = open(args.output, "w+")
    else:
        output_file = sys.stdout

    config.out = output_file

    try:
        if source == 'file':
            file_path = args.infile
            if not file_path:
                print("file name empty", file=sys.stderr)
                sys.exit(-1)
            infile = None
            try:
                # The libpcap file reader is deliberately disabled by the
                # trailing ``and False``.
                if live_cap.has_pcap() and file_path != '-' and False:
                    # now slow than pure python version...
                    print("Use libpcap to pcap file, filter: {}".format(filter_exp),
                          file=sys.stderr)
                    producer = live_cap.libpcap_produce(filename=file_path, filter_exp=filter_exp)
                else:
                    if file_path != '-':
                        infile = io.open(file_path, "rb")
                    else:
                        # pcap data is binary, like the "rb" file above. Where
                        # sys.stdin is a text stream it exposes the raw bytes
                        # through ``buffer``; fall back to sys.stdin itself
                        # when that attribute does not exist.
                        infile = getattr(sys.stdin, "buffer", sys.stdin)
                    producer = parse_pcap_file(infile)
                run_parser(producer)
            finally:
                if infile is not None:
                    infile.close()
        elif source == 'device':
            device = args.device
            if not device:
                print("device name empty", file=sys.stderr)
                sys.exit(-1)
            if not live_cap.has_pcap():
                # Live capture cannot work without libpcap: stop here instead
                # of falling through to libpcap_produce.
                print("Libpcap not found, install it first", file=sys.stderr)
                sys.exit(-1)
            print("Capture device: {}, filter: {}".format(device, filter_exp), file=sys.stderr)
            producer = live_cap.libpcap_produce(device=device, filter_exp=filter_exp)
            run_parser(producer)
    finally:
        # Only close what was opened here; never close sys.stdout.
        if args.output:
            output_file.close()
Esempio n. 5
0
def parse_(source):
    """Parse command-line options, configure filters/output, and run the parser.

    ``source`` selects the input: ``'file'`` reads a pcap file (stdin when the
    path is ``-``), ``'device'`` captures from a network device through
    libpcap. For any other value only the configuration steps run.

    Calls ``sys.exit(-1)`` when the file or device name is empty, or when
    device capture is requested but libpcap is not available. An output file
    opened for ``--output`` is always closed before returning or exiting.
    """
    parser = argparse.ArgumentParser()
    if source == 'file':
        parser.add_argument("infile", nargs='?', default='-',
                            help="the pcap file to parse, -(default value) means stdin")
    elif source == 'device':
        parser.add_argument("device", nargs='?', default="any",
                            help="the network device to capture, any(default value) mean all device")
    parser.add_argument("-i", "--ip", help="only parse packages with specified source OR dest ip")
    parser.add_argument("-p", "--port", type=int,
                        help="only parse packages with specified source OR dest port")
    parser.add_argument("-v", "--verbosity", help="increase output verbosity(-vv is recommended)",
                        action="count")
    parser.add_argument("-g", "--group", help="group http request/response by connection",
                        action="store_true")
    parser.add_argument("-o", "--output", help="output to file instead of stdout")
    parser.add_argument("-e", "--encoding", help="decode the data use specified encodings.")
    parser.add_argument("-b", "--beauty", help="output json in a pretty way.", action="store_true")
    parser.add_argument("-d", "--domain", help="filter http data by request domain")
    parser.add_argument("-u", "--uri", help="filter http data by request uri pattern")

    args = parser.parse_args()

    # Push the command-line filters into the shared filter object.
    _filter = config.get_filter()
    _filter.ip = args.ip
    _filter.port = args.port
    # Domain and uri pattern are stored as bytes.
    _filter.domain = args.domain
    if isinstance(_filter.domain, six.text_type):
        _filter.domain = _filter.domain.encode()
    _filter.uri_pattern = args.uri
    if isinstance(_filter.uri_pattern, six.text_type):
        _filter.uri_pattern = _filter.uri_pattern.encode()

    # Capture filter expression handed to libpcap.
    filter_exp = 'tcp'
    if args.port:
        filter_exp += " port " + str(args.port)
    if args.ip:
        filter_exp = "host " + args.ip + " and " + filter_exp

    # deal with configs
    parse_config = config.get_config()
    if args.verbosity:
        parse_config.level = args.verbosity
    if args.encoding:
        parse_config.encoding = args.encoding
    parse_config.pretty = args.beauty
    parse_config.group = args.group

    if args.output:
        output_file = open(args.output, "w+")
    else:
        output_file = sys.stdout

    config.out = output_file

    try:
        if source == 'file':
            file_path = args.infile
            if not file_path:
                print("file name empty", file=sys.stderr)
                sys.exit(-1)
            infile = None
            try:
                # The libpcap file reader is deliberately disabled by the
                # trailing ``and False``.
                if live_cap.has_pcap() and file_path != '-' and False:
                    # now slow than pure python version...
                    print("Use libpcap to pcap file, filter: {}".format(filter_exp),
                          file=sys.stderr)
                    producer = live_cap.libpcap_produce(filename=file_path, filter_exp=filter_exp)
                else:
                    if file_path != '-':
                        infile = io.open(file_path, "rb")
                    else:
                        # pcap data is binary, like the "rb" file above. Where
                        # sys.stdin is a text stream it exposes the raw bytes
                        # through ``buffer``; fall back to sys.stdin itself
                        # when that attribute does not exist.
                        infile = getattr(sys.stdin, "buffer", sys.stdin)
                    producer = parse_pcap_file(infile)
                run_parser(producer)
            finally:
                if infile is not None:
                    infile.close()
        elif source == 'device':
            device = args.device
            if not device:
                print("device name empty", file=sys.stderr)
                sys.exit(-1)
            if not live_cap.has_pcap():
                # Live capture cannot work without libpcap: stop here instead
                # of falling through to libpcap_produce.
                print("Libpcap not found, install it first", file=sys.stderr)
                sys.exit(-1)
            print("Capture device: {}, filter: {}".format(device, filter_exp), file=sys.stderr)
            producer = live_cap.libpcap_produce(device=device, filter_exp=filter_exp)
            run_parser(producer)
    finally:
        # Only close what was opened here; never close sys.stdout.
        if args.output:
            output_file.close()