class Response_Parser():
    """Parse a raw HTTP response with HttpParser and expose its headers,
    body and sizes."""

    def __init__(self):
        self.parser = HttpParser()
        self.len_response = 0  # total bytes of the raw response
        self.len_body = 0      # bytes of the parsed body
        self.body = None       # parsed body, set by parse()

    def parse(self, raw_response):
        """Feed the complete raw response (bytes) into the parser.

        Sets len_response, body and len_body as side effects.
        """
        # len() works directly on bytes; wrapping in bytearray() was redundant.
        self.len_response = len(raw_response)
        self.parser.execute(raw_response, self.len_response)
        self.body = self.parser.recv_body()
        self.len_body = len(self.body)

    def get_all_keys(self):
        """Get all the keys in the response headers."""
        return self.parser.get_headers().keys()

    def get_keys(self, *args):
        """Return {key: header value} for each requested key, '-' if absent."""
        headers = self.parser.get_headers()
        return {key: headers.get(key, '-') for key in args}

    def get_response(self, *args):
        """Build an HTTP_Response from the status code, the selected header
        values and the recorded sizes."""
        values = self.get_keys(*args)
        status_code = self.parser.get_status_code()
        return HTTP_Response(status_code, values, self.len_response, self.len_body)

    # Backward-compatible alias for the original misspelled method name.
    get_reponse = get_response

    def get_body(self):
        """Return the body captured by the last parse() call."""
        return self.body
Example #2
0
def post_report(sock):
    """POST this node's status (disk space, listen address, group, last file
    number) to the tracker; return True iff the tracker answers HTTP 200."""
    st = os.statvfs(haystack_path)
    # f_bavail: free blocks available to unprivileged users.
    available_size = st.f_bavail * st.f_frsize
    obj = {
        "listenip": listenip,
        "listenport": listenport,
        "disk_available_size": available_size,
        "master": master,
        "groupid": groupid,
        "last_fileno": haystack.haystack_last_fileno,
    }
    body = json.dumps(obj)
    # Bug fix: sock.send() may transmit only part of its buffer; build the
    # whole request once and use sendall() to guarantee full delivery.
    request = (
        "POST /report HTTP/1.1\r\n"
        "Host: %s:%d\r\n"
        "Content-Length: %d\r\n"
        "Content-Type: application/json\r\n"
        "Connection: keep-alive\r\n"
        "\r\n" % (track.ip, track.port, len(body))
    )
    sock.sendall(request + body)

    parser = HttpParser()
    while True:
        data = sock.recv(1024)
        if not data:
            # Connection closed before a complete response arrived.
            return False

        recved = len(data)
        nparsed = parser.execute(data, recved)
        assert nparsed == recved
        if parser.is_message_complete():
            break

    return parser.get_status_code() == 200
Example #3
0
def post_sync(sock, masterip, masterport):
    """POST the local last_fileno to the master for synchronization;
    return True iff the master answers HTTP 200."""
    obj = {"last_fileno": haystack.haystack_last_fileno}
    body = json.dumps(obj)
    # Bug fix: sock.send() may transmit only part of its buffer; build the
    # whole request once and use sendall() to guarantee full delivery.
    request = (
        "POST /sync HTTP/1.1\r\n"
        "Host: %s:%d\r\n"
        "Content-Length: %d\r\n"
        "Content-Type: application/json\r\n"
        "Connection: keep-alive\r\n"
        "\r\n" % (masterip, masterport, len(body))
    )
    sock.sendall(request + body)

    parser = HttpParser()
    while True:
        # Deliberate byte-at-a-time read (original "ugly" workaround): it
        # prevents consuming bytes that belong to the next message on this
        # keep-alive connection.
        data = sock.recv(1)
        if not data:
            return False

        recved = len(data)
        nparsed = parser.execute(data, recved)
        assert nparsed == recved
        if parser.is_message_complete():
            break

    return parser.get_status_code() == 200
Example #4
0
 def __init__(self, raw):
     """Wrap a raw exchange: parse its stored response and expose the
     headers, body and status code as attributes."""
     parser = HttpParser()
     parser.execute(raw.response, len(raw.response))
     self.raw = raw
     self.headers = parser.get_headers()
     # NOTE(review): reads the parser's private _body list -- there is no
     # public accessor used here; verify against the http_parser version.
     self.body = "".join(parser._body)
     self.code = parser.get_status_code()
     self._json = None
Example #5
0
File: http.py Project: HVF/diesel
    def request(self, method, url, headers=None, body=None, timeout=None):
        '''Issues a `method` request to `path` on the
        connected server.  Sends along `headers`, and
        body.

        Very low level--you must set "host" yourself,
        for example.  It will set Content-Length,
        however.
        '''
        # Bug fix: the default was a mutable `headers={}`, which is shared
        # across calls and can leak header state between requests.
        headers = headers or {}
        url_info = urlparse(url)
        fake_wsgi = dict(
        (cgi_name(n), v) for n, v in headers.iteritems())
        fake_wsgi.update({
            # NOTE(review): the WSGI-standard key is 'REQUEST_METHOD' (the
            # newer revision of this method uses it); confirm what this
            # project's Request() expects before changing.
            'HTTP_METHOD' : method,
            'SCRIPT_NAME' : '',
            'PATH_INFO' : url_info[2],
            'QUERY_STRING' : url_info[4],
            'wsgi.version' : (1,0),
            'wsgi.url_scheme' : 'http', # XXX incomplete
            'wsgi.input' : cStringIO.StringIO(body or ''),
            'wsgi.errors' : FileLikeErrorLogger(hlog),
            'wsgi.multithread' : False,
            'wsgi.multiprocess' : False,
            'wsgi.run_once' : False,
            })
        req = Request(fake_wsgi)

        timeout_handler = TimeoutHandler(timeout or 60)

        send('%s %s HTTP/1.1\r\n%s' % (req.method, req.url, str(req.headers)))

        if body:
            send(body)

        h = HttpParser()
        body = []
        data = None
        while True:
            if data:
                used = h.execute(data, len(data))
                if h.is_headers_complete():
                    body.append(h.recv_body())
                if h.is_message_complete():
                    # Keep any bytes past the end of this message.
                    data = data[used:]
                    break
            ev, val = first(receive_any=True, sleep=timeout_handler.remaining())
            if ev == 'sleep': timeout_handler.timeout()
            data = val

        resp = Response(
            response=''.join(body),
            status=h.get_status_code(),
            headers=h.get_headers(),
            )

        return resp
class Session:
  """Tracks one direction of an HTTP conversation and attributes its byte
  counts to a service."""

  def __init__(self, current_key_hex, partner_key_hex):
    # kind=2 lets the parser autodetect request vs. response.
    self.parser = HttpParser(kind=2, decompress=True)
    self.data_bytes = 0   # payload bytes fed to the parser
    self.total_bytes = 0  # bytes on the wire, as reported by the caller
    self.current_key_hex = current_key_hex
    self.partner_key_hex = partner_key_hex
    self.is_request = None  # None until the direction is detected
    self.service = None     # lazily resolved service name

  def getPartner(self):
    """Return the Session for the opposite direction of this connection."""
    # Bug fix: partner_key_hex is an instance attribute; the bare name
    # raised NameError at runtime.
    return sessions[self.partner_key_hex]

  def getService(self):
    """Resolve the service name; responses defer to the request side."""
    if (self.is_request == False):
      return self.getPartner().getService()
    if (self.is_request is None):
      return '_unknown'
    if (self.service is None):
      self.service = getServiceForQS(self.parser.get_query_string())
    return self.service

  def eat(self, payload_string, bytes_sent):
    """Feed one captured payload into the parser and account its bytes."""
    received_len = len(payload_string)
    self.data_bytes += received_len
    self.total_bytes += bytes_sent
    self.parser.execute(payload_string, received_len)

    # Bug fix: `is not 0` compared identity (a CPython small-int accident);
    # use a value comparison. A nonzero status code means this side carries
    # responses (outbound); a message beginning means requests (inbound).
    if self.parser.get_status_code() != 0:
      self.is_request = False
      addBytesOutboundFromService(bytes_sent, self.getService())
    elif self.parser.is_message_begin():
      self.is_request = True
      addBytesInboundToService(bytes_sent, self.getService())
    else:
      addBytesUnknownboundToService(bytes_sent, self.getService())

    if self.parser.is_message_complete():
      eprint("end!")
Example #7
0
def process(indir, outdir):
    findstr = os.path.join(indir, '*')
    for fn in glob.glob(findstr):
        print fn
        with open(fn, 'rb') as f:
            http_bin = f.read()

        n = 0
        while n < len(http_bin):

            http = HttpParser()
            nparsed = http.execute(http_bin[n:], len(http_bin) - n)

            if not http.is_message_complete():
                break

            if http.get_path() != '':
                # send

                http_method = http_bin[n:].split()[
                    0]  #http.get_method() -- seems bugged
                http_path = http_bin[n:].split()[1]
                http_request = parse_http_packet(http.get_headers(),
                                                 http.recv_body())
                http_hostname = 'unknown'
                if 'Host' in http.get_headers():
                    http_hostname = http.get_headers()['Host']
                print http_hostname

                nparsed -= 1

                full_http = http_method + ' ' + http_path + '\n'
                full_http += http_request + '\n'

                save_http_packet(outdir, os.path.basename(fn), http_hostname,
                                 http_path, 'send', full_http)
            else:
                # recv

                http_status = http.get_status_code()
                http_reply = parse_http_packet(http.get_headers(),
                                               http.recv_body())

                full_http += str(http_status) + '\n'
                full_http += http_reply

                save_http_packet(outdir, os.path.basename(fn), http_hostname,
                                 '', 'recv', full_http)

            n += nparsed
Example #8
0
def heartbeat(sock):
    """Send GET /ping to the connected peer; return True iff it answers 200."""
    ip, port = sock.getpeername()
    parser = HttpParser()
    # Bug fix: sock.send() may transmit only part of the buffer; sendall()
    # guarantees the whole request goes out.
    sock.sendall("GET /ping HTTP/1.1\r\nHost: %s:%d\r\n\r\n" % (ip, port))

    while True:
        data = sock.recv(1024)
        if not data:
            # Peer closed the connection before a complete response.
            return False

        recved = len(data)
        nparsed = parser.execute(data, recved)
        assert nparsed == recved
        if parser.is_message_complete():
            break

    return parser.get_status_code() == 200
Example #9
0
    def makeRequest(self, host, url="/", port=80, method='GET', headers=None, postdata=None):
        """Send one raw HTTP request and return a dict with the response
        status, total bytes received, headers, body chunks and the raw
        request string that was sent."""
        assert self.e is not None
        evSet = self.e.wait()  # noqa: F841
        # log.debug("Generating raw http request")
        self.s.connect((host, port))

        if headers is None:
            headers = {
                    "Accept": "*/*",
                    "User-Agent": self.useragent
            }

        req = self.rawHttpReq(host, url, method, headers, postdata)

        self.s.sendall(req.encode())

        h = []        # replaced by the headers dict once headers complete
        body = []     # accumulated body chunks
        p = HttpParser()
        tlen = 0      # total bytes received

        while True:
            data = self.s.recv(2048)

            if not data:
                break

            rlen = len(data)
            tlen += rlen
            nparsed = p.execute(data, rlen)
            assert nparsed == rlen

            if p.is_headers_complete():
                h = p.get_headers()
                # log.debug(p.get_headers())
            if p.is_partial_body():
                body.append(p.recv_body())

            if p.is_message_complete():
                break

        self.s.close()

        res = {'status': p.get_status_code(), 'length': tlen, 'headers': h, 'body': body, 'request': req}
        print(res)
        # Bug fix: the result was built and printed but never returned, so
        # every caller got None.
        return res
Example #10
0
    def receive(self):
        """Read one complete HTTP response from the socket and return a
        Response with its body, status and headers.

        Raises IOError if the peer closes the connection before the
        message is complete.
        """
        h = HttpParser()
        body = []
        data = None
        while True:
            if data:
                used = h.execute(data, len(data))
                if h.is_headers_complete():
                    body.append(h.recv_body())
                if h.is_message_complete():
                    # Keep any bytes past the end of this message.
                    data = data[used:]
                    break
            data = self.s.recv(BUFSIZE)
            # Bug fix: a closed socket returns '' forever; without this
            # check the loop spun indefinitely calling recv().
            if not data:
                raise IOError('connection closed before message was complete')

        return Response(response=''.join(body),
                        status=h.get_status_code(),
                        headers=h.get_headers(),
                        )
Example #11
0
    def parse_html(self):
        """Extract the HTTP response embedded in self.result, parse it and
        return (status_code, header, body); on any failure return
        (None, None, error_message)."""
        try:
            resolve_ip = ''
            kept_lines = []
            # Lines whose first two characters contain any of these markers
            # are curl/debug noise, not part of the HTTP response.
            skip_markers = ['*', '> ', '< ', '{']
            for line in self.result.split("\n"):
                if 'Trying' in line:
                    resolve_ip = line.replace('*', "").replace(
                        "Trying", "").replace("...", "").strip()
                    log.logger.info('resolve_ip: %s ' % (resolve_ip))

                if not any(marker in line[:2] for marker in skip_markers):
                    kept_lines.append(line.encode('utf-8'))

            payload = b("\r\n").join(kept_lines)
            parser = HttpParser()
            parser.execute(payload, len(payload))
            status_code = str(parser.get_status_code())
            #body = str(parser.recv_body())

            header_parts = []
            if resolve_ip:
                header_parts.append('%s:%s' %
                                    ("resolve ip", resolve_ip.strip()))
            for name, value in parser.get_headers().items():
                header_parts.append('%s:%s' % (name, value))
            header = ("<br/>").join(header_parts)

            body = self.content["result"]

            log.logger.info('resolve_ip :%s ' % (resolve_ip))
            log.logger.info('status_code :%s ' % (status_code))
            log.logger.info('header :%s ' % (header))
            log.logger.info('body :%s ' % (body))

            return status_code, header, body
        except Exception as e:
            log.logger.info('Exception: %s ' % (str(e)))
            return None, None, str(e)
Example #12
0
class HttpStream(object):
    """ An HTTP parser providing higher-level access to a readable,
    sequential io.RawIOBase object. You can use implementions of
    http_parser.reader (IterReader, StringReader, SocketReader) or
    create your own.
    """

    def __init__(self, stream, kind=HTTP_BOTH, decompress=False):
        """ constructor of HttpStream.

        :attr stream: an io.RawIOBase object
        :attr kind: Int,  could be 0 to parseonly requests,
        1 to parse only responses or 2 if we want to let
        the parser detect the type.
        """
        self.parser = HttpParser(kind=kind, decompress=decompress)
        self.stream = stream

    def _check_headers_complete(self):
        """ pull data from the stream until the headers are fully parsed """
        if self.parser.is_headers_complete():
            return

        while True:
            try:
                next(self)
            except StopIteration:
                if self.parser.is_headers_complete():
                    return
                raise NoMoreData("Can't parse headers")

            if self.parser.is_headers_complete():
                return

    def _wait_status_line(self, cond):
        """ pull data until `cond()` is truthy or headers complete; raise
        BadStatusLine with everything read so far on EOF """
        if self.parser.is_headers_complete():
            return True

        data = []
        if not cond():
            while True:
                try:
                    d = next(self)
                    data.append(d)
                except StopIteration:
                    if self.parser.is_headers_complete():
                        return True
                    raise BadStatusLine(b"".join(data))
                if cond():
                    return True
        return True

    def _wait_on_url(self):
        return self._wait_status_line(self.parser.get_url)

    def _wait_on_status(self):
        return self._wait_status_line(self.parser.get_status_code)

    def url(self):
        """ get full url of the request """
        self._wait_on_url()
        return self.parser.get_url()

    def path(self):
        """ get path of the request (url without query string and
        fragment """
        self._wait_on_url()
        return self.parser.get_path()

    def query_string(self):
        """ get query string of the url """
        self._wait_on_url()
        return self.parser.get_query_string()

    def fragment(self):
        """ get fragment of the url """
        self._wait_on_url()
        return self.parser.get_fragment()

    def version(self):
        """ get HTTP version as a tuple """
        self._wait_on_status()
        return self.parser.get_version()

    def status_code(self):
        """ get status code of a response as integer """
        self._wait_on_status()
        return self.parser.get_status_code()

    def status(self):
        """ return complete status with reason """
        status_code = self.status_code()
        reason = status_reasons.get(int(status_code), 'unknown')
        return "%s %s" % (status_code, reason)

    def method(self):
        """ get HTTP method as string"""
        self._wait_on_status()
        return self.parser.get_method()

    def headers(self):
        """ get request/response headers, headers are returned in a
        OrderedDict that allows you to get value using insensitive
        keys."""
        self._check_headers_complete()
        return self.parser.get_headers()

    def should_keep_alive(self):
        """ return True if the connection should be kept alive
        """
        self._check_headers_complete()
        return self.parser.should_keep_alive()

    def is_chunked(self):
        """ return True if Transfer-Encoding header value is chunked"""
        self._check_headers_complete()
        return self.parser.is_chunked()

    def wsgi_environ(self, initial=None):
        """ get WSGI environ based on the current request.

        :attr initial: dict, initial values to fill in environ.
        """
        self._check_headers_complete()
        environ = self.parser.get_wsgi_environ()
        if initial:
            # Bug fix: `initial` was accepted but silently ignored. Parsed
            # values take precedence over the caller-provided defaults.
            merged = dict(initial)
            merged.update(environ)
            return merged
        return environ

    def body_file(self, buffering=None, binary=True, encoding=None,
            errors=None, newline=None):
        """ return the body as a buffered stream object. If binary is
        true an io.BufferedReader will be returned, else an
        io.TextIOWrapper.
        """
        self._check_headers_complete()

        if buffering is None:
            buffering = -1
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE

        raw = HttpBodyReader(self)
        buf = BufferedReader(raw, buffering)
        if binary:
            return buf
        text = TextIOWrapper(buf, encoding, errors, newline)
        return text

    def body_string(self, binary=True, encoding=None, errors=None,
            newline=None):
        """ return body as string """
        # Bug fix: `errors` was accepted but not forwarded to body_file().
        return self.body_file(binary=binary, encoding=encoding,
                errors=errors, newline=newline).read()

    def __iter__(self):
        return self

    def __next__(self):
        """ read one buffer from the stream, feed it to the parser and
        return it; StopIteration when the message is complete or on EOF """
        if self.parser.is_message_complete():
            raise StopIteration

        # fetch data
        b = bytearray(DEFAULT_BUFFER_SIZE)
        recved = self.stream.readinto(b)
        if recved is None:
            raise NoMoreData("no more data")

        del b[recved:]
        to_parse = bytes(b)
        # parse data
        nparsed = self.parser.execute(to_parse, recved)
        if nparsed != recved and not self.parser.is_message_complete():
            raise ParserError("nparsed != recved (%s != %s) [%s]" % (nparsed,
                recved, bytes_to_str(to_parse)))

        if recved == 0:
            raise StopIteration

        return to_parse

    next = __next__
Example #13
0
    def request(self, method, url, headers=None, body=None, timeout=None):
        '''Issues a `method` request to `path` on the
        connected server.  Sends along `headers`, and
        body.

        Very low level--you must set "host" yourself,
        for example.  It will set Content-Length,
        however.
        '''
        headers = headers or {}
        url_info = urlparse(url)
        # Translate user headers into their CGI-style names.
        fake_wsgi = {}
        for name, value in headers.iteritems():
            fake_wsgi[cgi_name(name)] = str(value).strip()

        if body and 'CONTENT_LENGTH' not in fake_wsgi:
            # If the caller hasn't set their own Content-Length but submitted
            # a body, we auto-set the Content-Length header here.
            fake_wsgi['CONTENT_LENGTH'] = str(len(body))

        fake_wsgi.update({
            'REQUEST_METHOD' : method,
            'SCRIPT_NAME' : '',
            'PATH_INFO' : url_info[2],
            'QUERY_STRING' : url_info[4],
            'wsgi.version' : (1,0),
            'wsgi.url_scheme' : 'http', # XXX incomplete
            'wsgi.input' : cStringIO.StringIO(body or ''),
            'wsgi.errors' : FileLikeErrorLogger(hlog),
            'wsgi.multithread' : False,
            'wsgi.multiprocess' : False,
            'wsgi.run_once' : False,
            })
        req = Request(fake_wsgi)

        timeout_handler = TimeoutHandler(timeout or 60)

        # Rebuild the request target from the parsed request object.
        url = str(req.path)
        if req.query_string:
            url += '?' + str(req.query_string)

        send('%s %s HTTP/1.1\r\n%s' % (req.method, url, str(req.headers)))

        if body:
            send(body)

        parser = HttpParser()
        chunks = []
        data = None
        while True:
            if data:
                consumed = parser.execute(data, len(data))
                if parser.is_headers_complete():
                    chunks.append(parser.recv_body())
                if parser.is_message_complete():
                    # Preserve any bytes belonging to the next message.
                    data = data[consumed:]
                    break
            ev, val = first(receive_any=True, sleep=timeout_handler.remaining())
            if ev == 'sleep':
                timeout_handler.timeout()
            data = val

        return Response(
            response=''.join(chunks),
            status=parser.get_status_code(),
            headers=parser.get_headers(),
            )
Example #14
0
File: http.py Project: bjornua/dna
class HttpStream(object):
    """ An HTTP parser providing higher-level access to a readable,
    sequential io.RawIOBase object. You can use implementions of
    http_parser.reader (IterReader, StringReader, SocketReader) or
    create your own.
    """
    def __init__(self, stream, kind=HTTP_BOTH, decompress=False):
        """ constructor of HttpStream.

        :attr stream: an io.RawIOBase object
        :attr kind: Int,  could be 0 to parseonly requests,
        1 to parse only responses or 2 if we want to let
        the parser detect the type.
        """
        self.parser = HttpParser(kind=kind, decompress=decompress)
        self.stream = stream

    def _check_headers_complete(self):
        """ pull data from the stream until the headers are fully parsed """
        if self.parser.is_headers_complete():
            return

        while True:
            try:
                # The return value is fed to the parser by next() itself,
                # so it does not need to be kept here.
                self.next()
            except StopIteration:
                if self.parser.is_headers_complete():
                    return
                raise NoMoreData()

            if self.parser.is_headers_complete():
                return

    def url(self):
        """ get full url of the request """
        self._check_headers_complete()
        return self.parser.get_url()

    def path(self):
        """ get path of the request (url without query string and
        fragment """
        self._check_headers_complete()
        return self.parser.get_path()

    def query_string(self):
        """ get query string of the url """
        self._check_headers_complete()
        return self.parser.get_query_string()

    def fragment(self):
        """ get fragment of the url """
        self._check_headers_complete()
        return self.parser.get_fragment()

    def version(self):
        """ get HTTP version as a tuple """
        self._check_headers_complete()
        return self.parser.get_version()

    def status_code(self):
        """ get status code of a response as integer """
        self._check_headers_complete()
        return self.parser.get_status_code()

    def status(self):
        """ return complete status with reason """
        status_code = self.status_code()
        reason = status_reasons.get(int(status_code), 'unknown')
        return "%s %s" % (status_code, reason)

    def method(self):
        """ get HTTP method as string"""
        self._check_headers_complete()
        return self.parser.get_method()

    def headers(self):
        """ get request/response headers, headers are returned in a
        OrderedDict that allows you to get value using insensitive
        keys."""
        self._check_headers_complete()
        return self.parser.get_headers()

    def should_keep_alive(self):
        """ return True if the connection should be kept alive
        """
        self._check_headers_complete()
        return self.parser.should_keep_alive()

    def is_chunked(self):
        """ return True if Transfer-Encoding header value is chunked"""
        self._check_headers_complete()
        return self.parser.is_chunked()

    def wsgi_environ(self, initial=None):
        """ get WSGI environ based on the current request.

        :attr initial: dict, initial values to fill in environ.
        """
        self._check_headers_complete()
        environ = self.parser.get_wsgi_environ()
        if initial:
            # Bug fix: `initial` was accepted but silently ignored. Parsed
            # values take precedence over the caller-provided defaults.
            merged = dict(initial)
            merged.update(environ)
            return merged
        return environ

    def body_file(self,
                  buffering=None,
                  binary=True,
                  encoding=None,
                  errors=None,
                  newline=None):
        """ return the body as a buffered stream object. If binary is
        true an io.BufferedReader will be returned, else an
        io.TextIOWrapper.
        """
        self._check_headers_complete()

        if buffering is None:
            buffering = -1
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE

        raw = HttpBodyReader(self)
        # Renamed from `buffer` to avoid shadowing the builtin.
        buf = BufferedReader(raw, buffering)
        if binary:
            return buf
        text = TextIOWrapper(buf, encoding, errors, newline)
        return text

    def body_string(self,
                    binary=True,
                    encoding=None,
                    errors=None,
                    newline=None):
        """ return body as string """
        # Bug fix: `errors` was accepted but not forwarded to body_file().
        return self.body_file(binary=binary,
                              encoding=encoding,
                              errors=errors,
                              newline=newline).read()

    def __iter__(self):
        return self

    def next(self):
        """ read one buffer from the stream, feed it to the parser and
        return it; StopIteration when the message is complete or on EOF """
        if self.parser.is_message_complete():
            raise StopIteration

        # fetch data
        b = bytearray(DEFAULT_BUFFER_SIZE)
        recved = self.stream.readinto(b)
        if recved is None:
            raise NoMoreData("no more data")

        del b[recved:]

        # parse data
        nparsed = self.parser.execute(bytes(b), recved)
        if nparsed != recved and not self.parser.is_message_complete():
            raise ParserError("nparsed != recved")

        if recved == 0:
            raise StopIteration

        return bytes(b)
Example #15
0
    def request(self, method, url, headers=None, body=None, timeout=None):
        """Issues a `method` request to `path` on the
        connected server.  Sends along `headers`, and
        body.

        Very low level--you must set "host" yourself,
        for example.  It will set Content-Length,
        however.
        """
        headers = headers or {}
        url_info = urlparse(url)
        # Translate user headers into their CGI-style names.
        fake_wsgi = {}
        for name, value in headers.iteritems():
            fake_wsgi[cgi_name(name)] = str(value).strip()

        if body and "CONTENT_LENGTH" not in fake_wsgi:
            # If the caller hasn't set their own Content-Length but submitted
            # a body, we auto-set the Content-Length header here.
            fake_wsgi["CONTENT_LENGTH"] = str(len(body))

        fake_wsgi.update(
            {
                "REQUEST_METHOD": method,
                "SCRIPT_NAME": "",
                "PATH_INFO": url_info[2],
                "QUERY_STRING": url_info[4],
                "wsgi.version": (1, 0),
                "wsgi.url_scheme": "http",  # XXX incomplete
                "wsgi.input": cStringIO.StringIO(body or ""),
                "wsgi.errors": FileLikeErrorLogger(hlog),
                "wsgi.multithread": False,
                "wsgi.multiprocess": False,
                "wsgi.run_once": False,
            }
        )
        req = Request(fake_wsgi)

        timeout_handler = TimeoutHandler(timeout or 60)

        # Rebuild the request target from the parsed request object.
        url = str(req.path)
        if req.query_string:
            url += "?" + str(req.query_string)

        send("%s %s HTTP/1.1\r\n%s" % (req.method, url, str(req.headers)))

        if body:
            send(body)

        parser = HttpParser()
        chunks = []
        data = None
        while True:
            if data:
                consumed = parser.execute(data, len(data))
                if parser.is_headers_complete():
                    chunks.append(parser.recv_body())
                if parser.is_message_complete():
                    # Preserve any bytes belonging to the next message.
                    data = data[consumed:]
                    break
            ev, val = first(receive_any=True, sleep=timeout_handler.remaining())
            if ev == "sleep":
                timeout_handler.timeout()
            data = val

        return Response(response="".join(chunks), status=parser.get_status_code(), headers=parser.get_headers())
Example #16
0
class MitmProtocol(asyncio.Protocol):
    ''' Handles details of MITMing a TLS connection. '''

    def __init__(self, loop, http_version, proxy):
        ''' Constructor. '''

        self._http_version = http_version
        self._loop = loop
        # Incrementally parses the upstream HTTP response.
        self._parser = HttpParser()
        self._proxy = proxy
        # Resolved with (version, status, reason, headers, body) once the
        # upstream response is complete; cancelled if the connection drops.
        self._received = asyncio.Future()
        # Accumulated response body bytes.
        self._body = b''

    def connection_made(self, transport):
        ''' Save a reference to the transport. '''

        log.debug('MITM connection opened.')
        self._transport = transport
        cert = self._transport.get_extra_info('peercert')
        log.debug('MITM upstream certificate: {}'.format(cert))
        # Kick off TLS towards the downstream client on the next loop tick.
        self._loop.call_soon(self._proxy.start_tls, self._http_version)

    def connection_lost(self, exc):
        ''' Cancel the pending result if the upstream connection drops. '''
        log.debug('MITM connection closed.')
        self._received.cancel()

    def data_received(self, data):
        ''' Accumulate request data. '''

        log.debug('MITM data received: {}'.format(data))
        self._parser.execute(data, len(data))

        # Drain body bytes as they become available.
        if self._parser.is_partial_body():
            self._body += self._parser.recv_body()

        # Once the full response is parsed, publish it and close upstream.
        if self._parser.is_message_complete():
            version = self._parser.get_version()
            status = self._parser.get_status_code()
            reason = None # For some reason, the parser doesn't expose this :(
            headers = self._parser.get_headers()

            log.debug('MITM upstream status: {}'.format(status))
            log.debug('MITM upstream headers: {}'.format(headers))
            log.debug('MITM upstream body: {}...'.format(self._body[:1000]))

            self._received.set_result(
                (version, status, reason, headers, self._body)
            )
            self._transport.close()

    def forward(self, data):
        ''' Forward data to upstream host. '''

        log.debug('MITM sending data: {}'.format(data))
        self._transport.write(data)

    @asyncio.coroutine
    def receive(self):
        ''' Read data received by this MITM instance. '''

        # Suspends until data_received() sets the result (or the connection
        # is lost, which cancels the future).
        response = yield from self._received
        return response
Example #17
0
async def fetch(
    url: str,
    method: str = "GET",
    headers=None,
    body: Optional[bytes] = None,
    connect_timeout=DEFAULT_CONNECT_TIMEOUT,
    request_timeout=DEFAULT_REQUEST_TIMEOUT,
    resolver=resolve,
    max_buffer_size=DEFAULT_BUFFER_SIZE,
    follow_redirects: bool = False,
    max_redirects=DEFAULT_MAX_REDIRECTS,
    validate_cert=config.http_client.validate_certs,
    allow_proxy: bool = False,
    proxies=None,
    user: Optional[str] = None,
    password: Optional[str] = None,
    content_encoding: Optional[str] = None,
    eof_mark: Optional[bytes] = None,
) -> Tuple[int, Dict[str, Any], bytes]:
    """
    Perform an asynchronous HTTP(S) request.

    Errors are reported in-band via pseudo status codes (ERR_TIMEOUT,
    ERR_READ_TIMEOUT, ERR_PARSE_ERROR) with a bytes message as the body.

    :param url: Fetch URL
    :param method: request method "GET", "POST", "PUT" etc
    :param headers: Dict of additional headers
    :param body: Request body for POST and PUT request
    :param connect_timeout:
    :param request_timeout:
    :param resolver:
    :param follow_redirects:
    :param max_redirects:
    :param validate_cert:
    :param allow_proxy:
    :param proxies:
    :param user:
    :param password:
    :param max_buffer_size:
    :param content_encoding:
    :param eof_mark: Do not consider connection reset as error if
      eof_mark received (string or list)
    :return: code, headers, body
    """
    def get_connect_options():
        # Build the SSL context for a direct TLS connection. When going
        # through a proxy the TLS setup happens inside the CONNECT tunnel,
        # so no context is attached here.
        opts = {}
        if use_tls and not proxy:
            ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
            if validate_cert:
                ctx.check_hostname = True
                ctx.verify_mode = ssl.CERT_REQUIRED
            else:
                ctx.check_hostname = False
                ctx.verify_mode = ssl.CERT_NONE
            opts["ssl"] = ctx
        return opts

    metrics["httpclient_requests", ("method", method.lower())] += 1
    #
    if eof_mark:
        eof_mark = smart_bytes(eof_mark)
    # Detect proxy when necessary
    u = urlparse(str(url))
    use_tls = u.scheme == "https"
    proto = "HTTPS" if use_tls else "HTTP"
    logger.debug("%s %s %s", proto, method, url)
    if ":" in u.netloc:
        # Split on the LAST colon only: without maxsplit=1, an IPv6 netloc
        # such as "[::1]:8080" yields more than two parts and the unpacking
        # below raises ValueError.
        host, port = u.netloc.rsplit(":", 1)
        port = int(port)
    else:
        host = u.netloc
        port = DEFAULT_PORTS.get(u.scheme)
        if not port:
            return ERR_TIMEOUT, {}, b"Cannot resolve port for scheme: %s" % smart_bytes(
                u.scheme)
    if is_ipv4(host):
        addr = host
    else:
        addr = await resolver(host)
    if not addr:
        # Body must be bytes, consistent with the declared return type and
        # with every other error path in this function.
        return ERR_TIMEOUT, {}, b"Cannot resolve host: %s" % smart_bytes(host)
    # Detect proxy server
    if allow_proxy:
        proxy = (proxies or SYSTEM_PROXIES).get(u.scheme)
    else:
        proxy = None
    # Connect
    reader, writer = None, None
    if proxy:
        connect_address = proxy
    elif isinstance(addr, tuple):
        connect_address = addr
    else:
        connect_address = (addr, port)
    try:
        try:
            if proxy:
                logger.debug("Connecting to proxy %s:%s", connect_address[0],
                             connect_address[1])
            reader, writer = await asyncio.wait_for(
                asyncio.open_connection(connect_address[0], connect_address[1],
                                        **get_connect_options()),
                connect_timeout,
            )
        except ConnectionRefusedError:
            metrics["httpclient_timeouts"] += 1
            return ERR_TIMEOUT, {}, b"Connection refused"
        except OSError as e:
            metrics["httpclient_timeouts"] += 1
            return ERR_TIMEOUT, {}, b"Connection error: %s" % smart_bytes(e)
        except asyncio.TimeoutError:
            metrics["httpclient_timeouts"] += 1
            return ERR_TIMEOUT, {}, b"Connection timed out"
        # Proxy CONNECT
        if proxy:
            logger.debug("Sending CONNECT %s:%s", addr, port)
            # Send CONNECT request
            req = b"CONNECT %s:%s HTTP/1.1\r\nUser-Agent: %s\r\n\r\n" % (
                smart_bytes(addr),
                smart_bytes(port),
                smart_bytes(DEFAULT_USER_AGENT),
            )
            writer.write(smart_bytes(req))
            try:
                await asyncio.wait_for(writer.drain(), request_timeout)
            except asyncio.TimeoutError:
                metrics["httpclient_proxy_timeouts"] += 1
                return ERR_TIMEOUT, {}, b"Timed out while sending request to proxy"
            # Wait for proxy response
            parser = HttpParser()
            while not parser.is_headers_complete():
                try:
                    data = await asyncio.wait_for(reader.read(max_buffer_size),
                                                  request_timeout)
                except asyncio.TimeoutError:
                    metrics["httpclient_proxy_timeouts"] += 1
                    return ERR_TIMEOUT, {}, b"Timed out while sending request to proxy"
                received = len(data)
                parsed = parser.execute(data, received)
                if parsed != received:
                    return ERR_PARSE_ERROR, {}, b"Parse error"
            code = parser.get_status_code()
            logger.debug("Proxy response: %s", code)
            if not 200 <= code <= 299:
                # Body must be bytes, consistent with the declared return type
                return code, parser.get_headers(), b"Proxy error: %d" % code
        # Process request
        body = body or ""
        content_type = "application/binary"
        if not isinstance(body, (str, bytes)):
            body = smart_text(orjson.dumps(body))
            content_type = "text/json"
        body = smart_bytes(body)  # Here and below body is binary
        h = {
            "Host": str(u.netloc),
            "Connection": "close",
            "User-Agent": DEFAULT_USER_AGENT
        }
        if body and content_encoding:
            if content_encoding == CE_DEFLATE:
                # Deflate compression
                h["Content-Encoding"] = CE_DEFLATE
                compress = zlib.compressobj(
                    zlib.Z_DEFAULT_COMPRESSION,
                    zlib.DEFLATED,
                    -zlib.MAX_WBITS,
                    zlib.DEF_MEM_LEVEL,
                    zlib.Z_DEFAULT_STRATEGY,
                )
                body = compress.compress(body) + compress.flush()
            elif content_encoding == CE_GZIP:
                # gzip compression: raw deflate stream wrapped in a manually
                # built gzip header/trailer (mtime, CRC32, original size)
                h["Content-Encoding"] = CE_GZIP
                compress = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS,
                                            zlib.DEF_MEM_LEVEL, 0)
                crc = zlib.crc32(body, 0) & 0xFFFFFFFF
                body = b"\x1f\x8b\x08\x00%s\x02\xff%s%s%s%s" % (
                    to32u(int(time.time())),
                    compress.compress(body),
                    compress.flush(),
                    to32u(crc),
                    to32u(len(body)),
                )
        if method in REQUIRE_LENGTH_METHODS:
            h["Content-Length"] = str(len(body))
            h["Content-Type"] = content_type
        if user and password:
            # Include basic auth header
            uh = smart_text("%s:%s" % (user, password))
            h["Authorization"] = b"Basic %s" % codecs.encode(
                uh.encode("utf-8"), "base64").strip()
        if headers:
            h.update(headers)
        path = u.path
        if u.query:
            path += "?%s" % u.query
        req = b"%s %s HTTP/1.1\r\n%s\r\n\r\n%s" % (
            smart_bytes(method),
            smart_bytes(path),
            b"\r\n".join(b"%s: %s" % (smart_bytes(k), smart_bytes(h[k]))
                         for k in h),
            body,
        )
        try:
            writer.write(req)
            await asyncio.wait_for(writer.drain(), request_timeout)
        except ConnectionResetError:
            metrics["httpclient_timeouts"] += 1
            return ERR_TIMEOUT, {}, b"Connection reset while sending request"
        except asyncio.TimeoutError:
            metrics["httpclient_timeouts"] += 1
            return ERR_TIMEOUT, {}, b"Timed out while sending request"
        parser = HttpParser()
        response_body: List[bytes] = []
        while not parser.is_message_complete():
            try:
                data = await asyncio.wait_for(reader.read(max_buffer_size),
                                              request_timeout)
                is_eof = not data
            except (asyncio.IncompleteReadError, ConnectionResetError):
                is_eof = True
            except asyncio.TimeoutError:
                metrics["httpclient_timeouts"] += 1
                return ERR_READ_TIMEOUT, {}, b"Request timed out"
            if is_eof:
                if eof_mark and response_body:
                    # Check if EOF mark is in received data
                    response_body = [b"".join(response_body)]
                    # NOTE(review): eof_mark was passed through smart_bytes()
                    # above, so this str branch is presumably dead — confirm.
                    if isinstance(eof_mark, str):
                        if eof_mark in response_body[0]:
                            break
                    else:
                        found = False
                        for m in eof_mark:
                            if m in response_body[0]:
                                found = True
                                break
                        if found:
                            break
                metrics["httpclient_timeouts"] += 1
                return ERR_READ_TIMEOUT, {}, b"Connection reset"
            received = len(data)
            parsed = parser.execute(data, received)
            if parsed != received:
                return ERR_PARSE_ERROR, {}, b"Parse error"
            if parser.is_partial_body():
                response_body += [parser.recv_body()]
        code = parser.get_status_code()
        parsed_headers = parser.get_headers()
        logger.debug("HTTP Response %s", code)
        if 300 <= code <= 399 and follow_redirects:
            # Process redirects
            if max_redirects > 0:
                new_url = parsed_headers.get("Location")
                if not new_url:
                    return ERR_PARSE_ERROR, {}, b"No Location header"
                logger.debug("HTTP redirect %s %s", code, new_url)
                return await fetch(
                    new_url,
                    method="GET",
                    headers=headers,
                    connect_timeout=connect_timeout,
                    request_timeout=request_timeout,
                    resolver=resolver,
                    max_buffer_size=max_buffer_size,
                    follow_redirects=follow_redirects,
                    max_redirects=max_redirects - 1,
                    validate_cert=validate_cert,
                    allow_proxy=allow_proxy,
                    proxies=proxies,
                )
            else:
                return 404, {}, b"Redirect limit exceeded"
        # @todo: Process gzip and deflate Content-Encoding
        return code, parsed_headers, b"".join(response_body)
    finally:
        if writer:
            writer.close()
            try:
                await writer.wait_closed()
            except ConnectionResetError:
                pass
Example #18
0
class TitleFetcher:
    '''Fetch a URL via tornado iostreams and deliver its title (or media
    type, or an error singleton) asynchronously through ``callback``.
    '''
    status_code = 0
    followed_times = 0  # 301, 302
    finder = None  # finder instance chosen for the current response
    addr = None  # (host, port) of the currently open connection
    stream = None  # tornado IOStream / SSLIOStream
    max_follows = 10
    timeout = 15
    _finished = False  # set once run_callback() has fired
    _cookie = None  # 'Cookie: ...' header line collected from Set-Cookie
    _connected = False
    _redirected_stream = None  # stream being abandoned due to a redirect
    _content_finders = (TitleFinder, PNGFinder, JPEGFinder, GIFFinder)
    _url_finders = ()

    def __init__(
        self,
        url,
        callback,
        timeout=None,
        max_follows=None,
        io_loop=None,
        content_finders=None,
        url_finders=None,
        referrer=None,
        run_at_init=True,
    ):
        '''
    url: the (full) url to fetch
    callback: called with title or MediaType or an instance of SingletonFactory
    timeout: total time including redirection before giving up
    max_follows: max redirections

    may raise:
    <UnicodeError: label empty or too long> in host preparation
    '''
        self._callback = callback
        self.referrer = referrer
        if max_follows is not None:
            self.max_follows = max_follows

        if timeout is not None:
            self.timeout = timeout
        # 'current' exists on newer tornado versions; fall back to
        # 'instance' on older ones.
        if hasattr(tornado.ioloop, 'current'):
            default_io_loop = tornado.ioloop.IOLoop.current
        else:
            default_io_loop = tornado.ioloop.IOLoop.instance
        self.io_loop = io_loop or default_io_loop()

        if content_finders is not None:
            self._content_finders = content_finders
        if url_finders is not None:
            self._url_finders = url_finders

        self.origurl = url
        self.url_visited = []
        if run_at_init:
            self.run()

    def run(self):
        '''Start the fetch. May be called at most once per instance.'''
        if self.url_visited:
            raise Exception("can't run again")
        else:
            # One overall deadline covering the whole fetch, redirects
            # included.
            self.start_time = self.io_loop.time()
            self._timeout = self.io_loop.add_timeout(
                self.timeout + self.start_time,
                self.on_timeout,
            )
            try:
                self.new_url(self.origurl)
            except:
                # Don't leave a dangling timeout if starting failed.
                self.io_loop.remove_timeout(self._timeout)
                raise

    def on_timeout(self):
        logger.debug('%s: request timed out', self.origurl)
        self.run_callback(Timeout)

    def parse_url(self, url):
        '''parse `url`, set self.host and return address and stream class'''
        self.url = u = urlsplit(url)
        self.host = u.netloc

        if u.scheme == 'http':
            addr = u.hostname, u.port or 80
            stream = tornado.iostream.IOStream
        elif u.scheme == 'https':
            addr = u.hostname, u.port or 443
            stream = tornado.iostream.SSLIOStream
        else:
            raise ValueError('bad url: %r' % url)

        return addr, stream

    def new_connection(self, addr, StreamClass):
        '''set self.addr, self.stream and connect to host'''
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.addr = addr
        self.stream = StreamClass(s)
        logger.debug('%s: connecting to %s...', self.origurl, addr)
        self.stream.set_close_callback(self.before_connected)
        self.stream.connect(addr, self.send_request)

    def new_url(self, url):
        '''Fetch `url`: either hand off to a matching URL finder, reuse the
        current connection, or open a new one.'''
        self.url_visited.append(url)
        self.fullurl = url

        # URL finders take precedence over a plain HTTP fetch.
        for finder in self._url_finders:
            f = finder.match_url(url, self)
            if f:
                self.finder = f
                f()
                return

        addr, StreamClass = self.parse_url(url)
        if addr != self.addr:
            if self.stream:
                self.stream.close()
            self.new_connection(addr, StreamClass)
        else:
            logger.debug('%s: try to reuse existing connection to %s',
                         self.origurl, self.addr)
            try:
                self.send_request(nocallback=True)
            except tornado.iostream.StreamClosedError:
                logger.debug(
                    '%s: server at %s doesn\'t like keep-alive, will reconnect.',
                    self.origurl, self.addr)
                # The close callback should have already run
                self.stream.close()
                self.new_connection(addr, StreamClass)

    def run_callback(self, arg):
        '''Finish: cancel the deadline, close the stream, invoke callback.'''
        self.io_loop.remove_timeout(self._timeout)
        self._finished = True
        if self.stream:
            self.stream.close()
        self._callback(arg, self)

    def send_request(self, nocallback=False):
        '''Write the HTTP GET request; register read callbacks unless
        `nocallback` (connection-reuse probe) is set.'''
        self._connected = True
        req = [
            'GET %s HTTP/1.1',
            'Host: %s',
            # t.co will return 200 and use js/meta to redirect using the following :-(
            # 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0',
            'User-Agent: %s' % UserAgent,
            'Accept: text/html,application/xhtml+xml;q=0.9,*/*;q=0.7',
            'Accept-Language: zh-cn,zh;q=0.7,en;q=0.3',
            'Accept-Charset: utf-8,gb18030;q=0.7,*;q=0.7',
            'Accept-Encoding: gzip, deflate',
            'Connection: keep-alive',
        ]
        if self.referrer is not None:
            # Escape '%' so the later %-formatting doesn't consume it.
            req.append('Referer: ' + self.referrer.replace('%', '%%'))
        path = self.url.path or '/'
        if self.url.query:
            path += '?' + self.url.query
        req = '\r\n'.join(req) % (
            path,
            self._prepare_host(self.host),
        )
        if self._cookie:
            req += '\r\n' + self._cookie
        req += '\r\n\r\n'
        self.stream.write(req.encode())
        self.headers_done = False
        self.parser = HttpParser(decompress=True)
        if not nocallback:
            self.stream.read_until_close(
                # self.addr and self.stream may have been changed when close callback is run
                partial(self.on_data,
                        close=True,
                        addr=self.addr,
                        stream=self.stream),
                streaming_callback=self.on_data,
            )

    def _prepare_host(self, host):
        '''IDNA-encode `host` for use in the Host header.'''
        host = encodings.idna.nameprep(host)
        return b'.'.join(
            encodings.idna.ToASCII(x) if x else b''
            for x in host.split('.')).decode('ascii')

    def on_data(self, data, close=False, addr=None, stream=None):
        '''Streaming read callback; `close=True` marks the final call made
        when the connection closes.'''
        if close:
            logger.debug('%s: connection to %s closed.', self.origurl, addr)

        if self.stream.error:
            self.run_callback(self.stream.error)
            return

        if (close and stream
                and self._redirected_stream is stream) or self._finished:
            # The connection is closing, and we are being redirected or we're done.
            self._redirected_stream = None
            return

        recved = len(data)
        logger.debug('%s: received data: %d bytes', self.origurl, recved)

        p = self.parser
        nparsed = p.execute(data, recved)
        if close:
            # feed EOF
            p.execute(b'', 0)

        if not self.headers_done and p.is_headers_complete():
            if not self.on_headers_done():
                return

        if p.is_partial_body():
            chunk = p.recv_body()
            if self.finder is None:
                # redirected but has body received
                return
            t = self.feed_finder(chunk)
            if t is not None:
                self.run_callback(t)
                return

        if p.is_message_complete():
            if self.finder is None:
                # redirected but has body received
                return
            t = self.feed_finder(None)
            # if title not found, t is None
            self.run_callback(t)
        elif close:
            self.run_callback(self.stream.error or ConnectionClosed)

    def before_connected(self):
        '''check if something wrong before connected'''
        if not self._connected and not self._finished:
            self.run_callback(self.stream.error)

    def process_cookie(self):
        '''Collect cookie values from Set-Cookie for the redirect request.'''
        setcookie = self.headers.get('Set-Cookie', None)
        if not setcookie:
            return

        cookies = [
            c.rsplit(None, 1)[-1] for c in setcookie.split('; expires')[:-1]
        ]
        self._cookie = 'Cookie: ' + '; '.join(cookies)

    def on_headers_done(self):
        '''returns True if should proceed, None if should stop for current chunk'''
        self.headers_done = True
        self.headers = self.parser.get_headers()

        self.status_code = self.parser.get_status_code()
        if self.status_code in (301, 302):
            self.process_cookie()  # or we may be redirecting to a loop
            logger.debug('%s: redirect to %s', self.origurl,
                         self.headers['Location'])
            self.followed_times += 1
            if self.followed_times > self.max_follows:
                self.run_callback(TooManyRedirection)
            else:
                newurl = urljoin(self.fullurl, self.headers['Location'])
                self._redirected_stream = self.stream
                self.new_url(newurl)
            return

        try:
            l = int(self.headers.get('Content-Length', None))
        except (ValueError, TypeError):
            l = None

        # Dispatch on Content-Type to the first matching content finder;
        # if none matches, report the media type itself.
        ctype = self.headers.get('Content-Type', 'text/html')
        mt = defaultMediaType._replace(type=ctype, size=l)
        for finder in self._content_finders:
            f = finder.match_type(mt)
            if f:
                self.finder = f
                break
        else:
            self.run_callback(mt)
            return

        return True

    def feed_finder(self, chunk):
        '''feed data to finder, return the title if found'''
        t = self.finder(chunk)
        if t is not None:
            return t
Example #19
0
class TitleFetcher:
  '''Fetch a URL via tornado iostreams and deliver its title (or media
  type, or an error singleton) asynchronously through ``callback``.
  '''
  default_charset = 'UTF-8'
  # Sniff the page charset from <meta http-equiv=...> or <meta charset=...>.
  meta_charset = re.compile(br'<meta\s+http-equiv="?content-type"?\s+content="?[^;]+;\s*charset=([^">]+)"?\s*/?>|<meta\s+charset="?([^">/"]+)"?\s*/?>', re.IGNORECASE)
  charset = None
  status_code = 0
  followed_times = 0 # 301, 302
  addr = None  # (host, port) of the currently open connection
  stream = None  # tornado IOStream / SSLIOStream
  max_follows = 10
  timeout = 15
  _finished = False  # set once run_callback() has fired
  _cookie = None  # 'Cookie: ...' header line collected from Set-Cookie
  _connected = False
  _redirecting = False  # True while abandoning a connection for a redirect

  def __init__(self, url, callback,
               timeout=None, max_follows=None, io_loop=None):
    '''
    url: the (full) url to fetch
    callback: called with title or MediaType or an instance of SingletonFactory
    timeout: total time including redirection before giving up
    max_follows: max redirections
    '''
    self._callback = callback
    if max_follows is not None:
      self.max_follows = max_follows

    if timeout is not None:
      self.timeout = timeout
    self.io_loop = io_loop or tornado.ioloop.IOLoop.instance()

    # One overall deadline covering the whole fetch, redirects included.
    self.start_time = self.io_loop.time()
    self._timeout = self.io_loop.add_timeout(
      self.timeout + self.start_time,
      self.on_timeout,
    )
    self.origurl = url
    self.new_url(url)

  def on_timeout(self):
    self.run_callback(Timeout)

  def parse_url(self, url):
    '''parse `url`, set self.host and return address and stream class'''
    self.url = u = urlsplit(url)
    self.host = u.netloc

    if u.scheme == 'http':
      addr = u.hostname, u.port or 80
      stream = tornado.iostream.IOStream
    elif u.scheme == 'https':
      addr = u.hostname, u.port or 443
      stream = tornado.iostream.SSLIOStream
    else:
      raise ValueError('bad url: %r' % url)

    return addr, stream

  def new_connection(self, addr, StreamClass):
    '''set self.addr, self.stream and connect to host'''
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.addr = addr
    self.stream = StreamClass(s)
    logger.debug('%s: connecting to %s...', self.origurl, addr)
    self.stream.set_close_callback(self.before_connected)
    self.stream.connect(addr, self.send_request)

  def new_url(self, url):
    '''Fetch `url`, reusing the existing connection when it points to the
    same address, else opening a new one.'''
    self.fullurl = url
    addr, StreamClass = self.parse_url(url)
    if addr != self.addr:
      if self.stream:
        self.stream.close()
      self.new_connection(addr, StreamClass)
    else:
      logger.debug('%s: try to reuse existing connection to %s', self.origurl, self.addr)
      try:
        self.send_request(nocallback=True)
      except tornado.iostream.StreamClosedError:
        logger.debug('%s: server at %s doesn\'t like keep-alive, will reconnect.', self.origurl, self.addr)
        # The close callback should have already run
        self.stream.close()
        self.new_connection(addr, StreamClass)

  def run_callback(self, arg):
    '''Finish: cancel the deadline, close the stream, invoke callback.'''
    self.io_loop.remove_timeout(self._timeout)
    self._finished = True
    self.stream.close()
    self._callback(arg, self)

  def send_request(self, nocallback=False):
    '''Write the HTTP GET request; register read callbacks unless
    `nocallback` (connection-reuse probe) is set.'''
    self._connected = True
    req = ('GET %s HTTP/1.1',
           'Host: %s',
           # t.co will return 200 and use js/meta to redirect using the following :-(
           # 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0',
           'User-Agent: FetchTitle/1.0',
           'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.7',
           'Accept-Language: zh-cn,zh;q=0.7,en;q=0.3',
           'Accept-Charset: utf-8,gb18030;q=0.7,*;q=0.7',
           'Accept-Encoding: gzip, deflate',
           'Connection: keep-alive',
          )
    path = self.url.path or '/'
    if self.url.query:
      path += '?' + self.url.query
    req = '\r\n'.join(req) % (
      path, self.host,
    )
    if self._cookie:
      req += '\r\n' + self._cookie
    req += '\r\n\r\n'
    self.stream.write(req.encode())
    self.headers_done = False
    self.parser = HttpParser(decompress=True)
    if not nocallback:
      self.stream.read_until_close(
        # self.addr will have been changed when close callback is run
        partial(self.on_data, close=True, addr=self.addr),
        streaming_callback=self.on_data,
      )

  def on_data(self, data, close=False, addr=None):
    '''Streaming read callback; `close=True` marks the final call made
    when the connection closes.'''
    if close:
      logger.debug('%s: connection to %s closed.', self.origurl, addr)

    if (close and self._redirecting) or self._finished:
      # The connection is closing, and we are being redirected or we're done.
      self._redirecting = False
      return

    recved = len(data)
    logger.debug('%s: received data: %d bytes', self.origurl, recved)

    p = self.parser
    nparsed = p.execute(data, recved)
    if close:
      # feed EOF
      p.execute(b'', 0)

    if not self.headers_done and p.is_headers_complete():
      if not self.on_headers_done():
        return

    if p.is_partial_body():
      chunk = p.recv_body()
      # Sniff the charset from the first chunks of HTML if not yet known.
      if not self.charset:
        m = self.meta_charset.search(chunk)
        if m:
          self.charset = (m.group(1) or m.group(2)).decode('latin1')
      t = self.feed_finder(chunk)
      if t:
        self.run_callback(t)
        return

    if p.is_message_complete():
      t = self.feed_finder(None)
      # if title not found, t is None
      self.run_callback(t)
    elif close:
      self.run_callback(self.stream.error or ConnectionClosed)

  def before_connected(self):
    '''check if something wrong before connected'''
    if not self._connected and not self._finished:
      self.run_callback(self.stream.error)

  def process_cookie(self):
    '''Collect cookie values from Set-Cookie for the redirect request.'''
    setcookie = self.headers.get('Set-Cookie', None)
    if not setcookie:
      return

    cookies = [c.rsplit(None, 1)[-1] for c in setcookie.split('; expires')[:-1]]
    self._cookie = 'Cookie: ' + '; '.join(cookies)

  def on_headers_done(self):
    '''returns True if should proceed, None if should stop for current chunk'''
    self.headers_done = True
    self.headers = self.parser.get_headers()

    self.status_code = self.parser.get_status_code()
    if self.status_code in (301, 302):
      self.process_cookie() # or we may be redirecting to a loop
      logger.debug('%s: redirect to %s', self.origurl, self.headers['Location'])
      self.followed_times += 1
      if self.followed_times > self.max_follows:
        self.run_callback(TooManyRedirection)
      else:
        newurl = urljoin(self.fullurl, self.headers['Location'])
        self._redirecting = True
        self.new_url(newurl)
      return

    # Non-HTML content: dispatch to an image finder or report the media
    # type directly. HTML content: look for a <title>.
    ctype = self.headers.get('Content-Type', 'text/html')
    if ctype.find('html') == -1:
      try:
        l = int(self.headers.get('Content-Length', None))
      except (ValueError, TypeError):
        l = None
      mt = defaultMediaType._replace(type=ctype, size=l)
      ctype = ctype.split(';', 1)[0]
      if ctype == 'image/png':
        self.finder = PNGFinder(mt)
      elif ctype == 'image/jpeg':
        self.finder = JPEGFinder(mt)
      elif ctype == 'image/gif':
        self.finder = GIFFinder(mt)
      else:
        self.run_callback(mt)
        return
    else:
      self.finder = TitleFinder()
      # Prefer the charset declared in the Content-Type header.
      pos = ctype.find('charset=')
      if pos > 0:
        self.charset = ctype[pos+8:]

    return True

  def feed_finder(self, chunk):
    '''feed data to TitleFinder, return the title if found'''
    t = self.finder(chunk)
    if t:
      if self.charset is None:
        self.charset = self.default_charset
      if isinstance(t, bytes):
        try:
          title = replaceEntities(t.decode(self.charset))
          return title
        except (UnicodeDecodeError, LookupError):
          # Undecodable or unknown charset: fall back to the raw bytes.
          return t
      else:
        return t
Example #20
0
class HttpProxyProtocol(asyncio.Protocol):
    ''' Implement HTTP(S) proxy behavior.

    Accepts client requests, signs them for the Instagram/Twitter APIs
    using tokens from the token store, forwards them upstream (either in
    plaintext or through an MITM TLS tunnel set up on CONNECT), and relays
    the upstream response back to the client.
    '''

    def __init__(self, loop, config, token_store):
        ''' Constructor.

        loop: asyncio event loop used for upstream connections.
        config: mapping with 'Instagram' and 'Twitter' credential sections.
        token_store: dispenses OAuth tokens and tracks API rate limits.
        '''

        self._parser = HttpParser()   # parses the client -> proxy request
        self._body = b''              # accumulated client request body
        self._config = config
        self._loop = loop
        self._mitm = None             # MitmProtocol after a CONNECT
        self._mitm_host = None        # upstream host while MITM-ing TLS
        self._token_store = token_store

        self._instagram = InstagramApi(
            client_id=config['Instagram']['ClientID'],
            client_secret=config['Instagram']['ClientSecret'],
        )

        self._twitter = TwitterApi(
            consumer_key=config['Twitter']['ConsumerKey'],
            consumer_secret=config['Twitter']['ConsumerSecret'],
            app_token=config['Twitter']['AppToken'],
            app_token_secret=config['Twitter']['AppTokenSecret'],
        )

    def connection_made(self, transport):
        ''' Save a reference to the transport so that we can send a reply. '''
        log.debug('Connection opened.')
        self._transport = transport

    def connection_lost(self, exc):
        ''' Log the end of the client connection. '''
        log.debug('Connection closed.')

    def data_received(self, data):
        ''' Parse incoming HTTP request and dispatch it once complete. '''

        log.debug('Data received: {}'.format(data))
        self._parser.execute(data, len(data))

        if self._parser.is_partial_body():
            self._body += self._parser.recv_body()

        if self._parser.is_message_complete():
            method = self._parser.get_method()
            uri = self._parser.get_url()
            version = self._parser.get_version()
            headers = self._parser.get_headers()
            content_type = headers.get('Content-type', '')
            charset = _get_charset(content_type)
            body = self._body.decode(charset)

            log.debug('Client charset: {}'.format(charset))
            log.debug('Client status: method={} uri={} version={}' \
                      .format(method, uri, version))
            log.debug('Client headers: {}'.format(headers))
            log.debug('Client body: {}...'.format(body[:1000]))

            # asyncio.async() is a syntax error on Python 3.7+ (``async``
            # became a keyword); ensure_future() is the equivalent call.
            if method == 'CONNECT':
                asyncio.ensure_future(self._start_mitm(uri, version))
                self._parser = HttpParser()
            else:
                # NOTE(review): self._body is not reset here, so a second
                # request on the same connection would see stale body data
                # -- confirm whether keep-alive clients are expected.
                asyncio.ensure_future(
                    self._request_upstream(
                        method,
                        uri,
                        version,
                        headers,
                        body
                    )
                )

    def start_tls(self, version):
        '''
        Initiate TLS session with the client.

        This part is completely hacky! We mess around with the
        transport's internals in order to wrap the current transport in TLS.
        Python doesn't have an official way to do this, although it *might*
        get fixed in 3.6: http://bugs.python.org/issue23749
        '''

        log.debug('The proxy is starting TLS with its client.')

        status_line = 'HTTP/{}.{} {} {}\r\n\r\n' \
                      .format(version[0], version[1], 200, 'OK')
        self._transport.write(status_line.encode('ascii'))

        ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        ssl_context.set_ciphers('HIGH:!aNull:!eNull')
        ssl_context.load_cert_chain('ssl/server.crt', 'ssl/server.key')

        # Private attributes: no public API exists for upgrading an
        # established transport to TLS (see docstring above).
        original_socket = self._transport._sock
        self._transport = self._loop._make_ssl_transport(
            original_socket,
            self,
            ssl_context,
            server_side=True
        )

    @asyncio.coroutine
    def _request_upstream(self, method, uri, version, headers, body):
        ''' Forward the request to the upstream server.

        Wraps the helper so any failure is reported to the client as a
        500 with the traceback, then re-raised for the server log.
        '''

        try:
            yield from self._request_upstream_helper(
                method, uri, version, headers, body
            )
        except Exception:
            charset = _get_charset(headers.get('Content-type', ''))
            response = render_http_response(
                version,
                500,
                'PROXY ERROR',
                {'Content-type': 'text/plain; charset={}'.format(charset)},
                traceback.format_exc().encode(charset)
            )
            self._transport.write(response)
            self._transport.close()
            raise

    @asyncio.coroutine
    def _request_upstream_helper(self, method, uri, version, headers, body):
        ''' Sign the request, forward it upstream, and relay the response. '''

        log.debug('_request_upstream(): method={}, uri={}' \
                  .format(method, uri))

        # Parse unconditionally: the signing branches below need `parsed`
        # even when not MITM-ing. (It was previously only bound inside the
        # MITM branch, causing a NameError on plaintext Instagram requests.)
        parsed = urlparse(uri)

        if self._mitm_host:
            # During MITM the client sends origin-form URIs; rebuild the
            # absolute URL against the CONNECT-ed host.
            url = 'https://{}{}'.format(self._mitm_host, parsed.path)
        else:
            url = uri

        token, remaining = self._token_store.dispense(url)
        log.debug('Signing request with {} token: {}.'
                  .format(token.site, token.public))

        if 'instagram' in url:
            qp = parse_qs(parsed.query)
            qp['access_token'] = token.public

            qp['sig'] = self._instagram.oauth_sign(
                method=method,
                url=url,
                token=token,
                query_params=qp,
                body_params=parse_qs(body)
            )

            params = ['{}={}'.format(quote(k.encode('utf8')), quote(v.encode('utf8'))) for k,v in qp.items()]
            uri = '{}?{}'.format(parsed.path, '&'.join(params))
            log.debug('Signed instagram URL: {}'.format(uri))
        elif 'twitter' in url:
            headers['Authorization'] = self._twitter.oauth_sign(
                method=method,
                url=url,
                token=token.public,
                token_secret=token.secret,
                query_params=parse_qs(parsed.query),
                body_params=parse_qs(body)
            )
        else:
            raise ValueError('No signing algorithm known for URL: {}'
                             .format(url))

        if self._mitm is None:
            # Plaintext path: open our own connection to the origin server.
            # Use a distinct name so `url` (the string) stays intact for
            # update_rate_limit() below.
            target = urlparse(uri)
            host = target.hostname
            port = target.port

            if port is None:
                port = 80 if target.scheme == 'http' else 443

            log.debug('Connecting to upstream (plaintext).')
            upstream = yield from asyncio.open_connection(host, port)
            upstream_reader, upstream_writer = upstream
            request = render_http_request(method, uri, version, headers, body)
            upstream_writer.write(request)

            resp_body = b''
            parser = HttpParser()

            while True:
                if not parser.is_headers_complete():
                    data = yield from upstream_reader.readline()
                else:
                    # NOTE(review): assumes the upstream response always
                    # carries Content-Length; a missing header raises
                    # KeyError here -- confirm against the signed APIs.
                    data = yield from upstream_reader.read(
                        int(parser.get_headers()['Content-Length'])
                    )

                log.debug('Received plaintext from upstream: {}'.format(data))
                parser.execute(data, len(data))

                if parser.is_partial_body():
                    # Accumulate the *response* body in its own buffer;
                    # appending to `body` (the client's decoded request
                    # string) mixed request/response data and raised
                    # TypeError on str += bytes.
                    resp_body += parser.recv_body()

                if parser.is_message_complete():
                    # Read status/headers from the upstream response parser,
                    # not self._parser (which holds the client's request).
                    version = parser.get_version()
                    status = parser.get_status_code()
                    reason = None # For some reason, the parser doesn't expose this :(
                    headers = parser.get_headers()

                    if status == 200:
                        self._token_store.update_rate_limit(url, headers)

                    log.debug('Plaintext upstream status: {}'.format(status))
                    log.debug('Plaintext upstream headers: {}'.format(headers))
                    log.debug('Plaintext upstream body: {}...'.format(resp_body[:1000]))

                    response = render_http_response(
                        version, status, reason, headers, resp_body
                    )

                    break

            upstream_writer.close()

        else:
            # MITM path: reuse the TLS tunnel established on CONNECT.
            upstream_write = self._mitm.forward
            request = render_http_request(method, uri, version, headers, body)
            upstream_write(request)
            response = yield from self._mitm.receive()
            version, status, reason, headers, body = response

            if status == 200:
                # NOTE(review): this passes (token, url, headers) while the
                # plaintext path passes (url, headers) -- confirm which
                # signature update_rate_limit() actually has.
                self._token_store.update_rate_limit(token, url, headers)

            response = render_http_response(
                version, status, reason, headers, body
            )

        # Forward the upstream response to the client.
        self._transport.write(response)
        self._transport.close()

    def _set_header(self, key, value):
        ''' Set a header value (keys normalized to upper case).

        NOTE(review): self._headers is never initialized in __init__;
        verify this method's caller creates it first.
        '''

        key = key.strip().upper()
        value = value.strip()
        self._headers[key] = value

    @asyncio.coroutine
    def _start_mitm(self, uri, version):
        ''' MITM a connection to the upstream server.

        `uri` is the CONNECT authority-form target, e.g. "host:443".
        '''

        log.debug('The proxy is starting an MITM connection.')
        host, port = uri.split(':')
        port = int(port)
        self._mitm_host = host

        _, self._mitm = yield from self._loop.create_connection(
            lambda: MitmProtocol(self._loop, version, self),
            host,
            port,
            ssl = ssl.create_default_context()
        )
Example #21
0
class TitleFetcher:
  '''Fetch a URL asynchronously and report its <title> or media type.

  Drives a tornado IOStream through an HTTP request, following up to
  `max_follows` redirects on one shared deadline. The result handed to
  `callback` is a title string, a MediaType, or a marker object such as
  Timeout / TooManyRedirection / ConnectionClosed.
  '''
  status_code = 0
  followed_times = 0 # 301, 302
  finder = None                  # content finder chosen from headers or URL
  addr = None                    # (host, port) of the current connection
  stream = None                  # tornado IOStream / SSLIOStream
  max_follows = 10
  timeout = 15                   # seconds, covering redirects too
  _finished = False              # set once the callback has fired
  _cookie = None                 # 'Cookie: ...' line collected on redirect
  _connected = False
  _redirected_stream = None      # stream being abandoned due to a redirect
  _content_finders = (TitleFinder, PNGFinder, JPEGFinder, GIFFinder)
  _url_finders = ()

  def __init__(self, url, callback,
               timeout=None, max_follows=None, io_loop=None,
               content_finders=None, url_finders=None
              ):
    '''
    url: the (full) url to fetch
    callback: called with title or MediaType or an instance of SingletonFactory
    timeout: total time including redirection before giving up
    max_follows: max redirections
    io_loop: tornado IOLoop to run on (defaults to current/singleton loop)
    content_finders: finder classes matched against the Content-Type
    url_finders: finder classes matched against the URL before connecting
    '''
    self._callback = callback
    if max_follows is not None:
      self.max_follows = max_follows

    if timeout is not None:
      self.timeout = timeout
    # NOTE(review): this checks the *module* for 'current' but the
    # attribute used is on the IOLoop class -- verify against the tornado
    # versions this must support.
    if hasattr(tornado.ioloop, 'current'):
        default_io_loop = tornado.ioloop.IOLoop.current
    else:
        default_io_loop = tornado.ioloop.IOLoop.instance
    self.io_loop = io_loop or default_io_loop()

    if content_finders is not None:
      self._content_finders = content_finders
    if url_finders is not None:
      self._url_finders = url_finders

    # One absolute deadline for the whole fetch, redirects included.
    self.start_time = self.io_loop.time()
    self._timeout = self.io_loop.add_timeout(
      self.timeout + self.start_time,
      self.on_timeout,
    )
    self.origurl = url
    self.url_visited = []
    self.new_url(url)

  def on_timeout(self):
    # Deadline hit: report Timeout through the normal callback path.
    self.run_callback(Timeout)

  def parse_url(self, url):
    '''parse `url`, set self.host and return address and stream class'''
    self.url = u = urlsplit(url)
    self.host = u.netloc

    if u.scheme == 'http':
      addr = u.hostname, u.port or 80
      stream = tornado.iostream.IOStream
    elif u.scheme == 'https':
      addr = u.hostname, u.port or 443
      stream = tornado.iostream.SSLIOStream
    else:
      raise ValueError('bad url: %r' % url)

    return addr, stream

  def new_connection(self, addr, StreamClass):
    '''set self.addr, self.stream and connect to host'''
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.addr = addr
    self.stream = StreamClass(s)
    logger.debug('%s: connecting to %s...', self.origurl, addr)
    # If the stream closes before connect succeeds, surface the error.
    self.stream.set_close_callback(self.before_connected)
    self.stream.connect(addr, self.send_request)

  def new_url(self, url):
    '''Start fetching `url`, reusing the current connection if possible.'''
    self.url_visited.append(url)
    self.fullurl = url

    # URL-based finders short-circuit the HTTP fetch entirely.
    for finder in self._url_finders:
      f = finder.match_url(url, self)
      if f:
        self.finder = f
        f()
        return

    addr, StreamClass = self.parse_url(url)
    if addr != self.addr:
      if self.stream:
        self.stream.close()
      self.new_connection(addr, StreamClass)
    else:
      # Same host:port (e.g. a redirect back to itself): try keep-alive.
      logger.debug('%s: try to reuse existing connection to %s', self.origurl, self.addr)
      try:
        self.send_request(nocallback=True)
      except tornado.iostream.StreamClosedError:
        logger.debug('%s: server at %s doesn\'t like keep-alive, will reconnect.', self.origurl, self.addr)
        # The close callback should have already run
        self.stream.close()
        self.new_connection(addr, StreamClass)

  def run_callback(self, arg):
    '''Deliver the final result exactly once and tear everything down.'''
    self.io_loop.remove_timeout(self._timeout)
    self._finished = True
    if self.stream:
      self.stream.close()
    self._callback(arg, self)

  def send_request(self, nocallback=False):
    '''Write the GET request; register the read callback unless reusing.'''
    self._connected = True
    req = ('GET %s HTTP/1.1',
           'Host: %s',
           # t.co will return 200 and use js/meta to redirect using the following :-(
           # 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0',
           'User-Agent: %s' % UserAgent,
           'Accept: text/html,application/xhtml+xml;q=0.9,*/*;q=0.7',
           'Accept-Language: zh-cn,zh;q=0.7,en;q=0.3',
           'Accept-Charset: utf-8,gb18030;q=0.7,*;q=0.7',
           'Accept-Encoding: gzip, deflate',
           'Connection: keep-alive',
          )
    path = self.url.path or '/'
    if self.url.query:
      path += '?' + self.url.query
    req = '\r\n'.join(req) % (
      path, self._prepare_host(self.host),
    )
    if self._cookie:
      req += '\r\n' + self._cookie
    req += '\r\n\r\n'
    self.stream.write(req.encode())
    # A fresh parser per request; decompress handles gzip/deflate bodies.
    self.headers_done = False
    self.parser = HttpParser(decompress=True)
    if not nocallback:
      self.stream.read_until_close(
        # self.addr will have been changed when close callback is run
        partial(self.on_data, close=True, addr=self.addr),
        streaming_callback=self.on_data,
      )

  def _prepare_host(self, host):
    # IDNA-encode the host so non-ASCII domains work in the Host header.
    host = encodings.idna.nameprep(host)
    return b'.'.join(encodings.idna.ToASCII(x) for x in host.split('.')).decode('ascii')

  def on_data(self, data, close=False, addr=None):
    '''Streaming read callback; `close` marks the final call at EOF.'''
    if close:
      logger.debug('%s: connection to %s closed.', self.origurl, addr)

    if (close and self._redirected_stream is self.stream) or self._finished:
      # The connection is closing, and we are being redirected or we're done.
      self._redirected_stream = None
      return

    recved = len(data)
    logger.debug('%s: received data: %d bytes', self.origurl, recved)

    p = self.parser
    nparsed = p.execute(data, recved)
    if close:
      # feed EOF
      p.execute(b'', 0)

    if not self.headers_done and p.is_headers_complete():
      # on_headers_done returns falsy to stop (redirect / dispatched media).
      if not self.on_headers_done():
        return

    if p.is_partial_body():
      chunk = p.recv_body()
      if self.finder is None:
        # redirected but has body received
        return
      t = self.feed_finder(chunk)
      if t is not None:
        self.run_callback(t)
        return

    if p.is_message_complete():
      if self.finder is None:
        # redirected but has body received
        return
      t = self.feed_finder(None)
      # if title not found, t is None
      self.run_callback(t)
    elif close:
      # Connection dropped before the message finished.
      self.run_callback(self.stream.error or ConnectionClosed)

  def before_connected(self):
    '''check if something wrong before connected'''
    if not self._connected and not self._finished:
      self.run_callback(self.stream.error)

  def process_cookie(self):
    '''Collect Set-Cookie values so redirects carry the session along.'''
    setcookie = self.headers.get('Set-Cookie', None)
    if not setcookie:
      return

    # Crude parse: keep the value fragment before each '; expires' marker.
    cookies = [c.rsplit(None, 1)[-1] for c in setcookie.split('; expires')[:-1]]
    self._cookie = 'Cookie: ' + '; '.join(cookies)

  def on_headers_done(self):
    '''returns True if should proceed, None if should stop for current chunk'''
    self.headers_done = True
    self.headers = self.parser.get_headers()

    self.status_code = self.parser.get_status_code()
    if self.status_code in (301, 302):
      self.process_cookie() # or we may be redirecting to a loop
      logger.debug('%s: redirect to %s', self.origurl, self.headers['Location'])
      self.followed_times += 1
      if self.followed_times > self.max_follows:
        self.run_callback(TooManyRedirection)
      else:
        newurl = urljoin(self.fullurl, self.headers['Location'])
        # Remember the stream being abandoned so its close is ignored.
        self._redirected_stream = self.stream
        self.new_url(newurl)
      return

    try:
      l = int(self.headers.get('Content-Length', None))
    except (ValueError, TypeError):
      l = None

    ctype = self.headers.get('Content-Type', 'text/html')
    mt = defaultMediaType._replace(type=ctype, size=l)
    # First finder class claiming this media type wins; none -> report
    # the bare MediaType and stop.
    for finder in self._content_finders:
      f = finder.match_type(mt)
      if f:
        self.finder = f
        break
    else:
      self.run_callback(mt)
      return

    return True

  def feed_finder(self, chunk):
    '''Feed `chunk` to the active finder; return its result once produced.'''
    t = self.finder(chunk)
    if t is not None:
      return t
    def parse(self):
        data = [{
            'label':
            '以太网帧头部 / Ethernet Headers',
            'value':
            '',
            'bold':
            True,
            'children': [{
                'label': '目的端 MAC 地址',
                'value': self.ethHeader.destMac
            }, {
                'label': '发送端 MAC 地址',
                'value': self.ethHeader.sourceMac
            }, {
                'label':
                '帧类型',
                'value':
                '%s (0x%s)' % (self.ethHeader.type, self.ethHeader.type_code)
            }]
        }]

        if self.protocol == 'ARP':
            data.append({
                'label':
                'ARP 消息 / Address Resolution Protocol',
                'value':
                '',
                'bold':
                True,
                'children': [{
                    'label':
                    '硬件类型',
                    'value':
                    '%s (%s)' % (self.arpBody.hardware_type,
                                 self.arpBody.hardware_type_code)
                }, {
                    'label':
                    '协议类型',
                    'value':
                    '%s (0x%s)' % (self.arpBody.protocol_type,
                                   self.arpBody.protocol_type_code)
                }, {
                    'label': '硬件地址长度',
                    'value': str(self.arpBody.hardware_size)
                }, {
                    'label': '协议地址长度',
                    'value': str(self.arpBody.protocol_size)
                }, {
                    'label':
                    '操作码',
                    'value':
                    '%s (%s)' %
                    (self.arpBody.operation, self.arpBody.operation_code)
                }, {
                    'label': '发送端 MAC 地址',
                    'value': self.arpBody.sender_mac_address
                }, {
                    'label': '发送端 IP 地址',
                    'value': self.arpBody.sender_ip_address
                }, {
                    'label': '目的端 MAC 地址',
                    'value': self.arpBody.target_mac_address
                }, {
                    'label': '目的端 IP 地址',
                    'value': self.arpBody.target_ip_address
                }]
            })
        else:

            if self.ipHeader.version == 4:
                self.ipHeader.verifyChecksum = verifyChecksum(
                    self.ipHeader.header_raw, [], '').verifyChecksum
                data.append({
                    'label':
                    'IPv4 头部 / IPv4 Header',
                    'value':
                    '',
                    'bold':
                    True,
                    'children': [{
                        'label': '协议版本',
                        'value': self.ipHeader.version
                    }, {
                        'label':
                        '头部长度',
                        'value':
                        str(self.ipHeader.header_length) + ' Bytes'
                    }, {
                        'label':
                        '服务类型',
                        'value':
                        '0x%s' % (self.ipHeader.differentiated_services)
                    }, {
                        'label': '来源 IP',
                        'value': self.ipHeader.source_ip
                    }, {
                        'label': '目标 IP',
                        'value': self.ipHeader.dest_ip
                    }, {
                        'label': '总长度',
                        'value': self.ipHeader.total_length
                    }, {
                        'label':
                        '标识',
                        'value':
                        '0x%s (%s)' % (self.ipHeader.identification,
                                       self.ipHeader.identification_int)
                    }, {
                        'label':
                        '标志',
                        'value':
                        '%s' % (self.ipHeader.flags.raw),
                        'children': [{
                            'label':
                            '保留位',
                            'value':
                            '%s | %s... .... .... ....' %
                            (self.ipHeader.flags.reserved,
                             int(self.ipHeader.flags.reserved))
                        }, {
                            'label':
                            'Don\'t fragment',
                            'value':
                            '%s | .%s.. .... .... ....' %
                            (self.ipHeader.flags.fragment,
                             int(self.ipHeader.flags.fragment))
                        }, {
                            'label':
                            'More fragments',
                            'value':
                            '%s | ..%s. .... .... ....' %
                            (self.ipHeader.flags.more_fragment,
                             int(self.ipHeader.flags.more_fragment))
                        }, {
                            'label':
                            '分段偏移',
                            'value':
                            '%s | ...%s' %
                            (self.ipHeader.flags.fragment_offset,
                             self.ipHeader.flags.fragment_offset_bin)
                        }]
                    }, {
                        'label': '生存期',
                        'value': self.ipHeader.time_to_live
                    }, {
                        'label':
                        '协议',
                        'value':
                        '%s (%s)' %
                        (self.ipHeader.protocol, self.ipHeader.protocol_code)
                    }, {
                        'label':
                        '校验和',
                        'value':
                        '0x%s (%s)' % (self.ipHeader.origin_checksum, '校验' + {
                            True: '通过',
                            False: '失败'
                        }[self.ipHeader.verifyChecksum])
                    }]
                })

            else:
                ipv6_header = {
                    'label':
                    'IPv6 头部 / IPv6 Header',
                    'value':
                    '',
                    'bold':
                    True,
                    'children': [{
                        'label': '协议版本',
                        'value': self.ipHeader.version
                    }, {
                        'label': '通信分类',
                        'value': '0x%s' % (self.ipHeader._class)
                    }, {
                        'label': '流标签',
                        'value': '0x%s' % (self.ipHeader.float_label)
                    }, {
                        'label': '有效载荷长度',
                        'value': self.ipHeader.payload_length
                    }, {
                        'label':
                        '下一头部类型',
                        'value':
                        '%s (%s)' % (self.ipHeader.next_header,
                                     self.ipHeader.next_header_code)
                    }, {
                        'label': '跳数限制',
                        'value': self.ipHeader.hop_limit
                    }, {
                        'label': '源 IP',
                        'value': self.ipHeader.source_ip
                    }, {
                        'label': '目的 IP',
                        'value': self.ipHeader.dest_ip
                    }]
                }

                for option in self.ipHeader.options:
                    ipv6_header['children'].append({
                        'label':
                        consts.protocol_types[str(option['code'])],
                        'value':
                        '0x' + option['value'],
                        'children': [{
                            'label':
                            '下一头部类型',
                            'value':
                            '%s (%s)' % (consts.protocol_types[str(
                                option['next_header'])], option['next_header'])
                        }]
                    })

                data.append(ipv6_header)

            if self.ipHeader.version == 4 and self.ipHeader.flags.more_fragment == True:
                # print('Waiting for more fragments.')
                ids = self.ip_ids[self.ipHeader.identification_int]
                slicing = {
                    'label': 'IP 分片',
                    'value': '共 %s 个数据包' % len(ids),
                    'bold': True,
                    'children': []
                }
                for id in ids:
                    slicing['children'].append({
                        'label':
                        '#%s' % id,
                        'value':
                        '%s Bytes' % (self.ip_packets[id].length / 8)
                    })
                data.append(slicing)
            else:
                if self.ipHeader.protocol == 'TCP':
                    self.ipBody.tcpHeader.verifyChecksum = verifyChecksum(
                        self.ipBody.parameters[0], self.ipBody.parameters[1],
                        self.ipHeader.protocol).verifyChecksum
                    self.ipBody.tcpHeader.options = tcpOptions(
                        BitArray(self.ipBodyRaw)
                        [160:self.ipBody.tcpHeader.header_length * 8]).options
                    tcp_header = {
                        'label':
                        'TCP 头部 / Transmission Control Protocol Header',
                        'value':
                        '',
                        'bold':
                        True,
                        'children': [{
                            'label': '源端口',
                            'value': self.ipBody.tcpHeader.source_port
                        }, {
                            'label':
                            '目的端口',
                            'value':
                            self.ipBody.tcpHeader.destination_port
                        }, {
                            'label':
                            '数据序号 (seq)',
                            'value':
                            self.ipBody.tcpHeader.sequence_number
                        }, {
                            'label':
                            '确认序号 (ack)',
                            'value':
                            self.ipBody.tcpHeader.acknowledge_number
                        }, {
                            'label':
                            '首部长度',
                            'value':
                            self.ipBody.tcpHeader.header_length
                        }, {
                            'label':
                            '标志位',
                            'value':
                            '0x' + self.ipBody.tcpHeader.flags_raw,
                            'children': [{
                                'label':
                                'Reserved',
                                'value':
                                '%s | %s. .... ....' %
                                (self.ipBody.tcpHeader.flags.reserved.uint,
                                 self.ipBody.tcpHeader.flags.reserved.bin)
                            }, {
                                'label':
                                'Nonce',
                                'value':
                                '%s | ...%d .... ....' %
                                (self.ipBody.tcpHeader.flags.nonce,
                                 self.ipBody.tcpHeader.flags.nonce)
                            }, {
                                'label':
                                'Congestion Window Reduced',
                                'value':
                                '%s | .... %d... ....' %
                                (self.ipBody.tcpHeader.flags.cwr,
                                 self.ipBody.tcpHeader.flags.cwr)
                            }, {
                                'label':
                                'ECN-Echo',
                                'value':
                                '%s | .... .%d.. ....' %
                                (self.ipBody.tcpHeader.flags.ecn_echo,
                                 self.ipBody.tcpHeader.flags.ecn_echo)
                            }, {
                                'label':
                                'Urgent',
                                'value':
                                '%s | .... ..%d. ....' %
                                (self.ipBody.tcpHeader.flags.urgent,
                                 self.ipBody.tcpHeader.flags.urgent)
                            }, {
                                'label':
                                'Acknowledgment',
                                'value':
                                '%s | .... ...%d ....' %
                                (self.ipBody.tcpHeader.flags.acknowledgement,
                                 self.ipBody.tcpHeader.flags.acknowledgement)
                            }, {
                                'label':
                                'Push',
                                'value':
                                '%s | .... .... %d...' %
                                (self.ipBody.tcpHeader.flags.push,
                                 self.ipBody.tcpHeader.flags.push)
                            }, {
                                'label':
                                'Reset',
                                'value':
                                '%s | .... .... .%d..' %
                                (self.ipBody.tcpHeader.flags.reset,
                                 self.ipBody.tcpHeader.flags.reset)
                            }, {
                                'label':
                                'Syn',
                                'value':
                                '%s | .... .... ..%d.' %
                                (self.ipBody.tcpHeader.flags.syn,
                                 self.ipBody.tcpHeader.flags.syn)
                            }, {
                                'label':
                                'Fin',
                                'value':
                                '%s | .... .... ...%d' %
                                (self.ipBody.tcpHeader.flags.fin,
                                 self.ipBody.tcpHeader.flags.fin)
                            }]
                        }, {
                            'label': '窗口大小',
                            'value': self.ipBody.tcpHeader.window_size
                        }, {
                            'label':
                            '校验和',
                            'value':
                            '0x%s (%s)' %
                            (self.ipBody.tcpHeader.checksum, '校验' + {
                                True: '通过',
                                False: '失败'
                            }[self.ipBody.tcpHeader.verifyChecksum])
                        }]
                    }
                    # Flatten each TCP option (first element = its label/value,
                    # rest = child detail rows) into a tree node for display.
                    options = []
                    if self.ipBody.tcpHeader.options:
                        for idx in range(len(self.ipBody.tcpHeader.options)):
                            option = {
                                'label':
                                self.ipBody.tcpHeader.options[idx][0]['label'],
                                'value':
                                '(%s)' %
                                self.ipBody.tcpHeader.options[idx][0]['value'],
                                'children':
                                self.ipBody.tcpHeader.options[idx][1:]
                            }
                            options.append(option)
                    if options:
                        tcp_header['children'].append({
                            'label': '选项',
                            'value': '',
                            'children': options
                        })

                    data.append(tcp_header)

                    # NOTE(review): leftover debug prints — consider removing
                    # or routing through the logging module.
                    print(self.id)
                    print(tcp_bodies)
                    # Packet belongs to a reassembled TCP stream: list its
                    # segment packet ids and, when the full stream data is
                    # available, try to parse the payload as HTTP.
                    if self.id in packet_id_struct:
                        tmp = []
                        http_payload = None
                        for p_id in packet_id_struct[self.id]:
                            tmp.append({'value': '', 'label': '#%s' % p_id})

                        # tcp_bodies holds the fully reassembled stream bytes;
                        # per the label below, this packet carries the final
                        # TCP segment when its id is present here.
                        if self.id in tcp_bodies:
                            # print(tcp_bodies[self.id]['data'].decode('utf-8', 'ignore'))
                            children = [{
                                'label':
                                '该包是 TCP 分段的最后一段, 可以通过右下角按钮「导出 TCP 分段数据」.',
                                'value': '',
                                'bold': True
                            }, {
                                'label': '共 %s 个分段' % len(tmp),
                                'value': '',
                                'bold': True,
                                'children': tmp
                            }]

                            # Try parsing the reassembled bytes as HTTP; a
                            # partial parse (nparsed != recved) trips the
                            # assert and is treated as "not HTTP".
                            try:
                                p = HttpParser()
                                recved = len(tcp_bodies[self.id]['data'])
                                nparsed = p.execute(
                                    tcp_bodies[self.id]['data'], recved)
                                assert nparsed == recved

                                # NOTE(review): p.get_headers() is re-read on
                                # every iteration; it could be hoisted into a
                                # local before the loop.
                                headers = []
                                for header in p.get_headers():
                                    headers.append({
                                        'label':
                                        header,
                                        'value':
                                        p.get_headers()[header]
                                    })

                                # NOTE(review): debug print left in.
                                print(p.get_path(), p.get_url(),
                                      p.get_fragment(), p.get_method(),
                                      p.get_query_string(),
                                      p.get_status_code(),
                                      p.get_wsgi_environ())

                                http_payload = [{
                                    'label':
                                    'HTTP 版本',
                                    'value':
                                    '%s.%s' %
                                    (p.get_version()[0], p.get_version()[1])
                                }, {
                                    'label': 'HTTP 头部',
                                    'value': '',
                                    'children': headers
                                }]

                                # A non-empty URL means this was a request
                                # (method/path/query/host); otherwise render
                                # it as a response (status code only).
                                if len(p.get_url()) != 0:
                                    http_payload.append({
                                        'label': '请求方式',
                                        'value': p.get_method()
                                    })
                                    http_payload.append({
                                        'label': '路径',
                                        'value': p.get_url()
                                    })
                                    http_payload.append({
                                        'label':
                                        '请求参数',
                                        'value':
                                        p.get_query_string()
                                    })
                                    http_payload.append({
                                        'label':
                                        '主机名',
                                        'value':
                                        p.get_wsgi_environ()['HTTP_HOST']
                                    })
                                else:
                                    http_payload.append({
                                        'label':
                                        '状态码',
                                        'value':
                                        p.get_status_code()
                                    })

                            except AssertionError:
                                # Not HTTP: parser consumed fewer bytes than
                                # supplied; http_payload stays None.
                                pass

                        else:
                            children = [{
                                'label': '共 %s 个分段' % len(tmp),
                                'value': '',
                                'bold': True,
                                'children': tmp
                            }]

                        data.append({
                            'label': 'TCP 数据 / TCP Payload',
                            'value': '',
                            'bold': True,
                            'children': children
                        })

                        # NOTE(review): prefer 'is not None' over '!= None'.
                        if http_payload != None:
                            data.append({
                                'label': 'HTTP 数据 / HTTP Data',
                                'value': '',
                                'bold': True,
                                'children': http_payload
                            })
                    # NOTE(review): dead code — an earlier TCP-payload/HTTP
                    # attempt kept as a triple-quoted string expression; it is
                    # never used and is safe to delete.
                    '''
                    if self.ipBody.tcpBody.has_body:
                        try:
                            p = HttpParser()
                            recved = len(self.ipBody.tcpBody.buf)
                            nparsed = p.execute(self.ipBody.tcpBody.buf, recved)
                            assert nparsed == recved

                            print(p.get_headers())
                        except AssertionError:
                            print('NOT HTTP')

                        data.append({
                            'label': 'TCP 数据 / Data',
                            'value': '',
                            'bold': True,
                            'children': [
                                {
                                    'label': '数据',
                                    'value': self.ipBody.tcpBody.raw
                                }
                            ]
                        })
                    '''

                # UDP: verify the checksum (parameters presumably carry the
                # pseudo-header pieces — TODO confirm) and emit the UDP header
                # tree; decode the payload as DNS when port 53 is involved.
                elif self.ipHeader.protocol == 'UDP':
                    self.ipBody.udpHeader.verifyChecksum = verifyChecksum(
                        self.ipBody.parameters[0], self.ipBody.parameters[1],
                        self.ipHeader.protocol).verifyChecksum
                    data.append({
                        'label':
                        'UDP 头部 / User Datagram Protocol Header',
                        'value':
                        '',
                        'bold':
                        True,
                        'children': [{
                            'label': '源端口',
                            'value': self.ipBody.udpHeader.source_port
                        }, {
                            'label':
                            '目的端口',
                            'value':
                            self.ipBody.udpHeader.destination_port
                        }, {
                            'label': '长度',
                            'value': self.ipBody.udpHeader.length
                        }, {
                            'label':
                            '校验和',
                            'value':
                            '0x%s (%s)' %
                            (self.ipBody.udpHeader.checksum, '校验' + {
                                True: '通过',
                                False: '失败'
                            }[self.ipBody.udpHeader.verifyChecksum])
                        }]
                    })

                    if self.ipBody.udpHeader.source_port == 53 or self.ipBody.udpHeader.destination_port == 53:  # DNS
                        children = [{
                            'label': '会话标识',
                            'value': self.ipBody.dnsBody.transaction_id
                        }, {
                            # NOTE(review): the '标志' (flags) row reuses
                            # transaction_id — presumably it should read a DNS
                            # flags field instead; also the '0x' prefix assumes
                            # the value is a hex string. TODO confirm and fix.
                            'label':
                            '标志',
                            'value':
                            '0x' + self.ipBody.dnsBody.transaction_id
                        }, {
                            'label': '问题数',
                            'value': self.ipBody.dnsBody.questions
                        }, {
                            'label': '回答资源记录数',
                            'value': self.ipBody.dnsBody.answer_rrs
                        }, {
                            'label': '授权资源记录数',
                            'value': self.ipBody.dnsBody.authority_rrs
                        }, {
                            'label': '附加资源记录数',
                            'value': self.ipBody.dnsBody.additional_rrs
                        }]

                        # Question section: one node per query with name,
                        # type and class (looked up in consts tables).
                        if len(self.ipBody.dnsBody.queries) > 0:
                            queries = []
                            for query in self.ipBody.dnsBody.queries:
                                queries.append({
                                    'label':
                                    str(query.qname),
                                    'value':
                                    '',
                                    'bold':
                                    True,
                                    'children': [{
                                        'label': '域名',
                                        'value': str(query.qname)
                                    }, {
                                        'label':
                                        'Type',
                                        'value':
                                        '%s (%s)' %
                                        (consts.dns_types[query.qtype],
                                         query.qtype)
                                    }, {
                                        'label':
                                        'Class',
                                        'value':
                                        '%s (%s)' %
                                        (consts.dns_classes[query.qclass],
                                         query.qclass)
                                    }]
                                })
                            children.append({
                                'label': '查询问题',
                                'value': '',
                                'bold': True,
                                'children': queries
                            })

                        # Answer section: one node per resource record with
                        # name, type, class, TTL and record data.
                        if len(self.ipBody.dnsBody.answers) > 0:
                            answers = []
                            for answer in self.ipBody.dnsBody.answers:
                                answers.append({
                                    'label':
                                    str(answer.rname),
                                    'value':
                                    '',
                                    'bold':
                                    True,
                                    'children': [{
                                        'label': '域名',
                                        'value': str(answer.rname)
                                    }, {
                                        'label':
                                        'Type',
                                        'value':
                                        '%s (%s)' %
                                        (consts.dns_types[answer.rtype],
                                         answer.rtype)
                                    }, {
                                        'label':
                                        'Class',
                                        'value':
                                        '%s (%s)' %
                                        (consts.dns_classes[answer.rclass],
                                         answer.rclass)
                                    }, {
                                        'label': '生存时间 (ttl)',
                                        'value': str(answer.ttl)
                                    }, {
                                        'label': '数据',
                                        'value': str(answer.rdata)
                                    }]
                                })
                            children.append({
                                'label': '回答',
                                'value': '',
                                'bold': True,
                                'children': answers
                            })

                        data.append({
                            'label': 'DNS / Domain Name System',
                            'value': '',
                            'bold': True,
                            'children': children
                        })

                # ICMP / ICMPv6: ICMPv6 verification passes the two parameter
                # groups and the protocol name to verifyChecksum; plain ICMP
                # verifies over self.ipBody.parameters alone.
                elif 'ICMP' in self.ipHeader.protocol:
                    if 'IPv6' in self.ipHeader.protocol:
                        self.ipBody.icmpHeader.verifyChecksum = verifyChecksum(
                            self.ipBody.parameters[0],
                            self.ipBody.parameters[1],
                            self.ipHeader.protocol).verifyChecksum
                    else:
                        self.ipBody.icmpHeader.verifyChecksum = verifyChecksum(
                            self.ipBody.parameters, [], '').verifyChecksum
                    data.append({
                        'label':
                        'ICMP 头部 / Internet Control Message Protocol Headers',
                        'value':
                        '',
                        'bold':
                        True,
                        'children': [{
                            'label':
                            '类型',
                            'value':
                            '%s (%s)' % (self.ipBody.icmpHeader.type,
                                         self.ipBody.icmpHeader.type_name)
                        }, {
                            'label': '代码',
                            'value': self.ipBody.icmpHeader.code
                        }, {
                            'label':
                            '校验和',
                            'value':
                            '0x%s (%s)' %
                            (self.ipBody.icmpHeader.checksum, '校验' + {
                                True: '通过',
                                False: '失败'
                            }[self.ipBody.icmpHeader.verifyChecksum])
                        }]
                    })

                # IGMP: an 8-byte payload is rendered as a classic IGMP
                # header; anything longer is treated as IGMPv3.
                elif 'IGMP' in self.ipHeader.protocol:
                    if self.ipHeader.payload_length == 8:
                        self.ipBody.igmpHeader.verifyChecksum = verifyChecksum(
                            self.ipBody.parameters, [], '').verifyChecksum
                        data.append({
                            'label':
                            'IGMP 头部 / Internet Group Management Protocol Headers',
                            'value':
                            '',
                            'bold':
                            True,
                            'children': [{
                                'label':
                                '类型',
                                'value':
                                '0x%s(%s)' % (self.ipBody.igmpHeader.type,
                                              self.ipBody.igmpHeader.type_name)
                            }, {
                                'label':
                                '最大响应时延',
                                'value':
                                '%s 秒(0x%s)' %
                                (self.ipBody.igmpHeader.maxRespTime,
                                 self.ipBody.igmpHeader.maxRespTimeHex)
                            }, {
                                'label':
                                '校验和',
                                'value':
                                '0x%s(%s)' %
                                (self.ipBody.igmpHeader.checksum, '校验' + {
                                    True: '通过',
                                    False: '失败'
                                }[self.ipBody.igmpHeader.verifyChecksum])
                            }, {
                                'label':
                                '组地址',
                                'value':
                                self.ipBody.igmpHeader.groupAddress
                            }]
                        })
                    else:
                        self.ipBody.igmpv3Header.verifyChecksum = verifyChecksum(
                            self.ipBody.parameters, [], '').verifyChecksum
                        data.append({
                            'label':
                            'IGMPv3 头部 / Internet Group Management Protocol Version 3 Headers',
                            'value':
                            '',
                            'bold':
                            True,
                            'children': [{
                                'label':
                                '类型',
                                'value':
                                '0x%s' % self.ipBody.igmpv3Header.type
                            }, {
                                'label':
                                '校验和',
                                'value':
                                '0x%s(%s)' %
                                (self.ipBody.igmpv3Header.checksum, '校验' + {
                                    True: '通过',
                                    False: '失败'
                                }[self.ipBody.igmpv3Header.verifyChecksum])
                            }]
                        })

        # Assembled list of label/value/children nodes for the UI tree.
        return data