Example #1
def parse_http(rsp):
    """Parse a raw HTTP response.

    Feeds the full payload to HttpParser and returns the parser."""
    p = HttpParser()
    p.execute(rsp, len(rsp))
    return p
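A minimal usage sketch (not part of the original snippet), assuming Python 3,
the pure-Python backend, and a complete response held in raw_response:

from http_parser.pyparser import HttpParser

raw_response = b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nok"
p = parse_http(raw_response)
print(p.get_status_code())   # 200
print(p.get_headers())       # dict-like view of the parsed headers
print(p.recv_body())         # b'ok'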
Example #2
def check_correct_HEAD(host, port):
    # Check that HEAD returns only the headers and no body
    p = HttpParser()
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    rc_is_headers_complete = False
    rc_no_more_data = True
    try:
        s.connect((host, port))
        s.settimeout(1)
        s.send("HEAD /index.html HTTP/1.1\r\nHost: %s:%d\
            \r\nConnection:Keep-Alive\r\n\r\n" % (host, port))
        while True:
            data = s.recv(1024)

            if rc_is_headers_complete and data:
                rc_no_more_data = False
                break

            if not data:
                break

            recved = len(data)
            nparsed = p.execute(data, recved)
            assert nparsed == recved

            if p.is_headers_complete():
                rc_is_headers_complete = True

            if p.is_message_complete():
                break
    except socket.timeout:
        pass
    finally:
        s.close()
    return rc_is_headers_complete and rc_no_more_data
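This snippet is Python 2-style; on Python 3, socket.send requires bytes, so
the request would be built and encoded along these lines:

req = ("HEAD /index.html HTTP/1.1\r\n"
       "Host: %s:%d\r\n"
       "Connection: Keep-Alive\r\n\r\n" % (host, port))
s.send(req.encode("latin-1"))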
Example #4
def parse_http(rsp):
    """Parse a raw HTTP response.

    Feeds the full payload to HttpParser and returns the parser."""
    p = HttpParser()
    p.execute(rsp, len(rsp))
    return p
Example #5
def new_connection(client):
    parser = HttpParser(kind=0)
    parser.environ = True

    client.on(
        "data", on_read, parser, client
    ).on(
        "error", on_error, client
    )
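The kind argument selects what the parser accepts; as the comments in later
examples note, 0 parses only requests and 1 only responses (the default
accepts both). A small request-side sketch under the same assumptions:

req_parser = HttpParser(kind=0)
raw = b"GET /index.html HTTP/1.1\r\nHost: example.com\r\n\r\n"
req_parser.execute(raw, len(raw))
print(req_parser.get_method())  # 'GET'
print(req_parser.get_path())    # '/index.html'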
Example #6
def parse_request_from_file(path):
    '''
    Parse a raw HTTP request from a file.
    '''
    with open(path) as f:
        raw_http = f.read()
    # If you are reading a text file saved on Windows, you may need to manually
    # convert \n back to \r\n (universal newline support:
    # http://docs.python.org/library/functions.html#open)
    raw_http = raw_http.replace('\n', '\r\n')
    p = HttpParser()
    p.execute(raw_http, len(raw_http))
    return p
Example #7
def __init__(self, factory, params):
    self.parser = HttpParser()
    self.params = params
    self.verb = self.params.get_verb()
    self.headers = self.params.get_headers()
    self.uris = {}
    self.uris["getgame"] = "%s/game" % prefix
    self.uris["gethost"] = "%s/host" % prefix
    self.uris["getservice"] = "%s/service" % prefix
    self.recv = ""
    self.request = ""
    self.payload = None
Example #8
def understand_request(self, data):
    p = HttpParser()
    recved = len(data)
    try:
        p.execute(data, recved)
        content_type = p.get_headers()["CONTENT-TYPE"]
        method = p.get_method()
        dataOfReq = str(p.recv_body(), "utf-8")
        path = p.get_path()
    except Exception:
        print(sys.exc_info())
        return '400: Bad Request: %s' % str(sys.exc_info()[1])
    req = myRequest(content_type, method, dataOfReq, path)
    req.toPrint()
    if req._data == '':  # if there is no data
        return '204: No Content'
    if req._content_type != 'application/json':
        return '501: Not Implemented'
    if req._method != 'POST':
        return '501: Not Implemented'
    if req._path == '/users/add':
        return self.users_add(req)
    if req._path == '/chats/add':
        return self.chats_add(req)
    if req._path == '/messages/add':
        return self.messages_add(req)
    if req._path == '/chats/get':
        return self.chats_get(req)
    if req._path == '/messages/get':
        return self.messages_get(req)
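myRequest is not shown here; a minimal stand-in consistent with the
attributes the dispatcher uses above:

class myRequest:
    def __init__(self, content_type, method, data, path):
        self._content_type = content_type
        self._method = method
        self._data = data
        self._path = path

    def toPrint(self):
        # Dump the parsed request for debugging.
        print(self._method, self._path, self._content_type, self._data)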
Example #9
def handle_http(sock, addr):
    """A more complicated handler which detects HTTP headers
    """

    def recv_request(p):
        while True:
            data = sock.recv(8192)

            if not data:
                return False

            nb = len(data)
            nparsed = p.execute(data, nb)
            assert nparsed == nb

            if USING_PYPARSER and p.is_headers_complete():
                h = p.get_headers()
                if not (h.get('content-length') or h.get('transfer-length')):
                    # pass length=0 to signal end of body
                    # TODO: pyparser requires this, but not the C parser for some reason
                    p.execute(data, 0)
                    return True

            if p.is_message_complete():
                return True

    # main request loop
    while True:
        p = HttpParser()

        if not recv_request(p):
            break

        h = p.get_headers()
        ka = p.should_keep_alive()
        h_connection = 'keep-alive' if ka else 'close'

        resp = create_response('Hello, world!', {'Connection': h_connection})
        sock.sendall(resp)

        if not ka:
            break
        else:
            # we should keep-alive, but yield to drastically improve overall request/response
            # latency
            gyield()

    sock.close()
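create_response is defined elsewhere in that project; a minimal sketch under
the assumption that it renders a complete HTTP/1.1 response with the given
extra headers:

def create_response(body, extra_headers=None):
    headers = {"Content-Type": "text/plain",
               "Content-Length": str(len(body))}
    headers.update(extra_headers or {})
    head = "".join("%s: %s\r\n" % kv for kv in headers.items())
    return ("HTTP/1.1 200 OK\r\n%s\r\n%s" % (head, body)).encode("latin-1")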
Example #11
def parse_start_string(con, data):
    p = HttpParser()
    try:
        p.execute(data, len(data))

        url = p.get_url()
        method = p.get_method()

        http_pos = url.find('://')
        if http_pos == -1:
            temp = url
        else:
            temp = url[(http_pos + 3):]

        port_pos = temp.find(':')
        host_pos = temp.find('/')
        if host_pos == -1:
            host_pos = len(temp)
        if port_pos == -1 or host_pos < port_pos:
            port = 443 if method == "CONNECT" else 80
        else:
            port = int((temp[port_pos + 1:])[:host_pos - port_pos - 1])

        host = p.get_headers()['host']
        port_ind = host.find(':')
        if port_ind != -1:
            host = host[:port_ind]
        if metopd == "CONNECT":
            https_proxy(host, port, con)
        else:
            proxy(host, port, con, data)
    except Exception:
        # ignore parse and connect errors from malformed requests
        pass
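proxy and https_proxy are defined elsewhere in that project; a rough sketch
of the plain-HTTP forwarding path, assuming a simple blocking relay:

import socket

def proxy(host, port, con, data):
    # Forward the original request bytes upstream and relay the reply back.
    upstream = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    upstream.connect((host, port))
    upstream.sendall(data)
    while True:
        chunk = upstream.recv(4096)
        if not chunk:
            break
        con.sendall(chunk)
    upstream.close()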
Example #12
    def run(self) -> None:
        p = HttpParser()
        try:
            p.execute(self.req, len(self.req))

            url = p.get_url()
            method = p.get_method()

            http_pos = url.find('://')
            if http_pos == -1:
                temp = url
            else:
                temp = url[(http_pos + 3):]

            port_pos = temp.find(':')
            host_pos = temp.find('/')
            if host_pos == -1:
                host_pos = len(temp)
            if port_pos == -1 or host_pos < port_pos:
                port = 443 if method == "CONNECT" else 80
            else:
                port = int((temp[port_pos + 1:])[:host_pos - port_pos - 1])

            host = p.get_headers()['host']
            port_ind = host.find(':')
            if port_ind != -1:
                host = host[:port_ind]
            if metopd == "CONNECT":
                https_proxy(host, port, self.client)
            else:
                proxy(host, port, self.client, self.req)
        except Exception as e:
            print(e)
Example #13
    def __init__(self, name, config_file):
        # type: (str, weechat.config) -> None
        # yapf: disable
        self.name = name                     # type: str
        self.user_id = ""
        self.address = ""                    # type: str
        self.port = 8448                     # type: int
        self.options = dict()                # type: Dict[str, weechat.config]
        self.device_name = "Weechat Matrix"  # type: str
        self.device_id = ""                  # type: str

        self.user = ""                       # type: str
        self.password = ""                   # type: str

        self.rooms = dict()                  # type: Dict[str, MatrixRoom]
        self.buffers = dict()                # type: Dict[str, weechat.buffer]
        self.server_buffer = None            # type: weechat.buffer
        self.fd_hook = None                  # type: weechat.hook
        self.ssl_hook = None                 # type: weechat.hook
        self.timer_hook = None               # type: weechat.hook
        self.numeric_address = ""            # type: str

        self.autoconnect = False                         # type: bool
        self.connected = False                           # type: bool
        self.connecting = False                          # type: bool
        self.proxy = None                                # type: str
        self.reconnect_delay = 0                         # type: int
        self.reconnect_time = None                       # type: float
        self.socket = None                               # type: ssl.SSLSocket
        self.ssl_context = ssl.create_default_context()  # type: ssl.SSLContext

        self.client = None
        self.access_token = None                         # type: str
        self.next_batch = None                           # type: str
        self.transaction_id = 0                          # type: int
        self.lag = 0                                     # type: int
        self.lag_done = False                            # type: bool

        self.send_fd_hook = None                         # type: weechat.hook
        self.send_buffer = b""                           # type: bytes
        self.current_message = None                      # type: MatrixMessage

        self.http_parser = HttpParser()                  # type: HttpParser
        self.http_buffer = []                            # type: List[bytes]

        # Queue of messages we need to send off.
        self.send_queue = deque()     # type: Deque[MatrixMessage]

        # Queue of messages we have sent off and are awaiting a response for
        self.receive_queue = deque()  # type: Deque[MatrixMessage]

        self.event_queue_timer = None
        self.event_queue = deque()  # type: Deque[RoomInfo]

        self._create_options(config_file)
        self._create_session_dir()
        self._load_device_id()
Example #14
    def __init__(self, client_conn: socket.socket, client_addr, flags, req):
        self.start_time: float = time.time()

        self.client = client_conn
        self.req = req

        self.client_addr = client_addr  # host and socket_fd
        self.flags = flags

        self.request_parser = HttpParser(0)  # 0 - parse only requests
        self.response_parser = HttpParser(1)  # 1 - parse only responses
        self.total_response_size: int = 0

        self.upstream: Optional[urlparse.SplitResultBytes] = None
        self.host = None
        self.port = None
        self.upstream_url = None

        self.server: Optional[socket.socket] = None
Example #15
def repeat(req_id):
    sqlite_con = saver.get_connection()
    cursor = sqlite_con.cursor()
    _, host, port, request, is_https = saver.get_request(cursor, req_id)
    sqlite_con.close()
    # Connecting to server
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((host, port))
    if is_https:
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        sock = context.wrap_socket(sock, server_hostname=host)
    sock.send(request)

    # Getting response
    parser = HttpParser()
    resp = b''
    while True:
        data = sock.recv(buffer_size)
        if not data:
            break

        received = len(data)
        _ = parser.execute(data, received)
        if parser.is_partial_body():
            resp += parser.recv_body()

        if parser.is_message_complete():
            break
    headers = parser.get_headers()
    # Decode answer
    if headers.get('CONTENT-ENCODING') == 'gzip':
        resp = gzip.decompress(resp)
        resp = str(resp, 'utf-8')
    else:
        try:
            resp = resp.decode('utf-8')
        except UnicodeDecodeError:
            print('Body wasn\'t decoded')

    print("{} HTTP/{}.{}".format(parser.get_status_code(), *parser.get_version()))
    for header in headers:
        print('{}: {}'.format(header, headers.get(header)))
    print()
    print(resp)
    print()
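A deflate-encoded body could be handled analogously; a sketch (some servers
send raw deflate streams, hence the negative window-bits fallback):

import zlib

if headers.get('CONTENT-ENCODING') == 'deflate':
    try:
        resp = zlib.decompress(resp)
    except zlib.error:
        resp = zlib.decompress(resp, -zlib.MAX_WBITS)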
Example #16
    def componentShown(self, e):
        self._split_pane_horizontal.setDividerLocation(0.25)
        # populate the table with the selected requests/responses
        try:
            if self._reload_table:
                print("reload")
                self._table_data = []  # empty _table_data (not too cool but quick)
                for c in self._messages:
                    msg = c[0]
                    http_request = converter._byte_array_to_string(
                        msg.getRequest())
                    request_parser = HttpParser()
                    request_parser.execute(http_request, len(http_request))

                    host = msg.getHttpService().getHost()
                    page = request_parser.get_url()
                    method = request_parser.get_method()

                    tmp = [host, method, page]
                    self._table_data += [tmp]
                self._table.getModel().setDataVector(self._table_data,
                                                     self._columns_names)
                self._reload_table = False
        except Exception as e:
            print(e)
Example #17
def main():

    p = HttpParser()
    while True:
        header = read_head()
        if header.decode("utf-8") == "":
            return
        res = p.execute(header, len(header))
        result = None
        length_key = "content-length"
        content_length = p.get_headers().get(length_key)
        if content_length is not None:
            body = read_body(int(content_length))
            result = handle(body)
        if result is None:
            result = ""  # guard: len(result) below would fail on None

        out_buffer = "HTTP/1.1 200 OK\r\n"
        out_buffer += "Content-Length: "+str(len(result))+"\r\n"
        out_buffer += "\r\n"
        out_buffer += result

        sys.stdout.write(out_buffer)
        sys.stdout.flush()
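read_head and read_body are not shown; a plausible sketch that serves the
request from stdin, assuming the header block ends at the usual blank line:

import sys

def read_head():
    # Accumulate bytes until the blank line terminating the header block.
    head = b""
    while not head.endswith(b"\r\n\r\n"):
        ch = sys.stdin.buffer.read(1)
        if not ch:
            break
        head += ch
    return head

def read_body(length):
    return sys.stdin.buffer.read(length)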
Example #18
class JobClient(protocol.Protocol):
    def __init__(self, factory, params):
        self.parser = HttpParser()
        self.params = params
        self.verb = self.params.get_verb()
        self.headers = self.params.get_headers()
        self.uris = {}
        self.uris["getgame"] = "%s/game" % prefix
        self.uris["gethost"] = "%s/host" % prefix
        self.uris["getservice"] = "%s/service" % prefix
        self.recv = ""
        self.request = ""
        self.payload = None

    def no_unicode(self, text):
        if isinstance(text, unicode):
            return text.encode('utf-8')
        else:
            return text

    def check_json(self):
        try:
            return json.loads(self.recv)
        except ValueError:
            return False

    def TimedOut(self):
        pass

    def connectionMade(self):
        # self.url is expected to be set elsewhere (e.g. by a subclass).
        if self.verb == "GET":
            self.request = "GET %s HTTP/1.1\r\n%s\r\n" % (self.url,
                                                          self.headers)
        elif self.verb == "POST":
            self.payload = self.params.get_payload()
            self.request = "POST %s HTTP/1.1\r\n%s\r\n%s" % \
                             (self.url, self.headers, self.payload)
        self.transport.write(self.request)

    def dataReceived(self, data):
        self.parser.execute(data, len(data))
        if self.parser.is_headers_complete():
            self.headers = self.parser.get_headers()
        if self.parser.is_partial_body():
            self.recv += self.parser.recv_body()
        if self.parser.is_message_complete():
            if self.check_json():
                self.proc_response()
            else:
                print "Problem with %s" % self.recv

    def proc_response(self):
        #Override in subclass
        pass
Example #19
def check_correct_GET(host, port):
    # Check that each GET produces exactly one response
    error_reported = False
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    responses = 0
    buf_size = 1024
    data_all = ""
    try:
        s.connect((host, port))
        s.send("GET /index.html HTTP/1.1\r\nHost: %s:%d\
            \r\nConnection:Keep-Alive\r\n\r\n" % (host, port))
        while True:
            s.settimeout(1)
            data = s.recv(buf_size)
            data_all += data

            if not data:
                break
    except socket.timeout:
        pass
    finally:
        s.close()

    p = HttpParser()
    while len(data_all) > 0:
        nparsed = p.execute(data_all, len(data_all))
        if nparsed == 0:
            break
        if p.is_message_complete():
            responses += 1
            if nparsed < len(data_all):
                responses += 1 #more data 
            if p.get_status_code() >= 400:
                error_reported = True
            p = HttpParser() # create another
        data_all = data_all[nparsed:]

    return error_reported, responses
Example #20
def check_correct_GET(host, port, version="1.1"):
    # Check that each GET produces exactly one response
    error_reported = "No error reported, check number of responses to confirm"
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    responses = 0
    status = 0
    buf_size = 1024
    data_all = ""
    try:
        s.connect((host, port))
        s.send("GET /index.html HTTP/" + version + "\r\nHost: %s:%d\
            \r\nConnection:Keep-Alive\r\n\r\n" % (host, port))
        while True:
            s.settimeout(1)
            data = s.recv(buf_size)
            data_all += data

            if not data:
                break
    except socket.timeout:
        pass
    finally:
        s.close()

    p = HttpParser()
    while len(data_all) > 0:
        nparsed = p.execute(data_all, len(data_all))
        if nparsed == 0:
            break
        status = p.get_status_code()
        if p.is_message_complete():
            responses += 1
            if nparsed < len(data_all):
                responses += 1  #more data
            if p.get_status_code() >= 400:
                error_reported = "Error found"
            p = HttpParser()  # create another
        data_all = data_all[nparsed:]

    return error_reported, responses, status
Example #21
def tx_req_from_raw_request(filename):
    '''
    Parse a plaintext request from a file, then replay it.
    '''
    with open(filename, "rb") as fr:
        headers_raw = fr.read()

    hp = HttpParser()
    r = hp.execute(headers_raw, len(headers_raw))
    print("{} len={} parsed={}".format(filename, len(headers_raw), r))
    headers = dict(hp.get_headers())
    body = hp.recv_body()
    url = f'''https://{headers.get("HOST", "")}{hp.get_path()}'''
    method = hp.get_method().lower()
    resp = requests.request(method=method, url=url, headers=headers, data=body)
    print(resp_dump.dump_all(resp))
    print("\n\n")
Example #22
def _parseHttpRequestResponse(model, http_request, http_response, protocol):
    """Parse an HTTP request/response pair and generate its ASLan++ translation."""
    try:
        global i_tag
        global var_i
        request_parser = HttpParser()
        request_parser.execute(http_request, len(http_request))

        var_i = 0

        # concretization details
        concrete = dict()

        # concretization TAG
        returntag = "tag{}".format(i_tag)

        # URL for concretization
        url = (protocol + "://" + request_parser.get_headers()['Host'] +
               "/" + request_parser.get_url())
        concrete['url'] = url

        # path (this string should not begin with anything other than a letter),
        # with every non-alphanumeric character replaced by _:
        # the inner re.sub replaces every non-alphanumeric char, the outer one
        # strips non-letters from the beginning of the string
        page = re.sub("^[^a-z]*", "",
                      re.sub("[^a-zA-Z0-9]", "_",
                             urlparse(url).path))
        # add page in the array _aslanpp_constants
        model._page_constants.add(page)

        # method for concretization
        method = request_parser.get_method()
        concrete['method'] = method

        # query string
        post_query_string = ""
        get_query_string = request_parser.get_query_string()
        request_headers = request_parser.get_headers()
        if method == "POST" and "Content-Type" in request_headers \
                and "multipart/form-data" not in request_headers['Content-Type']:
            # POST parameters; multipart/form-data is not yet supported
            post_query_string = request_parser.recv_body()
        if "Content-Type" in request_headers \
                and "multipart/form-data" in request_headers['Content-Type']:
            print("multipart/form-data not yet supported")

        # for each request/response we need
        aslanpp_params_no_questionmark = ""
        aslanpp_params_questionmark = ""
        aslanpp_cookie_no_questionmark = ""
        aslanpp_cookie_questionmark = ""
        aslanpp_cookie2_no_questionmark = ""
        aslanpp_cookie2_questionmark = ""

        # convert GET parameters
        if get_query_string:
            # saving the concrete parameters
            concrete_get_params = [
                couple.split("=") for couple in get_query_string.split("&")
            ]

            # parse the parameters and retrieve ASLan++ code and mapping
            aslanpp_no_questionmark, aslanpp_questionmark, mapping_get = _parse_parameters(
                model, concrete_get_params)
            aslanpp_params_no_questionmark += aslanpp_no_questionmark
            aslanpp_params_questionmark += aslanpp_questionmark

            # save get param in concretization
            concrete['get_params'] = mapping_get

        # convert POST parameters
        if post_query_string:
            # saving the concrete parameters
            concrete_post_params = [
                couple.split("=") for couple in post_query_string.split("&")
            ]

            # parse the parameters and retrieve ASLan++ code and mapping
            aslanpp_no_questionmark, aslanpp_questionmark, mapping_post = _parse_parameters(
                model, concrete_post_params)
            aslanpp_params_no_questionmark += aslanpp_no_questionmark
            aslanpp_params_questionmark += aslanpp_questionmark

            # save get param in concretization
            concrete['post_params'] = mapping_post

        if aslanpp_params_no_questionmark == "":
            aslanpp_params_no_questionmark = "none"
        else:
            aslanpp_params_no_questionmark = aslanpp_params_no_questionmark[:-5]
        if aslanpp_params_questionmark == "":
            aslanpp_params_questionmark = "none"
        else:
            aslanpp_params_questionmark = aslanpp_params_questionmark[:-5]

        # convert cookie in the request
        try:
            cookie_request = request_parser.get_headers()['Cookie']

            simple_cookie = Cookie.SimpleCookie(cookie_request)
            concrete_cookie = [[item, simple_cookie[item].value]
                               for item in simple_cookie]

            # parse the parameters and retrieve ASLan++ code, constants, variables and mapping
            cookie_no_questionmark, cookie_questionmark, cookie_mapping = _parse_parameters(
                model, concrete_cookie)
            aslanpp_cookie_no_questionmark += cookie_no_questionmark[:-5]
            aslanpp_cookie_questionmark += cookie_questionmark[:-5]

            # save the mapping cookies
            concrete['cookies'] = cookie_mapping
        except KeyError:
            aslanpp_cookie_no_questionmark = "none"
            aslanpp_cookie_questionmark = "none"
            pass

        # check the response
        response_parser = HttpParser()
        response_parser.execute(http_response, len(http_response))

        # Location
        # get the returned page by checking the Location field in
        # the header. If Location is set, it means is a 302 Redirect
        # and the client is receiving a different page back in the response
        try:
            location = response_parser.get_headers()['Location']
            # prepend the letter p since in ASLan++ constants should start with a char
            return_page = "p{}".format(
                urlparse(location).path.partition("?")[0].replace(
                    ".", "_").replace("/", "_"))
            model._page_constants.add(return_page)
        except KeyError:
            return_page = page

        # parse cookie in the response
        try:
            set_cookie_header = response_parser.get_headers()['Set-Cookie']
            # parse new cookie
            simple_cookie = Cookie.SimpleCookie(set_cookie_header)
            cookies = [[item, simple_cookie[item].value]
                       for item in simple_cookie]

            # parse the parameters and retrieve ASLan++ code, constants, variables and mapping
            aslanpp_cookie2_no_questionmark, aslanpp_cookie2_questionmark, cookie2_mapping = _parse_parameters(
                model, cookies)
            aslanpp_cookie2_no_questionmark = aslanpp_cookie2_no_questionmark[:-5]
            aslanpp_cookie2_questionmark = aslanpp_cookie2_questionmark[:-5]

            # save the mapping cookies
            concrete['cookies'] = cookie2_mapping

        except KeyError:
            aslanpp_cookie2_no_questionmark = "none"
            aslanpp_cookie2_questionmark = "none"

        model._webapp_branch += request_skeleton.format(
            page, aslanpp_params_questionmark, aslanpp_cookie_questionmark,
            returntag, return_page, "none", aslanpp_cookie2_no_questionmark,
            returntag)

        model._client_branch += client_skeleton.format(
            page, aslanpp_params_no_questionmark,
            aslanpp_cookie_no_questionmark, returntag, return_page, returntag)

        model._concretization[returntag] = concrete

        # save tag in taglist and increment the tag number
        model._taglist.add(returntag)

        # increment the tag
        i_tag += 1

        return returntag
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
Example #23
#coding=utf-8
'''
Created on 2012-3-24

@author: fengclient
'''
from http_parser.pyparser import HttpParser

if __name__ == '__main__':
    rsp = open('d:\\172_response.txt').read()
    # If you are reading a text file saved on Windows, you may need to manually
    # convert \n back to \r\n (universal newline support:
    # http://docs.python.org/library/functions.html#open)
    rsp = rsp.replace('\n', '\r\n')
    p = HttpParser()
    p.execute(rsp, len(rsp))
    print(p.get_headers())
Example #24
def fetch(
    url,
    method="GET",
    headers=None,
    body=None,
    connect_timeout=DEFAULT_CONNECT_TIMEOUT,
    request_timeout=DEFAULT_REQUEST_TIMEOUT,
    io_loop=None,
    resolver=resolve,
    max_buffer_size=DEFAULT_BUFFER_SIZE,
    follow_redirects=False,
    max_redirects=DEFAULT_MAX_REDIRECTS,
    validate_cert=config.http_client.validate_certs,
    allow_proxy=False,
    proxies=None,
    user=None,
    password=None,
    content_encoding=None,
    eof_mark=None,
):
    """

    :param url: Fetch URL
    :param method: request method "GET", "POST", "PUT" etc
    :param headers: Dict of additional headers
    :param body: Request body for POST and PUT request
    :param connect_timeout:
    :param request_timeout:
    :param io_loop:
    :param resolver:
    :param follow_redirects:
    :param max_redirects:
    :param validate_cert:
    :param allow_proxy:
    :param proxies:
    :param user:
    :param password:
    :param max_buffer_size:
    :param content_encoding:
    :param eof_mark: Do not consider connection reset as error if
      eof_mark received (string or list)
    :return: code, headers, body
    """
    def get_ssl_options():
        ssl_options = {}
        if validate_cert:
            ssl_options["cert_reqs"] = ssl.CERT_REQUIRED
        return ssl_options

    logger.debug("HTTP %s %s", method, url)
    metrics["httpclient_requests", ("method", method.lower())] += 1
    # Detect proxy when necessary
    io_loop = io_loop or tornado.ioloop.IOLoop.current()
    u = urlparse(str(url))
    use_tls = u.scheme == "https"
    if ":" in u.netloc:
        host, port = u.netloc.rsplit(":")
        port = int(port)
    else:
        host = u.netloc
        port = DEFAULT_PORTS.get(u.scheme)
        if not port:
            raise tornado.gen.Return(
                (ERR_TIMEOUT, {},
                 "Cannot resolve port for scheme: %s" % u.scheme))
    if is_ipv4(host):
        addr = host
    else:
        addr = yield resolver(host)
    if not addr:
        raise tornado.gen.Return(
            (ERR_TIMEOUT, {}, "Cannot resolve host: %s" % host))
    # Detect proxy server
    if allow_proxy:
        proxy = (proxies or SYSTEM_PROXIES).get(u.scheme)
    else:
        proxy = None
    # Connect
    stream = None
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        if use_tls and not proxy:
            stream = tornado.iostream.SSLIOStream(
                s, io_loop=io_loop, ssl_options=get_ssl_options())
        else:
            stream = tornado.iostream.IOStream(s, io_loop=io_loop)
        try:
            if proxy:
                connect_address = proxy
            elif isinstance(addr, tuple):
                connect_address = addr
            else:
                connect_address = (addr, port)

            if proxy:
                logger.debug("Connecting to proxy %s:%s", connect_address[0],
                             connect_address[1])
            yield tornado.gen.with_timeout(
                io_loop.time() + connect_timeout,
                future=stream.connect(connect_address,
                                      server_hostname=u.netloc),
                io_loop=io_loop,
            )
        except tornado.iostream.StreamClosedError:
            metrics["httpclient_timeouts"] += 1
            raise tornado.gen.Return((ERR_TIMEOUT, {}, "Connection refused"))
        except tornado.gen.TimeoutError:
            metrics["httpclient_timeouts"] += 1
            raise tornado.gen.Return((ERR_TIMEOUT, {}, "Connection timed out"))
        deadline = io_loop.time() + request_timeout
        # Proxy CONNECT
        if proxy:
            logger.debug("Sending CONNECT %s:%s", addr, port)
            # Send CONNECT request
            req = b"CONNECT %s:%s HTTP/1.1\r\nUser-Agent: %s\r\n\r\n" % (
                addr,
                port,
                DEFAULT_USER_AGENT,
            )
            try:
                yield tornado.gen.with_timeout(
                    deadline,
                    future=stream.write(req),
                    io_loop=io_loop,
                    quiet_exceptions=(tornado.iostream.StreamClosedError, ),
                )
            except tornado.iostream.StreamClosedError:
                metrics["httpclient_proxy_timeouts"] += 1
                raise tornado.gen.Return(
                    (ERR_TIMEOUT, {},
                     "Connection reset while connecting to proxy"))
            except tornado.gen.TimeoutError:
                metrics["httpclient_proxy_timeouts"] += 1
                raise tornado.gen.Return(
                    (ERR_TIMEOUT, {},
                     "Timed out while sending request to proxy"))
            # Wait for proxy response
            parser = HttpParser()
            while not parser.is_headers_complete():
                try:
                    data = yield tornado.gen.with_timeout(
                        deadline,
                        future=stream.read_bytes(max_buffer_size,
                                                 partial=True),
                        io_loop=io_loop,
                        quiet_exceptions=(
                            tornado.iostream.StreamClosedError, ),
                    )
                except tornado.iostream.StreamClosedError:
                    metrics["httpclient_proxy_timeouts"] += 1
                    raise tornado.gen.Return(
                        (ERR_TIMEOUT, {},
                         "Connection reset while connecting to proxy"))
                except tornado.gen.TimeoutError:
                    metrics["httpclient_proxy_timeouts"] += 1
                    raise tornado.gen.Return(
                        (ERR_TIMEOUT, {},
                         "Timed out while sending request to proxy"))
                received = len(data)
                parsed = parser.execute(data, received)
                if parsed != received:
                    raise tornado.gen.Return(
                        (ERR_PARSE_ERROR, {}, "Parse error"))
            code = parser.get_status_code()
            logger.debug("Proxy response: %s", code)
            if not 200 <= code <= 299:
                raise tornado.gen.Return(
                    (code, parser.get_headers(), "Proxy error: %s" % code))
            # Switch to TLS when necessary
            if use_tls:
                logger.debug("Starting TLS negotiation")
                try:
                    stream = yield tornado.gen.with_timeout(
                        deadline,
                        future=stream.start_tls(
                            server_side=False,
                            ssl_options=get_ssl_options(),
                            server_hostname=u.netloc,
                        ),
                        io_loop=io_loop,
                        quiet_exceptions=(
                            tornado.iostream.StreamClosedError, ),
                    )
                except tornado.iostream.StreamClosedError:
                    metrics["httpclient_proxy_timeouts"] += 1
                    raise tornado.gen.Return(
                        (ERR_TIMEOUT, {},
                         "Connection reset while connecting to proxy"))
                except tornado.gen.TimeoutError:
                    metrics["httpclient_proxy_timeouts"] += 1
                    raise tornado.gen.Return(
                        (ERR_TIMEOUT, {},
                         "Timed out while sending request to proxy"))
        # Process request
        body = body or ""
        content_type = "application/binary"
        if isinstance(body, unicode):
            body = body.encode("utf-8")
        elif not isinstance(body, six.string_types):
            body = ujson.dumps(body)
            content_type = "text/json"
        h = {
            "Host": str(u.netloc),
            "Connection": "close",
            "User-Agent": DEFAULT_USER_AGENT
        }
        if body and content_encoding:
            if content_encoding == CE_DEFLATE:
                # Deflate compression
                h["Content-Encoding"] = CE_DEFLATE
                compress = zlib.compressobj(
                    zlib.Z_DEFAULT_COMPRESSION,
                    zlib.DEFLATED,
                    -zlib.MAX_WBITS,
                    zlib.DEF_MEM_LEVEL,
                    zlib.Z_DEFAULT_STRATEGY,
                )
                body = compress.compress(body) + compress.flush()
            elif content_encoding == CE_GZIP:
                # gzip compression
                h["Content-Encoding"] = CE_GZIP
                compress = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS,
                                            zlib.DEF_MEM_LEVEL, 0)
                crc = zlib.crc32(body, 0) & 0xFFFFFFFF
                body = "\x1f\x8b\x08\x00%s\x02\xff%s%s%s%s" % (
                    to32u(int(time.time())),
                    compress.compress(body),
                    compress.flush(),
                    to32u(crc),
                    to32u(len(body)),
                )
        if method in REQUIRE_LENGTH_METHODS:
            h["Content-Length"] = str(len(body))
            h["Content-Type"] = content_type
        if user and password:
            # Include basic auth header
            h["Authorization"] = "Basic %s" % (
                "%s:%s" % (user, password)).encode("base64").strip()
        if headers:
            h.update(headers)
        path = u.path
        if u.query:
            path += "?%s" % u.query
        req = b"%s %s HTTP/1.1\r\n%s\r\n\r\n%s" % (
            method,
            path,
            "\r\n".join(b"%s: %s" % (k, h[k]) for k in h),
            body,
        )
        try:
            yield tornado.gen.with_timeout(
                deadline,
                future=stream.write(req),
                io_loop=io_loop,
                quiet_exceptions=(tornado.iostream.StreamClosedError, ),
            )
        except tornado.iostream.StreamClosedError:
            metrics["httpclient_timeouts"] += 1
            raise tornado.gen.Return(
                (ERR_TIMEOUT, {}, "Connection reset while sending request"))
        except tornado.gen.TimeoutError:
            metrics["httpclient_timeouts"] += 1
            raise tornado.gen.Return(
                (ERR_TIMEOUT, {}, "Timed out while sending request"))
        parser = HttpParser()
        response_body = []
        while not parser.is_message_complete():
            try:
                data = yield tornado.gen.with_timeout(
                    deadline,
                    future=stream.read_bytes(max_buffer_size, partial=True),
                    io_loop=io_loop,
                    quiet_exceptions=(tornado.iostream.StreamClosedError, ),
                )
            except tornado.iostream.StreamClosedError:
                if not response_body and config.features.pypy:
                    break
                if eof_mark and response_body:
                    # Check if EOF mark is in received data
                    response_body = ["".join(response_body)]
                    if isinstance(eof_mark, six.string_types):
                        if eof_mark in response_body[0]:
                            break
                    else:
                        found = False
                        for m in eof_mark:
                            if m in response_body[0]:
                                found = True
                                break
                        if found:
                            break
                metrics["httpclient_timeouts"] += 1
                raise tornado.gen.Return(
                    (ERR_READ_TIMEOUT, {}, "Connection reset"))
            except tornado.gen.TimeoutError:
                metrics["httpclient_timeouts"] += 1
                raise tornado.gen.Return(
                    (ERR_READ_TIMEOUT, {}, "Request timed out"))
            received = len(data)
            parsed = parser.execute(data, received)
            if parsed != received:
                raise tornado.gen.Return((ERR_PARSE_ERROR, {}, "Parse error"))
            if parser.is_partial_body():
                response_body += [parser.recv_body()]
        code = parser.get_status_code()
        parsed_headers = parser.get_headers()
        logger.debug("HTTP Response %s", code)
        if 300 <= code <= 399 and follow_redirects:
            # Process redirects
            if max_redirects > 0:
                new_url = parsed_headers.get("Location")
                if not new_url:
                    raise tornado.gen.Return(
                        (ERR_PARSE_ERROR, {}, "No Location header"))
                logger.debug("HTTP redirect %s %s", code, new_url)
                code, parsed_headers, response_body = yield fetch(
                    new_url,
                    method="GET",
                    headers=headers,
                    connect_timeout=connect_timeout,
                    request_timeout=request_timeout,
                    resolver=resolver,
                    max_buffer_size=max_buffer_size,
                    follow_redirects=follow_redirects,
                    max_redirects=max_redirects - 1,
                    validate_cert=validate_cert,
                    allow_proxy=allow_proxy,
                    proxies=proxies,
                )
                raise tornado.gen.Return((code, parsed_headers, response_body))
            else:
                raise tornado.gen.Return((404, {}, "Redirect limit exceeded"))
        # @todo: Process gzip and deflate Content-Encoding
        raise tornado.gen.Return(
            (code, parsed_headers, "".join(response_body)))
    finally:
        if stream:
            stream.close()
        else:
            s.close()
Example #25
def reset_parser(self):
    self.http_parser = HttpParser()
    self.http_buffer = []
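A typical use, sketched on the assumption that the surrounding class feeds
received bytes through http_parser and starts fresh after each complete
message (handle_data is a hypothetical helper):

def handle_data(self, data):
    self.http_parser.execute(data, len(data))
    if self.http_parser.is_partial_body():
        self.http_buffer.append(self.http_parser.recv_body())
    if self.http_parser.is_message_complete():
        body = b"".join(self.http_buffer)
        self.reset_parser()
        return body
    return None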
Example #26
def https_proxy(host, port, conn):
    epoch = "%d" % (time.time() * 1000)
    cert_path = "%s/%s.crt" % (cert_dir.rstrip('/'), host)
    # Generating config to add subjectAltName (required in modern browsers)
    conf_template = Template("subjectAltName=DNS:${hostname}")
    conf_path = "%s/%s.cnf" % (cert_dir.rstrip('/'), host)
    with open(conf_path, 'w') as fp:
        fp.write(conf_template.substitute(hostname=host))

    # Generating certificate
    p1 = Popen([
        "openssl", "req", "-new", "-key", cert_key, "-subj",
        "/CN=%s" % host, "-addext", "subjectAltName = DNS:" + host
    ],
               stdout=PIPE)
    p2 = Popen([
        "openssl", "x509", "-req", "-extfile", conf_path, "-days", "3650",
        "-CA", ca_cert, "-CAkey", ca_key, "-set_serial", epoch, "-out",
        cert_path
    ],
               stdin=p1.stdout,
               stderr=PIPE)
    p2.communicate()
    os.unlink(conf_path)

    # Connecting to server
    tunn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    tunn.connect((host, port))
    # Establishing connection with client
    conn.sendall(b'HTTP/1.1 200 Connection Established\r\n\r\n')
    conn_s = ssl.wrap_socket(conn,
                             keyfile=cert_key,
                             certfile=cert_path,
                             server_side=True)
    conn_s.do_handshake()

    request = conn_s.recv(40960)
    # Establishing https connection with server
    # context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    # s_sock = context.wrap_socket(tunn, server_hostname=host)
    s_sock = ssl.wrap_socket(tunn)
    s_sock.send(request)
    # Getting response
    parser = HttpParser()
    resp = b''
    while True:
        data = s_sock.recv(buffer_size)
        if not data:
            break

        received = len(data)
        _ = parser.execute(data, received)
        resp += data

        if parser.is_message_complete():
            break

    conn_s.sendall(resp)
    # # Save information about request
    # sql_conn = saver.get_connection()
    # saver.save_request(sql_conn, host, port, request, 1)
    # sql_conn.close()

    s_sock.close()
    conn_s.close()
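ssl.wrap_socket was deprecated in Python 3.7 and removed in 3.12; an
equivalent sketch using SSLContext with the same cert_key/cert_path:

server_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
server_ctx.load_cert_chain(certfile=cert_path, keyfile=cert_key)
conn_s = server_ctx.wrap_socket(conn, server_side=True)

client_ctx = ssl.create_default_context()
s_sock = client_ctx.wrap_socket(tunn, server_hostname=host)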
Example #27
class HttpProtocolHandler:

    PROXY_TUNNEL_ESTABLISHED_RESPONSE_PKT = build_http_response(
        200, reason=b'Connection established')

    def __init__(self, client_conn: socket.socket, client_addr, flags, req):
        self.start_time: float = time.time()

        self.client = client_conn
        self.req = req

        self.client_addr = client_addr  # host and socket_fd
        self.flags = flags

        self.request_parser = HttpParser(0)  # 0 - parse only requests
        self.response_parser = HttpParser(1)  # 1 - parse only responses
        self.total_response_size: int = 0

        self.upstream: Optional[urlparse.SplitResultBytes] = None
        self.host = None
        self.port = None
        self.upstream_url = None

        self.server: Optional[socket.socket] = None

    def parse_url(self, parser):
        url = parser.get_url()
        method = parser.get_method()

        protocol_pos = url.find('://')
        if protocol_pos != -1:
            url = url[(protocol_pos + 3):]

        port_pos = url.find(':')
        host_pos = url.find('/')
        if host_pos == -1:
            host_pos = len(url)
        if port_pos == -1 or host_pos < port_pos:
            port = 443 if method == "CONNECT" else DEFAULT_HTTP_PORT
        else:
            port = int((url[port_pos + 1:])[:host_pos - port_pos - 1])

        port_ind = url.find(':')
        if port_ind != -1:
            url = url[:port_ind]

        self.upstream = urlparse.urlsplit('http://' + url + '/')
        self.upstream_url = self.build_upstream_relative_path()
        host = self.upstream.hostname

        port_ind = host.find(':')
        if port_ind != -1:
            host = host[:port_ind]

        return host, port

    def run(self) -> None:
        p = HttpParser()
        try:
            p.execute(self.req, len(self.req))

            url = p.get_url()
            method = p.get_method()

            http_pos = url.find('://')
            if http_pos == -1:
                temp = url
            else:
                temp = url[(http_pos + 3):]

            port_pos = temp.find(':')
            host_pos = temp.find('/')
            if host_pos == -1:
                host_pos = len(temp)
            if port_pos == -1 or host_pos < port_pos:
                port = 443 if method == "CONNECT" else 80
            else:
                port = int((temp[port_pos + 1:])[:host_pos - port_pos - 1])

            host = p.get_headers()['host']
            port_ind = host.find(':')
            if port_ind != -1:
                host = host[:port_ind]
            if metopd == "CONNECT":
                https_proxy(host, port, self.client)
            else:
                proxy(host, port, self.client, self.req)
        except Exception as e:
            print(e)

    def access_log(self):
        server_host, server_port = self.upstream.hostname, self.upstream.port \
            if self.upstream.port else DEFAULT_HTTP_PORT

        connection_time_ms = (time.time() - self.start_time) * 1000
        method = self.request_parser.get_method()
        if method == httpMethods.CONNECT:
            pass
        elif method:
            print('pid:%s |  %s:%s - %s %s:%s%s - %s %s - %s bytes - %.2f ms' %
                  (str(getpid()), self.client_addr[0], self.client_addr[1],
                   method, server_host, server_port,
                   self.request_parser.get_path(),
                   self.response_parser.get_status_code(),
                   self.response_parser.get_errno(), self.total_response_size,
                   connection_time_ms))
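build_http_response is imported from elsewhere in that project; a minimal
sketch consistent with the single call shown above (the real helper likely
takes more parameters):

def build_http_response(status_code, reason=b''):
    # Render a bare status line plus an empty header block, e.g. the
    # "200 Connection established" packet used to answer CONNECT.
    return b'HTTP/1.1 %d %s\r\n\r\n' % (status_code, reason)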
Example #28
def new_connection(client):
    parser = HttpParser(kind=0)
    parser.environ = True

    client.on("data", on_read, parser, client).on("error", on_error, client)