def _read_request_line(rfile):
    """Read the first line of an HTTP/1 request from ``rfile`` and split it up.

    Returns:
        A ``(host, port, method, scheme, authority, path, http_version)``
        tuple. For targets that are ``*`` or start with ``/`` the scheme and
        authority are empty bytes, ``host`` is ``""`` and ``port`` is ``0``.

    Raises:
        exceptions.HttpReadDisconnect: fetching the first line reported a
            disconnect.
        exceptions.HttpSyntaxException: any parsing step raised ``ValueError``.
    """
    try:
        line = _get_first_line(rfile)
    except exceptions.HttpReadDisconnect:
        # We want to provide a better error message.
        raise exceptions.HttpReadDisconnect("Client disconnected")

    try:
        method, target, http_version = line.split()

        if target.startswith(b"/") or target == b"*":
            # Target carries no scheme or authority of its own.
            host, port = "", 0
            scheme = authority = b""
            path = target
        elif method == b"CONNECT":
            # CONNECT: the whole target is the authority; a port is required.
            authority = target
            scheme = path = b""
            host, port = url.parse_authority(authority, check=True)
            if not port:
                raise ValueError
        else:
            # Full URL target: scheme://authority/path
            scheme, remainder = target.split(b"://", maxsplit=1)
            authority, tail = remainder.split(b"/", maxsplit=1)
            path = b"/" + tail
            host, port = url.parse_authority(authority, check=True)
            if not port:
                port = url.default_port(scheme)
                if not port:
                    raise ValueError
            # TODO: we can probably get rid of this check?
            url.parse(target)

        _check_http_version(http_version)
    except ValueError:
        raise exceptions.HttpSyntaxException(f"Bad HTTP request line: {line}")

    return host, port, method, scheme, authority, path, http_version
def _read_request_line( line: bytes) -> Tuple[str, int, bytes, bytes, bytes, bytes, bytes]: try: method, target, http_version = line.split() port: Optional[int] if target == b"*" or target.startswith(b"/"): scheme, authority, path = b"", b"", target host, port = "", 0 elif method == b"CONNECT": scheme, authority, path = b"", target, b"" host, port = url.parse_authority(authority, check=True) if not port: raise ValueError else: scheme, rest = target.split(b"://", maxsplit=1) authority, path_ = rest.split(b"/", maxsplit=1) path = b"/" + path_ host, port = url.parse_authority(authority, check=True) port = port or url.default_port(scheme) if not port: raise ValueError # TODO: we can probably get rid of this check? url.parse(target) raise_if_http_version_unknown(http_version) except ValueError as e: raise ValueError(f"Bad HTTP request line: {line!r}") from e return host, port, method, scheme, authority, path, http_version
def parse_headers(headers):
    """Derive request-line fields from HTTP/2 pseudo-headers.

    Reads ``:authority``, ``:method``, ``:scheme`` and ``:path`` from
    ``headers`` (defaults: ``''``, ``'GET'``, ``'https'``, ``'/'``) and
    removes ``:method``, ``:scheme`` and ``:path`` from it. ``:authority``
    is read but not removed.

    Returns:
        ``(first_line_format, method, scheme, host, port, path)`` where
        ``first_line_format`` is ``"relative"`` or ``"absolute"``, ``port``
        is an ``int`` and the remaining values are ``bytes``.

    Raises:
        NotImplementedError: the method is ``CONNECT``.
        ValueError: the port is not numeric or a bracketed IPv6 authority is
            malformed.
    """
    authority = headers.get(':authority', '').encode()
    method = headers.get(':method', 'GET').encode()
    scheme = headers.get(':scheme', 'https').encode()
    path = headers.get(':path', '/').encode()

    headers.pop(":method", None)
    headers.pop(":scheme", None)
    headers.pop(":path", None)

    host = None
    port = None

    if method == b'CONNECT':
        raise NotImplementedError("CONNECT over HTTP/2 is not implemented.")

    if path == b'*' or path.startswith(b"/"):
        first_line_format = "relative"
    else:
        first_line_format = "absolute"
        scheme, host, port, _ = url.parse(path)

    if authority:
        if authority.startswith(b"["):
            # Bracketed IPv6 literal: "[addr]" or "[addr]:port". Splitting at
            # the first colon would cut the address apart and make int(port)
            # fail, so split at the closing bracket instead. The brackets are
            # dropped from the host, like urllib's ``hostname`` attribute.
            literal, closing, remainder = authority.partition(b"]")
            if not closing or (remainder and not remainder.startswith(b":")):
                raise ValueError("Invalid :authority: %r" % (authority,))
            host, port = literal[1:], remainder[1:]
        else:
            host, _, port = authority.partition(b':')

    # Fall back to defaults for anything still missing.
    if not host:
        host = b'localhost'
    if not port:
        port = 443 if scheme == b'https' else 80
    port = int(port)

    return first_line_format, method, scheme, host, port, path
def index():
    """Show the paginated ban list and process the "add ban" form.

    When the form does not validate, the list page is rendered (30 bans per
    page). When it validates, the submitted URL is parsed, a ``Ban`` for its
    netloc is created or updated, and the client is redirected back to this
    view.
    """
    page = request.args.get('page', 1, type=int)
    pagination = Ban.query().paginate(page, per_page=30, error_out=False)
    bans = pagination.items
    form = BanForm()

    if not form.validate_on_submit():
        return render_template('ban/index.jinja',
                               bans=bans,
                               pagination=pagination,
                               form=form)

    # parse returns bytes values
    scheme, netloc, port, full_path = parse(form.url.data.strip())
    scheme = scheme.decode('utf-8')
    netloc = netloc.decode('utf-8')
    full_path = full_path.decode('utf-8')

    # Reuse the ban already stored for this netloc, or start a new one.
    ban = Ban.query(netloc=netloc).first()
    if ban:
        ban.only_netloc = form.only_netloc.data
    else:
        ban = Ban(netloc=netloc, only_netloc=form.only_netloc.data)

    # A path entry is only recorded when the ban is not netloc-wide.
    if not form.only_netloc.data:
        ban.path.append(Path(full_path=full_path))

    db.session.add(ban)
    db.session.commit()

    flash(u'添加成功')
    return redirect(url_for('ban.index'))
def parse_headers(headers):
    """Derive request-line fields from HTTP/2 pseudo-headers.

    Reads ``:authority``, ``:method``, ``:scheme`` and ``:path`` from
    ``headers`` (defaults: ``''``, ``'GET'``, ``'https'``, ``'/'``) and
    removes ``:method``, ``:scheme`` and ``:path`` from it. ``:authority``
    is read but not removed.

    Returns:
        ``(first_line_format, method, scheme, host, port, path)`` where
        ``first_line_format`` is ``"relative"`` or ``"absolute"``, ``port``
        is an ``int`` and the remaining values are ``bytes``.

    Raises:
        NotImplementedError: the method is ``CONNECT`` and the path is
            neither ``*`` nor starts with ``/``.
        ValueError: the port is not numeric or a bracketed IPv6 authority is
            malformed.
    """
    authority = headers.get(':authority', '').encode()
    method = headers.get(':method', 'GET').encode()
    scheme = headers.get(':scheme', 'https').encode()
    path = headers.get(':path', '/').encode()

    headers.pop(":method", None)
    headers.pop(":scheme", None)
    headers.pop(":path", None)

    host = None
    port = None

    if path == b'*' or path.startswith(b"/"):
        first_line_format = "relative"
    elif method == b'CONNECT':  # pragma: no cover
        raise NotImplementedError("CONNECT over HTTP/2 is not implemented.")
    else:  # pragma: no cover
        first_line_format = "absolute"
        # FIXME: verify if path or :host contains what we need
        scheme, host, port, _ = url.parse(path)

    if authority:
        if authority.startswith(b"["):
            # Bracketed IPv6 literal: "[addr]" or "[addr]:port". Splitting at
            # the first colon would cut the address apart and make int(port)
            # fail, so split at the closing bracket instead. The brackets are
            # dropped from the host, like urllib's ``hostname`` attribute.
            literal, closing, remainder = authority.partition(b"]")
            if not closing or (remainder and not remainder.startswith(b":")):
                raise ValueError("Invalid :authority: %r" % (authority,))
            host, port = literal[1:], remainder[1:]
        else:
            host, _, port = authority.partition(b':')

    # Fall back to defaults for anything still missing.
    if not host:
        host = b'localhost'
    if not port:
        port = 443 if scheme == b'https' else 80
    port = int(port)

    return first_line_format, method, scheme, host, port, path
def parse_headers(headers):
    """Derive request-line fields from HTTP/2 pseudo-headers.

    Reads ``:authority``, ``:method``, ``:scheme`` and ``:path`` from
    ``headers`` (defaults: ``""``, ``"GET"``, ``"https"``, ``"/"``) and
    removes ``:method``, ``:scheme`` and ``:path`` from it. ``:authority``
    is read but not removed.

    Returns:
        ``(first_line_format, method, scheme, host, port, path)`` where
        ``first_line_format`` is ``"relative"`` or ``"absolute"``, ``port``
        is an ``int`` and the remaining values are ``bytes``.

    Raises:
        NotImplementedError: the method is ``CONNECT`` and the path is
            neither ``*`` nor starts with ``/``.
        ValueError: the port is not numeric or a bracketed IPv6 authority is
            malformed.
    """
    authority = headers.get(":authority", "").encode()
    method = headers.get(":method", "GET").encode()
    scheme = headers.get(":scheme", "https").encode()
    path = headers.get(":path", "/").encode()

    headers.pop(":method", None)
    headers.pop(":scheme", None)
    headers.pop(":path", None)

    host = None
    port = None

    if path == b"*" or path.startswith(b"/"):
        first_line_format = "relative"
    elif method == b"CONNECT":  # pragma: no cover
        raise NotImplementedError("CONNECT over HTTP/2 is not implemented.")
    else:  # pragma: no cover
        first_line_format = "absolute"
        # FIXME: verify if path or :host contains what we need
        scheme, host, port, _ = url.parse(path)

    if authority:
        if authority.startswith(b"["):
            # Bracketed IPv6 literal: "[addr]" or "[addr]:port". Splitting at
            # the first colon would cut the address apart and make int(port)
            # fail, so split at the closing bracket instead. The brackets are
            # dropped from the host, like urllib's ``hostname`` attribute.
            literal, closing, remainder = authority.partition(b"]")
            if not closing or (remainder and not remainder.startswith(b":")):
                raise ValueError("Invalid :authority: %r" % (authority,))
            host, port = literal[1:], remainder[1:]
        else:
            host, _, port = authority.partition(b":")

    # Fall back to defaults for anything still missing.
    if not host:
        host = b"localhost"
    if not port:
        port = 443 if scheme == b"https" else 80
    port = int(port)

    return first_line_format, method, scheme, host, port, path
def _read_request_line(rfile):
    """Read the first line of an HTTP/1 request and classify its target.

    Returns:
        ``(form, method, scheme, host, port, path, http_version)`` where
        ``form`` is ``"relative"``, ``"authority"`` or ``"absolute"``.
        Components that the given form does not carry are ``None``.

    Raises:
        exceptions.HttpReadDisconnect: fetching the first line reported a
            disconnect.
        exceptions.HttpSyntaxException: any parsing step raised ``ValueError``.
    """
    try:
        line = _get_first_line(rfile)
    except exceptions.HttpReadDisconnect:
        # We want to provide a better error message.
        raise exceptions.HttpReadDisconnect("Client disconnected")

    try:
        method, target, http_version = line.split()

        # Start with "nothing known"; each form fills in what it provides.
        scheme = host = port = path = None
        if target.startswith(b"/") or target == b"*":
            form, path = "relative", target
        elif method == b"CONNECT":
            form = "authority"
            host, port = _parse_authority_form(target)
        else:
            form = "absolute"
            scheme, host, port, path = url.parse(target)

        _check_http_version(http_version)
    except ValueError:
        raise exceptions.HttpSyntaxException("Bad HTTP request line: {}".format(line))

    return form, method, scheme, host, port, path, http_version
def _read_request_line(rfile):
    """Read the first line of an HTTP/1 request and classify its target.

    Returns:
        ``(form, method, scheme, host, port, path, http_version)`` where
        ``form`` is ``"relative"``, ``"authority"`` or ``"absolute"``.
        Components that the given form does not carry are ``None``.

    Raises:
        exceptions.HttpReadDisconnect: fetching the first line reported a
            disconnect.
        exceptions.HttpSyntaxException: any parsing step raised ``ValueError``.
    """
    try:
        line = _get_first_line(rfile)
    except exceptions.HttpReadDisconnect:
        # We want to provide a better error message.
        raise exceptions.HttpReadDisconnect("Client disconnected")

    try:
        method, target, http_version = line.split()

        # Start with "nothing known"; each form fills in what it provides.
        scheme = host = port = path = None
        if target.startswith(b"/") or target == b"*":
            form, path = "relative", target
        elif method == b"CONNECT":
            form = "authority"
            host, port = _parse_authority_form(target)
        else:
            form = "absolute"
            scheme, host, port, path = url.parse(target)

        _check_http_version(http_version)
    except ValueError:
        raise exceptions.HttpSyntaxException(
            "Bad HTTP request line: {}".format(line)
        )

    return form, method, scheme, host, port, path, http_version
def __init__(self, upstream_url_resolver, proxy_magic='pywb.proxy',
             magic_fwd='http://localhost/', assets_path=None, is_rw=True):
    """Set up the resolver, forwarding target, templates and rewriter.

    ``magic_fwd`` is split with ``parse``; its scheme, host and path are
    decoded as latin-1 and stored (with the port) as ``fwd_*`` attributes.
    A ``Rewriter`` is created only when ``is_rw`` is true; otherwise
    ``content_rewriter`` is ``None``.
    """
    self.upstream_url_resolver = upstream_url_resolver
    self.loader = ArcWarcRecordLoader()
    self.proxy_magic = proxy_magic

    # Forwarding target: decode the textual parts, keep the port as returned.
    fwd_scheme, fwd_host, fwd_port, fwd_path = parse(magic_fwd)
    self.fwd_scheme = fwd_scheme.decode('latin-1')
    self.fwd_host = fwd_host.decode('latin-1')
    self.fwd_port = fwd_port
    self.fwd_path = fwd_path.decode('latin-1')

    # Template environment and the views rendered from it.
    self.jinja_env = JinjaEnv(assets_path=assets_path)
    self.head_insert_view = HeadInsertView(self.jinja_env,
                                           'head_insert.html',
                                           'banner.html')
    self.error_view = BaseInsertView(self.jinja_env, 'error.html')
    self.home_redir_view = BaseInsertView(self.jinja_env, 'home.html')

    self.content_rewriter = Rewriter(is_framed_replay=False) if is_rw else None
def add_url(url) -> None:
    """Parse ``url`` and store its components as a new ``Flow`` record."""
    scheme, netloc, port, full_path = parse(url)
    record = Flow(
        scheme=scheme,
        netloc=netloc,
        port=port,
        full_path=full_path,
    )
    session = db.session
    session.add(record)
    session.commit()
def test_ascii_check():
    """A URL with non-ASCII query text parses into percent-encoded bytes."""
    raw_url = "https://xyz.tax-edu.net?flag=selectCourse&lc_id=42825&lc_name=茅莽莽猫氓猫氓".encode()
    expected_path = (
        b'/?flag%3DselectCourse%26lc_id%3D42825%26lc_name%3D%E8%8C%85%E8%8E%BD%E8%8E'
        b'%BD%E7%8C%AB%E6%B0%93%E7%8C%AB%E6%B0%93'
    )

    scheme, host, port, full_path = url.parse(raw_url)

    assert scheme == b'https'
    assert host == b'xyz.tax-edu.net'
    assert port == 443
    assert full_path == expected_path
# _set_request_url: work out the upstream URL for `flow` and rewrite the
# request to point at it.
#
# Steps visible in the code:
#   * The request host is taken from the 'host' header, falling back to
#     flow.request.host when that header is missing or empty.
#   * If the host equals self.proxy_magic and the path is H_REFRESH_PATH or
#     H_REDIR_PATH, the path is remembered as a pending homepage redirect.
#   * Any other request to self.proxy_magic is re-pointed at
#     self.fwd_host / self.fwd_scheme / self.fwd_port, gets an 'X-Proxy-For'
#     header holding the client address host, and the method returns False.
#   * Otherwise the request URL is assembled (scheme + '://' + host, or
#     hostport(...) when no host is known) and stored on the request as
#     req_url, together with req_scheme.
#   * self.upstream_url_resolver is called with the quote_plus-encoded URL,
#     the request headers, the client address and `postreq`; its result is
#     unpacked as (full_url, extra_data).
#   * For a pending homepage redirect, extra_data['url'] (when truthy) is
#     handed to self.homepage_refresh or self.homepage_redir.
#   * Otherwise full_url is split with parse() and the request's scheme,
#     host, port and path are overwritten; extra_data is stored on the flow
#     and the method returns True.
#
# NOTE(review): this definition is stored collapsed onto one line, so the
# nesting of the `return False` that follows the homepage_refresh /
# homepage_redir calls cannot be determined from here (it may belong to
# `if homepage_redirect:` or to `if url:`). Confirm against the original
# layout before reformatting this method.
def _set_request_url(self, flow, postreq=''): host = flow.request.headers.get('host') if not host: host = flow.request.host homepage_redirect = None if (host == self.proxy_magic and (flow.request.path in (H_REFRESH_PATH, H_REDIR_PATH))): homepage_redirect = flow.request.path elif host == self.proxy_magic: flow.request.host = self.fwd_host flow.request.scheme = self.fwd_scheme flow.request.port = self.fwd_port flow.request.headers['X-Proxy-For'] = str( flow.client_conn.address.host) return False if host: host = flow.request.scheme + '://' + host else: host = hostport(flow.request.scheme, flow.request.host, flow.request.port) req_url = host + flow.request.path flow.request.req_url = req_url flow.request.req_scheme = flow.request.scheme result = self.upstream_url_resolver(url=quote_plus(req_url), headers=flow.request.headers, address=flow.client_conn.address, postreq=postreq) full_url, extra_data = result if homepage_redirect: url = extra_data.get('url') if url: if homepage_redirect == H_REFRESH_PATH: self.homepage_refresh(flow, url) elif homepage_redirect == H_REDIR_PATH: self.homepage_redir(flow, url) return False scheme, host, port, path = parse(full_url) flow.request.scheme = scheme flow.request.host = host flow.request.port = port flow.request.path = path flow.extra_data = extra_data return True
def parse_server_spec(spec):
    """Turn a server specification URL into a ``ServerSpec``.

    Returns:
        ``ServerSpec(scheme, address)`` where ``scheme`` is the lower-cased
        ASCII scheme and ``address`` is a ``(host, port)`` tuple with the
        host decoded as ASCII.

    Raises:
        exceptions.OptionsError: ``url.parse`` raised ``ValueError`` or the
            scheme is neither ``http`` nor ``https``.
    """
    try:
        parsed = url.parse(spec)
        raw_scheme = parsed[0]
        if raw_scheme != b"http" and raw_scheme != b"https":
            raise ValueError()
    except ValueError:
        raise exceptions.OptionsError("Invalid server specification: %s" % spec)

    raw_host, port = parsed[1:3]
    address = (raw_host.decode("ascii"), port)
    scheme = parsed[0].decode("ascii").lower()
    return ServerSpec(scheme, address)
def parse_server_spec(spec):
    """Turn a server specification URL into a ``ServerSpec``.

    Returns:
        ``ServerSpec(scheme, address)`` where ``scheme`` is the lower-cased
        ASCII scheme and ``address`` is a ``tcp.Address`` built from the
        ASCII-decoded host and the port.

    Raises:
        exceptions.OptionsError: ``url.parse`` raised ``ValueError`` or the
            scheme is neither ``http`` nor ``https``.
    """
    try:
        parsed = url.parse(spec)
        raw_scheme = parsed[0]
        if raw_scheme != b"http" and raw_scheme != b"https":
            raise ValueError()
    except ValueError:
        raise exceptions.OptionsError(
            "Invalid server specification: %s" % spec
        )

    raw_host, port = parsed[1:3]
    address = tcp.Address((raw_host.decode("ascii"), port))
    scheme = parsed[0].decode("ascii").lower()
    return ServerSpec(scheme, address)
# resolve: build a resolved copy of this message with automatically added
# tokens.
#
# Steps visible in the code:
#   * Works on a shallow copy of self.tokens.
#   * When self.ws is set: inserts Method("get") at index 1 if no method is
#     present, and appends a Header for every field of
#     mitmproxy.net.websockets.client_handshake_headers() that
#     get_header() does not find in self.headers.
#   * A "Content-Length" Header is appended when get_header() finds none and
#     a body exists; its value is the summed length of
#     self.body.values(settings).
#   * A "Host" Header is appended when settings.request_host is set and
#     get_header() finds none. The value defaults to settings.request_host;
#     if self.path is set and url.parse() accepts it, the host parsed from
#     the path is used instead (a ValueError from url.parse is ignored).
#   * Returns self.__class__ built from every token's
#     resolve(settings, intermediate), where `intermediate` is
#     self.__class__(tokens).
#
# `msg` is accepted but not used in this body.
#
# NOTE(review): this definition is stored collapsed onto one line, so the
# exact nesting of the Content-Length and Host sections relative to
# `if not self.raw:` cannot be determined from here. Confirm against the
# original layout before reformatting this method.
def resolve(self, settings, msg=None): tokens = self.tokens[:] if self.ws: if not self.method: tokens.insert( 1, Method("get") ) for i in mitmproxy.net.websockets.client_handshake_headers().fields: if not get_header(i[0], self.headers): tokens.append( Header( base.TokValueLiteral(i[0].decode()), base.TokValueLiteral(i[1].decode()) ) ) if not self.raw: if not get_header(b"Content-Length", self.headers): if self.body: length = sum( len(i) for i in self.body.values(settings) ) tokens.append( Header( base.TokValueLiteral("Content-Length"), base.TokValueLiteral(str(length)), ) ) if settings.request_host: if not get_header(b"Host", self.headers): h = settings.request_host if self.path: path = b"".join(self.path.values({})).decode( "ascii", errors="ignore" ) try: _, h, _, _ = url.parse(path) h = h.decode("ascii", errors="ignore") except ValueError: pass tokens.append( Header( base.TokValueLiteral("Host"), base.TokValueLiteral(h) ) ) intermediate = self.__class__(tokens) return self.__class__( [i.resolve(settings, intermediate) for i in tokens] )
def test_parse():
    """url.parse splits valid URLs and rejects malformed ones."""
    with pytest.raises(ValueError):
        url.parse("")

    # Explicit port, bytes input.
    assert tuple(url.parse(b"http://foo.com:8888/test")) == (
        b"http", b"foo.com", 8888, b"/test",
    )

    # str input, port defaulted from the scheme.
    assert tuple(url.parse("http://foo/bar")) == (b"http", b"foo", 80, b"/bar")

    # Userinfo in front of the host is not part of the returned host.
    assert tuple(url.parse(b"http://*****:*****@foo/bar")) == (
        b"http", b"foo", 80, b"/bar",
    )

    # A missing path becomes "/".
    _, _, _, only_path = url.parse(b"http://foo")
    assert only_path == b"/"

    # https defaults to port 443.
    _, _, https_port, _ = url.parse(b"https://foo")
    assert https_port == 443

    rejected = (
        b"https://foo:bar",
        # Invalid IDNA
        "http://\xfafoo",
        # Invalid PATH
        "http:/\xc6/localhost:56121",
        # Null byte in host
        "http://foo\0",
        # Invalid IPv6 URL - see http://www.ietf.org/rfc/rfc2732.txt
        'http://lo[calhost',
    )
    for bad_url in rejected:
        with pytest.raises(ValueError):
            url.parse(bad_url)
def url(self, val: Union[str, bytes]) -> None:
    """Assign scheme, host, port and path from a full URL.

    ``val`` is first converted with ``always_str`` (utf-8, surrogateescape)
    and then split with ``url.parse``. Presumably this is the setter half of
    a ``url`` property; the decorator is not visible here.
    """
    text = always_str(val, "utf-8", "surrogateescape")
    scheme, host, port, path = url.parse(text)
    self.scheme = scheme
    self.host = host
    self.port = port
    self.path = path
def test_parse_port_range():
    """url.parse rejects a port number that is out of range."""
    # Port out of range
    pytest.raises(ValueError, url.parse, "http://foo:999999")