def test_url_copywith_invalid_component():
    url = httpx.URL("https://example.org")
    with pytest.raises(TypeError):
        url.copy_with(pathh="/incorrect-spelling")
    with pytest.raises(TypeError):
        url.copy_with(userinfo="should be bytes")
def test_not_same_origin():
    origin1 = httpx.URL("https://example.com")
    origin2 = httpx.URL("HTTP://EXAMPLE.COM")
    assert not same_origin(origin1, origin2)
("https://foo/", "foo=bar", "https://foo/", "foo=bar"), ("https://foo/", b"foo=bar", "https://foo/", b"foo=bar"), ("https://foo/", [("foo", "bar")], "https://foo/", [("foo", "bar")]), ("https://foo/", { "foo": "bar" }, "https://foo/", { "foo": "bar" }), ("https://foo/", (("foo", "bar"), ), "https://foo/", (("foo", "bar"), )), ("https://foo?foo=bar", "baz=qux", "https://foo?foo=bar", "baz=qux"), ("https://foo?foo=bar", "baz=qux", "https://foo?foo=bar&baz=qux", None), (re.compile(r"https://foo/(\w+)/"), "foo=bar", "https://foo/bar/", "foo=bar"), (httpx.URL("https://foo/"), "foo=bar", "https://foo/", "foo=bar"), ( httpx.URL("https://foo?foo=bar"), "baz=qux", "https://foo?foo=bar&baz=qux", None, ), ], ) async def test_params_match(client, url, params, call_url, call_params): respx.get(url, params=params) % dict(content="spam spam") response = await client.get(call_url, params=call_params) assert response.text == "spam spam" @pytest.mark.asyncio
def __init__(self, client, dynamo_base=None):
    if dynamo_base is None:
        dynamo_base = Dynamo.defaults["dynamo_base"]
    self.client = client
    self.baseurl = httpx.URL(dynamo_base)
def __init__(self, client, dbs_base=None):
    if dbs_base is None:
        dbs_base = DBS.defaults["dbs_base"]
    self.client = client
    self.baseurl = httpx.URL(dbs_base)
def _real_extract(self, url):
    self.report_extraction(url)
    driver = self.get_driver()

    try:
        with GayStreamIE._LOCK:
            driver.get(url)

        # Dismiss the overlay link, then wait for the embedded player iframe.
        el_over = self.wait_until(
            driver, 60,
            ec.presence_of_element_located((By.CSS_SELECTOR, "a.boner")))
        if el_over:
            el_over.click()
        el_ifr = self.wait_until(
            driver, 60,
            ec.presence_of_element_located((By.ID, "ifr")))

        _entry_video = {}

        if el_ifr:
            # The iframe src provides the host for the Origin header and the
            # '/api/source/' endpoint that lists the available video files.
            url_ifr = el_ifr.get_attribute("src")
            _url_ifr = httpx.URL(url_ifr)
            url_post = url_ifr.replace('/v/', '/api/source/')
            data_post = {'r': "https://gaystream.pw/", 'd': _url_ifr.host}
            headers_post = {
                'Referer': url_ifr,
                'Origin': f'{_url_ifr.scheme}://{_url_ifr.host}'
            }

            self.wait_until(driver, randint(3, 5))
            info = self.get_info_video(url, url_post, data_post, headers_post, driver)
            self.to_screen(f'{url}:{url_post}\n{info}')

            _formats = []
            if info:
                for vid in info.get('data'):
                    _url = vid.get('file')
                    _info_video = self.get_info_for_format(_url)
                    if not _info_video:
                        raise ExtractorError(f"[{_url}] no video info")
                    _formats.append({
                        'format_id': vid.get('label'),
                        'url': _info_video.get('url'),
                        'resolution': vid.get('label'),
                        'height': int_or_none(vid.get('label')[:-1]),
                        'filesize': _info_video.get('filesize'),
                        'ext': 'mp4'
                    })

            if _formats:
                self._sort_formats(_formats)

            _videoid = self._match_id(url)
            _title = driver.title.replace("Watch", "").replace(
                "on Gaystream.pw", "").strip()

            _entry_video = {
                'id': _videoid,
                'title': sanitize_filename(_title, restricted=True),
                'formats': _formats,
                'ext': 'mp4'
            }

            self.to_screen(f'{url}\n{_entry_video}')

        if not _entry_video:
            raise ExtractorError("no video info")
        else:
            return _entry_video

    except ExtractorError:
        raise
    except Exception as e:
        lines = traceback.format_exception(*sys.exc_info())
        self.to_screen(f"{repr(e)} {str(e)} \n{'!!'.join(lines)}")
        raise ExtractorError(str(e))
    finally:
        try:
            self.rm_driver(driver)
        except Exception:
            pass
def test_should_not_be_proxied(url, no_proxy, expected):
    os.environ.update(no_proxy)
    parsed_url = httpx.URL(url)
    assert should_not_be_proxied(parsed_url) == expected
def test_ipv6_url():
    url = httpx.URL("http://[::ffff:192.168.0.1]:5678/")
    assert url.host == "::ffff:192.168.0.1"
    assert url.netloc == "[::ffff:192.168.0.1]:5678"
def test_ipv6_url_copy_with_host(url_str, new_host):
    url = httpx.URL(url_str).copy_with(host=new_host)
    assert url.host == "::ffff:192.168.0.1"
    assert url.netloc == "[::ffff:192.168.0.1]:1234"
    assert str(url) == "http://[::ffff:192.168.0.1]:1234"
def test_url_invalid_type():
    class ExternalURLClass:  # representing external URL class
        pass

    with pytest.raises(TypeError):
        httpx.URL(ExternalURLClass())  # type: ignore
def test_url_with_url_encoded_path():
    url = httpx.URL("https://www.example.com/path%20to%20somewhere")
    assert url.path == "/path to somewhere"
    assert url.query == b""
    assert url.raw_path == b"/path%20to%20somewhere"
def test_url_invalid():
    with pytest.raises(httpx.InvalidURL):
        httpx.URL("https://😇/")
def test_url_copywith_query():
    url = httpx.URL("https://example.org")
    url = url.copy_with(query=b"a=123")
    assert url.path == "/"
    assert url.query == b"a=123"
    assert url.raw_path == b"/?a=123"
def test_url_copywith_urlencoded_path():
    url = httpx.URL("https://example.org")
    url = url.copy_with(path="/path to somewhere")
    assert url.path == "/path to somewhere"
    assert url.query == b""
    assert url.raw_path == b"/path%20to%20somewhere"
def test_merge_relative_url_with_dotted_path():
    client = httpx.Client(base_url="https://www.example.com/some/path")
    request = client.build_request("GET", "../testing/123")
    assert request.url == httpx.URL("https://www.example.com/some/testing/123")
def test_idna_url(given, idna, host, scheme, port):
    url = httpx.URL(given)
    assert url == httpx.URL(idna)
    assert url.host == host
    assert url.scheme == scheme
    assert url.port == port
def url(self) -> httpx.URL:
    """The relative URL for sending the request as an HTTP GET."""
    return httpx.URL(self.endpoint, params=self.params)
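# Quick sketch of the `url` property above, with made-up values standing in for
# self.endpoint and self.params: httpx.URL accepts a `params` argument and folds
# it into the query string of the (possibly relative) URL.
import httpx

relative = httpx.URL("/wells", params={"api": "42", "county": "REEVES"})
assert str(relative) == "/wells?api=42&county=REEVES"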
def test_url_eq_str():
    url = httpx.URL("https://example.org:123/path/to/somewhere?abc=123#anchor")
    assert url == "https://example.org:123/path/to/somewhere?abc=123#anchor"
    assert str(url) == url
def __init__(self, client, reqmgr_base=None):
    if reqmgr_base is None:
        reqmgr_base = ReqMgr.defaults["reqmgr_base"]
    self.client = client
    self.baseurl = httpx.URL(reqmgr_base)
def join_url(url: str, ref: str = '.') -> str:
    return str(httpx.URL(url).join(ref))
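# Illustration of join_url: httpx.URL.join resolves the reference against the
# base URL per RFC 3986. The URLs below are invented purely for the example.
assert join_url("https://example.com/api/v1/items", "../v2/items") == "https://example.com/api/v2/items"
assert join_url("https://example.com/api/v1/items") == "https://example.com/api/v1/"  # default ref "."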
def __init__(self, client, mcm_base=None):
    if mcm_base is None:
        mcm_base = McM.defaults["mcm_base"]
    self.client = client
    self.baseurl = httpx.URL(mcm_base)
def __init__(self, client, unified_base=None):
    if unified_base is None:
        unified_base = Unified.defaults["unified_base"]
    self.client = client
    self.baseurl = httpx.URL(unified_base)
def to_production_url(url: str) -> str:
    urlobj = httpx.URL(url)
    return str(
        urlobj.copy_with(scheme="https", host="florimond.dev", port=None))
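# Example of the rewrite performed by to_production_url; the localhost URL is a
# made-up input. Scheme and host are replaced, the explicit port is dropped, and
# the path and query are carried over unchanged.
assert to_production_url("http://localhost:8000/blog/article?draft=1") == "https://florimond.dev/blog/article?draft=1"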
async def feed(request: Request) -> Response:
    # If this client has requested this page from us before and we gave them
    # our special ETag for an archive page, then we can immediately conclude
    # that whatever response we gave them before is fine.
    if any(
        ARCHIVE_ETAG in get_list(v) for v in request.headers.getlist("If-None-Match")
    ):
        # XXX: https://tools.ietf.org/html/rfc7232#section-4.1 says "The server
        # generating a 304 response MUST generate any of the following header
        # fields that would have been sent in a 200 (OK) response to the same
        # request: Cache-Control, Content-Location, Date, ETag, Expires, and
        # Vary," but we don't have most of those available without forwarding
        # the request upstream. We're acting sort of like a cache but because
        # we're stateless we don't have a stored response to use to satisfy
        # this requirement. However we can at least tell the client which of
        # the ETags it asked about is the right one.
        return Response(status_code=304, headers={"ETag": ARCHIVE_ETAG})

    # Allow clients to either URL-encode query parameters in the path or append
    # them unquoted.
    url = httpx.URL(request.path_params["url"], request.query_params)

    expected_hash = next((v for k, v in parse_qsl(url.query) if k == b"modified"), None)

    # No matter what, make sure the feed is sorted by modification time. Even
    # the current feed must change whenever any post is modified.
    url = update_query(url, modified=None, orderby="modified")

    # Forward a limited set of headers from the client request.
    # Enumerating a complete block-list is too hard, so block anything
    # that isn't specifically allowed.
    request_headers = httpx.Headers()
    for header in FORWARD_REQUEST_HEADERS:
        value = request.headers.get(header)
        if value is not None:
            request_headers[header] = value

    async with http_client.stream("GET", url, headers=request_headers) as doc:
        response_headers = doc.headers.copy()

        # Remove the standard hop-by-hop headers, the `Connection`
        # header and any hop-by-hop headers it names, and anything else
        # that would interfere with our transformations.
        remove_headers(
            response_headers,
            *CONNECTION_HEADERS.union(
                header.lower()
                for header in get_list(response_headers.get("connection"))
            ),
        )

        assert doc.url is not None
        url = doc.url
        if "content-location" not in response_headers:
            response_headers["content-location"] = str(url)

        if doc.status_code != 200:
            return Response(
                status_code=doc.status_code, headers=dict(response_headers.items())
            )

        if "https://api.w.org/" not in doc.links:
            raise HTTPException(403, "Not a WordPress site")

        try:
            raw_content_type = response_headers["content-type"]
            content_type = raw_content_type.split(";", 1)[0].strip()
        except KeyError:
            raise HTTPException(502, "Origin didn't provide a Content-Type")

        try:
            contents_hash = sha256()
            parser = XMLParser()
            async for chunk in doc.aiter_bytes():
                contents_hash.update(chunk)
                parser.feed(chunk)
            root = parser.close()
        except ElementTree.ParseError:
            raise HTTPException(406, "Unsupported non-XML feed format")

        actual_hash = urlsafe_b64encode(contents_hash.digest())
        if expected_hash is not None and expected_hash != actual_hash:
            raise HTTPException(410, "Page contents changed")

        feed_element = None
        if root.tag == "{" + NAMESPACES["atom"] + "}feed":
            feed_element = root
        elif root.tag == "rss":
            feed_element = root.find("./channel", NAMESPACES)
        if feed_element is None:
            raise HTTPException(406, f"Unsupported feed format {root.tag}")

        if any(feed_element.find(tag, NAMESPACES) for tag in HISTORY_TAGS):
            raise HTTPException(403, "Already an RFC5005 feed, no proxy needed")

        new_elements = []
        query = dict(parse_qsl(url.query))

        # If the client provided an Authorization header or something then we
        # should forward that for all sub-requests too. But we can't allow "not
        # modified" or "partial content" responses: we need the entire thing.
        remove_headers(request_headers, "if-none-match", "if-modified-since")

        if query.get(b"order") == b"ASC":
            # This is a legit archive page. We just need to construct an
            # appropriate URL for the next-oldest archive page and insert it.
            try:
                page = int(query.get(b"paged", 1))
            except ValueError:
                raise HTTPException(400, "Invalid 'paged' parameter")

            new_elements.append(element("fh", "archive"))

            current_url = update_query(url, modified=None, order=None, paged=None)
            new_elements.append(
                element(
                    "atom",
                    "link",
                    href=str(current_url),
                    rel="current",
                    type=content_type,
                )
            )

            if page > 1:
                prev_url = await hash_page(
                    url, request_headers, actual_hash, raw_content_type, page - 1
                )
                new_elements.append(
                    element(
                        "atom",
                        "link",
                        href=str(prev_url),
                        rel="prev-archive",
                        type=content_type,
                    )
                )

            # Archive pages are indefinitely cachable because we'll point the
            # client to a different URL if the contents should change.
            response_headers["etag"] = ARCHIVE_ETAG
            for header in ("last-modified", "expires"):
                try:
                    del response_headers[header]
                except KeyError:
                    pass

            # This feed might still not be okay for a shared cache to store
            # though; check for any existing Cache-Control directives.
            cc = {
                directive
                for directive in get_list(response_headers.get("cache-control"))
                if not directive.startswith("s-max-age=")
                if not directive.startswith("max-age=")
            }
            if "private" not in cc:
                cc.add("public")

            # Cache this response for up to a year and don't revalidate it.
            cc.discard("no-store")
            cc.add('max-age="31536000"')
            cc.add("immutable")

            response_headers["cache-control"] = ", ".join(cc)

        elif b"order" in query or b"paged" in query:
            # Refuse to process non-archive feeds that don't have the newest
            # entries.
            raise HTTPException(403)

        else:
            # We need to treat this as the main feed document, which means now
            # we need to know how many pages WordPress is going to break this
            # feed into so we can link to the last of them.
            url = update_query(url, order="ASC")
            last_page = await exponential_search(
                partial(page_exists, url, request_headers, raw_content_type)
            )

            if last_page == 1:
                # This is a complete feed, no pagination needed.
                new_elements.append(element("fh", "complete"))
            else:
                # The current document has all the posts of the final page, and
                # possibly a few extra if the number of posts per page doesn't
                # evenly divide into the total number of posts. Either way we
                # should link to the page before the final one.
                prev_url = await hash_page(
                    url, request_headers, actual_hash, raw_content_type, last_page - 1
                )
                new_elements.append(
                    element(
                        "atom",
                        "link",
                        rel="prev-archive",
                        type=content_type,
                        href=str(prev_url),
                    )
                )

        for e in reversed(new_elements):
            feed_element.insert(0, e)

        return Response(
            ElementTree.tostring(root),
            media_type=content_type,
            headers=dict(response_headers.items()),
        )
import logging

import httpx
import pytest

from collector import IHSClient
from tests.utils import MockAsyncDispatch

logger = logging.getLogger(__name__)

pytestmark = pytest.mark.asyncio

base_url = httpx.URL("http://127.0.0.1")


@pytest.fixture
def well_dispatcher():
    yield MockAsyncDispatch({
        "data": [
            {"a": 1, "b": 2, "c": 3},
            {"a": 3, "b": 4, "c": 5},
            {"a": 5,
def test_merge_absolute_url():
    client = httpx.Client(base_url="https://www.example.com/")
    request = client.build_request("GET", "http://www.example.com/")
    assert request.url == httpx.URL("http://www.example.com/")
    with pytest.warns(DeprecationWarning):
        assert not request.url.is_ssl
def test_url_matches(pattern, url, expected):
    pattern = URLPattern(pattern)
    assert pattern.matches(httpx.URL(url)) == expected
def test_merge_relative_url():
    client = httpx.Client(base_url="https://www.example.com/")
    request = client.build_request("GET", "/testing/123")
    assert request.url == httpx.URL("https://www.example.com/testing/123")
async def _async_request_hook(span: "Span", request: "RequestInfo"):
    url = httpx.URL(request[1])
    span.update_name("GET" + str(url))
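# If this hook is used with opentelemetry-instrumentation-httpx, registration
# presumably looks something like the sketch below; treat the exact call as an
# assumption rather than a verified snippet from this codebase.
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor

HTTPXClientInstrumentor().instrument(async_request_hook=_async_request_hook)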
def __init__(self, client, msmgr_base=None):
    if msmgr_base is None:
        msmgr_base = MSMgr.defaults["msmgr_base"]
    self.client = client
    self.baseurl = httpx.URL(msmgr_base)