def _get_filesize(self, url):
    count = 0
    try:
        _res = None
        while count < 3:
            try:
                res = httpx.head(url)
                if res.status_code > 400:
                    time.sleep(1)
                    count += 1
                else:
                    _res = int_or_none(res.headers.get('content-length'))
                    break
            except Exception:
                count += 1
    except Exception:
        pass
    return _res
def _get_info(self, url):
    count = 0
    try:
        _res = None
        while count < 3:
            try:
                res = httpx.head(url, headers=self.get_param('http_headers'))
                if res.status_code > 400:
                    time.sleep(1)
                    count += 1
                else:
                    _size = int_or_none(res.headers.get('content-length'))
                    _url = unquote(str(res.url))
                    if _size and _url:
                        _res = {'url': _url, 'filesize': _size}
                        break
                    else:
                        count += 1
            except Exception:
                count += 1
    except Exception:
        pass
    return _res
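# The two helpers above repeat the same retry loop. A minimal consolidated
# sketch -- my own assumption, not code from either snippet -- that keeps the
# same behaviour (up to `retries` HEAD attempts, sleeping a second after an
# HTTP error status) could look like this:
import time
from typing import Optional

import httpx


def head_with_retries(url: str, retries: int = 3,
                      headers: Optional[dict] = None) -> Optional[httpx.Response]:
    """Issue a HEAD request, retrying on exceptions and on status > 400."""
    for _ in range(retries):
        try:
            res = httpx.head(url, headers=headers)
        except httpx.HTTPError:
            continue  # network error: retry immediately
        if res.status_code > 400:
            time.sleep(1)  # back off before the next attempt
            continue
        return res
    return None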
def test_create_upload(self):
    user = User.objects.create(username="******", email="*****@*****.**")
    workflow = Workflow.create_and_init(owner=user)
    create_module_zipfile(
        "x", spec_kwargs={"parameters": [{"id_name": "foo", "type": "file"}]}
    )
    step = workflow.tabs.first().steps.create(
        order=0, slug="step-1", module_id_name="x"
    )
    with self.assertLogs("httpx._client", level=logging.DEBUG):
        response = self.run_handler(
            create_upload,
            user=user,
            workflow=workflow,
            stepSlug="step-1",
            filename="test.csv",
            size=1234,
        )
    self.assertEqual(response.error, "")

    # Test that response has tusUploadUrl
    tus_upload_url = response.data["tusUploadUrl"]
    self.assertRegex(tus_upload_url, "http://testtusd:8080/files/[0-9a-z]+")

    # Upload was created on tusd
    with self.assertLogs("httpx._client", level=logging.DEBUG):
        response = httpx.head(tus_upload_url, headers={"Tus-Resumable": "1.0.0"})
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.headers["Tus-Resumable"], "1.0.0")
    self.assertEqual(response.headers["Upload-Length"], "1234")
    # "dGVzdC5jc3Y=" = "test.csv"
    self.assertIn("filename dGVzdC5jc3Y=", response.headers["Upload-Metadata"])
    # "c3RlcC0x": "step-1"
    self.assertIn("stepSlug c3RlcC0x", response.headers["Upload-Metadata"])
    # apiToken should be empty
    self.assertRegex(response.headers["Upload-Metadata"], "apiToken ?(?:$|,)")
def add_track_rss_entry(
    self,
    feed: FeedGenerator,
    track: dict,
    username: str,
    tz: str = 'America/New_York',
):
    """
    Add a new RSS entry for the track to the feed.

    track is the Last.fm response to
    user.getRecentTracks(...)['recenttracks']['track'][i].
    """
    entry = feed.add_entry()
    title = f"{track['artist']['#text']} - {track['name']}"
    playcount = self.get_playcount(username, track['name'], track['artist']['#text'])
    if playcount:
        title += f" ({playcount} play{'s' if playcount > 1 else ''})"
    entry.title(title)
    entry.guid(mkguid(username, track))
    entry.link(href=track['url'])
    entry.published(
        delorean.epoch(int(track['date']['uts'])).shift(tz).datetime)
    if 'image' in track and len(track['image']) >= 1:
        url = track['image'][-1]['#text'].strip()
        if url:
            r = head(url)
            entry.enclosure(url, r.headers['Content-Length'],
                            r.headers['Content-Type'])
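# The enclosure call above assumes the server returns both Content-Length and
# Content-Type for the artwork URL. A defensive sketch with hypothetical
# fallbacks (my assumption, not part of the original feed code):
import httpx


def enclosure_fields(url: str) -> tuple:
    """HEAD the artwork URL and return (length, mime) with safe defaults."""
    r = httpx.head(url)
    length = r.headers.get('Content-Length', '0')       # "0" when the header is absent
    mime = r.headers.get('Content-Type', 'image/jpeg')  # assume JPEG artwork when absent
    return length, mime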
def _get_filesize(self, url):
    count = 0
    try:
        _res = None
        while count < 10:
            try:
                res = httpx.head(
                    url,
                    headers={
                        'Referer': 'https://vidoza.net',
                        'User-Agent': self.get_param('http_headers')['User-agent'],
                    },
                )
                if res.status_code > 400:
                    time.sleep(1)
                    count += 1
                else:
                    _res = int_or_none(res.headers.get('content-length'))
                    break
            except Exception:
                count += 1
    except Exception:
        pass
    return _res
async def test_http_methods(client):
    async with respx.mock:
        url = "https://foo.bar"
        route = respx.get(url, path="/") % 404
        respx.post(url, path="/").respond(200)
        respx.post(url, path="/").respond(201)
        respx.put(url, path="/").respond(202)
        respx.patch(url, path="/").respond(500)
        respx.delete(url, path="/").respond(204)
        respx.head(url, path="/").respond(405)
        respx.options(url, path="/").respond(status_code=501)
        respx.request("GET", url, path="/baz/").respond(status_code=204)
        url += "/"

        response = httpx.get(url)
        assert response.status_code == 404
        response = await client.get(url)
        assert response.status_code == 404

        response = httpx.get(url + "baz/")
        assert response.status_code == 204
        response = await client.get(url + "baz/")
        assert response.status_code == 204

        response = httpx.post(url)
        assert response.status_code == 201
        response = await client.post(url)
        assert response.status_code == 201

        response = httpx.put(url)
        assert response.status_code == 202
        response = await client.put(url)
        assert response.status_code == 202

        response = httpx.patch(url)
        assert response.status_code == 500
        response = await client.patch(url)
        assert response.status_code == 500

        response = httpx.delete(url)
        assert response.status_code == 204
        response = await client.delete(url)
        assert response.status_code == 204

        response = httpx.head(url)
        assert response.status_code == 405
        response = await client.head(url)
        assert response.status_code == 405

        response = httpx.options(url)
        assert response.status_code == 501
        response = await client.options(url)
        assert response.status_code == 501

        assert route.called is True
        assert respx.calls.call_count == 8 * 2
def check_url_response(url: str, **kwargs: Dict) -> str:
    """
    Shortcut to `raise_for_status` instead of fetching the whole content.

    One should only use this if passing URLs that are known to work is
    necessary. Otherwise let it fail later and avoid fetching the head.
    """
    r = httpx.head(url, **kwargs)
    r.raise_for_status()
    return url
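# A possible call site for the shortcut above. follow_redirects and timeout
# are real httpx keywords forwarded through **kwargs; the URL is only an
# illustration:
url = check_url_response("https://httpbin.org/get", follow_redirects=True, timeout=10)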
async def test_http_methods(client):
    async with respx.HTTPXMock() as httpx_mock:
        url = "https://foo.bar/"
        m = httpx_mock.get(url, status_code=404)
        httpx_mock.post(url, status_code=201)
        httpx_mock.put(url, status_code=202)
        httpx_mock.patch(url, status_code=500)
        httpx_mock.delete(url, status_code=204)
        httpx_mock.head(url, status_code=405)
        httpx_mock.options(url, status_code=501)

        response = httpx.get(url)
        assert response.status_code == 404
        response = await client.get(url)
        assert response.status_code == 404

        response = httpx.post(url)
        assert response.status_code == 201
        response = await client.post(url)
        assert response.status_code == 201

        response = httpx.put(url)
        assert response.status_code == 202
        response = await client.put(url)
        assert response.status_code == 202

        response = httpx.patch(url)
        assert response.status_code == 500
        response = await client.patch(url)
        assert response.status_code == 500

        response = httpx.delete(url)
        assert response.status_code == 204
        response = await client.delete(url)
        assert response.status_code == 204

        response = httpx.head(url)
        assert response.status_code == 405
        response = await client.head(url)
        assert response.status_code == 405

        response = httpx.options(url)
        assert response.status_code == 501
        response = await client.options(url)
        assert response.status_code == 501

        assert m.called is True
        assert httpx_mock.stats.call_count == 7 * 2
def is_really_active(url, code, retries=2, retry_num=0):
    if not url:  # no point in checking if API returns it as 'oldcourse'
        return None
    print(f"Trying {url} -> ", end=" ")
    time.sleep(0.1)  # lame rate limiting
    try:
        result = httpx.head(url, allow_redirects=True)
    except Exception as e:
        print("failed (%s)" % e, end=" ")
        retry_num += 1
        if retry_num <= retries:
            print(f"- retrying ({retry_num} / {retries})")
            return is_really_active(url, code, retries, retry_num=retry_num)
        really_active = False
    else:
        correct_redirect = code.lower() in str(result.url).lower()
        really_active = correct_redirect and result.status_code == 200
        # only print response details when one exists; `result` is unbound
        # when every attempt raised
        print(f"{really_active} ({result.url}, {result.status_code})")
    return really_active
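# The recursion above can also be written as a plain loop. A sketch with the
# same semantics, assuming a recent httpx release where the keyword is
# follow_redirects rather than allow_redirects:
import time

import httpx


def is_really_active_loop(url, code, retries=2):
    if not url:
        return None
    for attempt in range(1, retries + 2):  # first try plus `retries` retries
        time.sleep(0.1)  # lame rate limiting
        try:
            result = httpx.head(url, follow_redirects=True)
        except Exception as e:
            print(f"failed ({e}) - attempt {attempt}")
            continue
        return code.lower() in str(result.url).lower() and result.status_code == 200
    return False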
def test_create(self):
    workflow = Workflow.create_and_init()
    _init_module("x")
    step = workflow.tabs.first().steps.create(
        order=0,
        slug="step-123",
        module_id_name="x",
        file_upload_api_token="abc123",
        params={"file": None},
    )
    with self.assertLogs("httpx._client", level=logging.DEBUG):
        response = self.client.post(
            f"/api/v1/workflows/{workflow.id}/steps/step-123/files",
            HTTP_AUTHORIZATION="Bearer abc123",
            content_type="application/json",
            data={"filename": "foo bar.csv", "size": 12345},
        )
    self.assertEqual(response.status_code, 200)
    tus_upload_url = response.json()["tusUploadUrl"]

    # Upload was created on tusd
    with self.assertLogs("httpx._client", level=logging.DEBUG):
        response = httpx.head(tus_upload_url, headers={"Tus-Resumable": "1.0.0"})
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.headers["Tus-Resumable"], "1.0.0")
    self.assertEqual(response.headers["Upload-Length"], "12345")
    # "Zm9vIGJhci5jc3Y=": "foo bar.csv"
    self.assertIn("filename Zm9vIGJhci5jc3Y=", response.headers["Upload-Metadata"])
    # "c3RlcC0xMjM=": "step-123"
    self.assertIn("stepSlug c3RlcC0xMjM=", response.headers["Upload-Metadata"])
    # "YWJjMTIz": "abc123"
    self.assertIn("apiToken YWJjMTIz", response.headers["Upload-Metadata"])
def add_core_fields(self, s, filename, object_data):
    graph = self.graph
    if not httpx.head(s).status_code == 200:
        log.error(f"The resource at {s} is not available.")
        raise SystemExit(1)
    media_type = self.config.media_types[Path(filename).suffix[1:]]
    creation_uuid = uuid.uuid5(self.creation_uuid_ns, media_type)
    creation_iri = URIRef(f"{ENTITIES_NAMESPACE}{creation_uuid}")
    self.creation_iris.add(creation_iri)
    for p, o in [
        (RDF.type, crmdig["D1.Digital_Object"]),
        (m4p0.fileName, Literal(filename)),
        (edm.dataProvider, self.data_provider),
        (m4p0.hasMediaType, URIRef(media_type)),
        (crm.P94i_was_created_by, creation_iri),
    ]:
        graph.add((s, p, o))
    if "Rechtehinweis" in object_data:
        graph.add((s, dc.rights, Literal(object_data["Rechtehinweis"])))
    elif "Lizenz" in object_data:
        graph.add((s, dcterms.license, URIRef(object_data["Lizenz"])))
        graph.add((s, m4p0.licensor, Literal(object_data["Lizenzgeber"])))
    else:
        raise AssertionError
    if "Bezugsentität" in object_data:
        graph.add(
            (
                s,
                m4p0.refersToMuseumObject,
                self.create_related_entity_iri(object_data["Bezugsentität"]),
            )
        )
    if "URL" in object_data:
        # the key checked above is "URL", so look it up consistently here
        graph.add(
            (s, edm.shownAt, Literal(object_data["URL"], datatype=XSD.anyURI))
        )
def get_info_for_format(self, url, client=None, headers=None, verify=True):
    try:
        res = None
        if client:
            res = client.head(url, headers=headers)
        else:
            _config = SeleniumInfoExtractor._CLIENT_CONFIG.copy()
            if not verify and _config['verify']:
                if headers:
                    _config['headers'].update(headers)
                res = httpx.head(
                    url,
                    verify=False,
                    timeout=_config['timeout'],
                    headers=_config['headers'],
                    follow_redirects=_config['follow_redirects'],
                )
            else:
                res = SeleniumInfoExtractor._CLIENT.head(url, headers=headers)
        res.raise_for_status()
        _filesize = int_or_none(res.headers.get('content-length'))
        _url = unquote(str(res.url))
        return {'url': _url, 'filesize': _filesize}
    except Exception as e:
        if not res:
            self.logger_debug(f"{repr(e)}")
        else:
            self.logger_debug(f"{repr(e)} {res.request} \n{res.request.headers}")
            # Re-raise the HTTP status error on a 404 so the extractor calling
            # this method can differentiate it from a plain ExtractorError.
            if res.status_code == 404:
                res.raise_for_status()
        raise ExtractorError(repr(e))
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File    :   1_request_method.py
@Time    :   2021-02-23
@Author  :   EvilRecluse
@Contact :   https://github.com/RecluseXU
@Desc    :   Common request methods: GET, POST, PUT, DELETE, HEAD, OPTIONS
'''

# here put the import lib
import httpx

# Common request methods: GET, POST, PUT, DELETE, HEAD, OPTIONS
r = httpx.get('https://httpbin.org/get')
r = httpx.post('https://httpbin.org/post', data={'key': 'value'})
r = httpx.put('https://httpbin.org/put', data={'key': 'value'})
r = httpx.delete('https://httpbin.org/delete')
r = httpx.head('https://httpbin.org/get')
r = httpx.options('https://httpbin.org/get')

# Set headers
headers = {'user-agent': 'my-app/0.0.1'}
r = httpx.get('http://httpbin.org/headers', headers=headers)
print(r.json())
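# The module-level helpers above open a new connection per call. For several
# requests against the same host, httpx.Client reuses a connection pool; a
# minimal sketch of the same calls (assumption: any recent httpx release):
import httpx

with httpx.Client(headers={'user-agent': 'my-app/0.0.1'}) as client:
    r = client.get('https://httpbin.org/get')
    r = client.head('https://httpbin.org/get')
    print(r.headers)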
requests.delete('https://gmail.com', timeout=30, verify=True)
requests.delete('https://gmail.com', timeout=30, verify=False)
requests.patch('https://gmail.com', timeout=30, verify=True)
requests.patch('https://gmail.com', timeout=30, verify=False)
requests.options('https://gmail.com', timeout=30, verify=True)
requests.options('https://gmail.com', timeout=30, verify=False)
requests.head('https://gmail.com', timeout=30, verify=True)
requests.head('https://gmail.com', timeout=30, verify=False)

httpx.request('GET', 'https://gmail.com', verify=True)
httpx.request('GET', 'https://gmail.com', verify=False)
httpx.get('https://gmail.com', verify=True)
httpx.get('https://gmail.com', verify=False)
httpx.options('https://gmail.com', verify=True)
httpx.options('https://gmail.com', verify=False)
httpx.head('https://gmail.com', verify=True)
httpx.head('https://gmail.com', verify=False)
httpx.post('https://gmail.com', verify=True)
httpx.post('https://gmail.com', verify=False)
httpx.put('https://gmail.com', verify=True)
httpx.put('https://gmail.com', verify=False)
httpx.patch('https://gmail.com', verify=True)
httpx.patch('https://gmail.com', verify=False)
httpx.delete('https://gmail.com', verify=True)
httpx.delete('https://gmail.com', verify=False)
# httpx.stream() takes the method as its first argument
httpx.stream('GET', 'https://gmail.com', verify=True)
httpx.stream('GET', 'https://gmail.com', verify=False)

httpx.Client()
httpx.Client(verify=False)
httpx.AsyncClient()
httpx.AsyncClient(verify=False)
def from_html(url, suffix=None, suffix_depth=0, url_depth=0):
    '''parse urls from an html website

    Parameters:
    -----------
    url: str
        the website that contains the data
    suffix: list, optional
        data format. suffix should be a list containing multiple parts.
        if suffix_depth is 0, all '.'-separated suffixes are parsed.
        Examples:
            when set 'suffix_depth=0':
                suffix of 'xxx8.1_GLOBAL.nc' should be ['.1_GLOBAL', '.nc']
                suffix of 'xxx.tar.gz' should be ['.tar', '.gz']
            when set 'suffix_depth=1':
                suffix of 'xxx8.1_GLOBAL.nc' should be ['.nc']
                suffix of 'xxx.tar.gz' should be ['.gz']
    suffix_depth: integer
        number of suffixes
    url_depth: integer
        depth of URLs on the website to be parsed

    Return:
    -------
    a list containing URLs

    Example:
    --------
    >>> from downloader import parse_urls
    >>> url = 'https://cds-espri.ipsl.upmc.fr/espri/pubipsl/iasib_CH4_2014_uk.jsp'
    >>> urls = parse_urls.from_html(url, suffix=['.nc'], suffix_depth=1)
    >>> urls_all = parse_urls.from_html(url, suffix=['.nc'], suffix_depth=1, url_depth=1)
    >>> print(len(urls_all)-len(urls))
    '''
    def match_suffix(href, suffix):
        if suffix:
            sf = Path(href).suffixes[-suffix_depth:]
            return suffix == sf
        else:
            return True

    r_h = httpx.head(url)
    if 'text/html' in r_h.headers['Content-Type']:
        r = httpx.get(url)
        soup = BeautifulSoup(r.text, 'html.parser')
        a = soup.find_all('a')
        # bs4 removed Tag.has_key(); has_attr() is the supported spelling
        urls_all = [urljoin(url, i['href']) for i in a if i.has_attr('href')]
        urls = [i for i in urls_all if match_suffix(i, suffix)]
        if url_depth > 0:
            urls_notdata = sorted(set(urls_all) - set(urls))
            urls_depth = [
                from_html(_url, suffix, suffix_depth, url_depth - 1)
                for _url in urls_notdata
            ]
            for u in urls_depth:
                if isinstance(u, list):
                    urls.extend(u)
        return sorted(set(urls))
def test_head(server):
    response = httpx.head(server.url)
    assert response.status_code == 200
    assert response.reason_phrase == "OK"
def _get_filesize(self, _vurl):
    res = httpx.head(_vurl, follow_redirects=True)
    res.raise_for_status()
    return int_or_none(res.headers.get('content-length'))
def test_head(server):
    response = httpx.head("http://127.0.0.1:8000/")
    assert response.status_code == 200
    assert response.reason_phrase == "OK"
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File    :   2_6_redirection.py
@Time    :   2021-02-23
@Author  :   EvilRecluse
@Contact :   https://github.com/RecluseXU
@Desc    :   Redirection
'''

# here put the import lib
import httpx

# Redirects from http to https
r = httpx.get('http://github.com/')
print(r.url)
print(r.status_code)
print(r.history)

# If you do not want redirects to be followed, you can set
r = httpx.get('http://github.com/', allow_redirects=False)
print(r.status_code)
print(r.history)

# When sending a HEAD request, the same parameter enables redirects
r = httpx.head('http://github.com/', allow_redirects=True)
print(r.url)
print(r.history)
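# Note: the snippet above targets an older httpx release. Since httpx 0.20 the
# keyword is follow_redirects, and redirects are off by default; the HEAD
# example would become:
import httpx

r = httpx.head('http://github.com/', follow_redirects=True)
print(r.url)
print(r.history)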
def test_erddap2_10():
    """Check regression for ERDDAP 2.10."""
    e = ERDDAP(server="http://erddap.ioos.us/erddap/")
    url = e.get_search_url(search_for="NOAA", response="csv")
    r = httpx.head(url)
    assert r.raise_for_status() is None