async def test_lost_spans(fake_zipkin: Any, loop: asyncio.AbstractEventLoop) -> None:
    endpoint = az.create_endpoint("simple_service", ipv4="127.0.0.1", port=80)
    tr = azt.Transport(
        fake_zipkin.url,
        send_interval=0.01,
        send_max_size=100,
        send_attempt_count=2,
        send_timeout=ClientTimeout(total=1),
    )
    fake_zipkin.next_errors.append("disconnect")
    fake_zipkin.next_errors.append("disconnect")
    tracer = await az.create_custom(endpoint, tr)

    with tracer.new_trace(sampled=True) as span:
        span.name("root_span")
        span.kind(az.CLIENT)

    await asyncio.sleep(1)

    await tracer.close()
    data = fake_zipkin.get_received_data()
    assert len(data) == 0
async def test_retry(fake_zipkin, loop):
    endpoint = az.create_endpoint('simple_service', ipv4='127.0.0.1', port=80)
    tr = azt.Transport(
        fake_zipkin.url,
        send_interval=0.01,
        send_max_size=100,
        send_attempt_count=3,
        send_timeout=ClientTimeout(total=1),
    )
    fake_zipkin.next_errors.append('disconnect')
    fake_zipkin.next_errors.append('timeout')
    waiter = fake_zipkin.wait_data(1)
    tracer = await az.create_custom(endpoint, tr)

    with tracer.new_trace(sampled=True) as span:
        span.name('root_span')
        span.kind(az.CLIENT)

    await waiter

    await tracer.close()
    data = fake_zipkin.get_received_data()
    trace_id = span.context.trace_id
    assert any(s['traceId'] == trace_id for trace in data for s in trace), data
async def test_batches(fake_zipkin, loop):
    endpoint = az.create_endpoint('simple_service', ipv4='127.0.0.1', port=80)
    tr = azt.Transport(
        fake_zipkin.url,
        send_interval=0.01,
        send_max_size=2,
        send_timeout=ClientTimeout(total=1),
    )
    tracer = await az.create_custom(endpoint, tr)

    with tracer.new_trace(sampled=True) as span:
        span.name('root_span')
        span.kind(az.CLIENT)
        with span.new_child('child_1', az.CLIENT):
            pass
        with span.new_child('child_2', az.CLIENT):
            pass

    # close() forces buffered data to be sent regardless of the send interval
    await tracer.close()

    data = fake_zipkin.get_received_data()
    trace_id = span.context.trace_id

    assert len(data[0]) == 2
    assert len(data[1]) == 1
    assert data[0][0]['name'] == 'child_1'
    assert data[0][1]['name'] == 'child_2'
    assert data[1][0]['name'] == 'root_span'
    assert any(s['traceId'] == trace_id for trace in data for s in trace), data
async def test_send_full_batch(fake_zipkin, loop):
    endpoint = az.create_endpoint('simple_service', ipv4='127.0.0.1', port=80)
    tr = azt.Transport(
        fake_zipkin.url,
        send_interval=60,
        send_max_size=2,
        send_timeout=ClientTimeout(total=1),
    )
    tracer = await az.create_custom(endpoint, tr)
    waiter = fake_zipkin.wait_data(1)

    with tracer.new_trace(sampled=True) as span:
        span.name('root_span')
        span.kind(az.CLIENT)

    await asyncio.sleep(1)
    data = fake_zipkin.get_received_data()
    assert len(data) == 0

    with tracer.new_trace(sampled=True) as span:
        span.name('root_span')
        span.kind(az.CLIENT)

    # the batch is full here
    await waiter
    data = fake_zipkin.get_received_data()
    assert len(data) == 1

    # close() forces buffered data to be sent regardless of the send interval
    await tracer.close()
def _session(self) -> ClientSession:
    return ClientSession(
        connector=ProxyConnector.from_url(self.http_proxy),
        raise_for_status=True,
        timeout=ClientTimeout(total=60, connect=30, sock_read=10),
    )
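A minimal sketch of how such a proxied session could be built; it assumes `ProxyConnector` comes from the `aiohttp_socks` package (the import is not shown in the snippet above), and the proxy URL is illustrative.

from aiohttp import ClientSession, ClientTimeout
from aiohttp_socks import ProxyConnector  # assumed origin of ProxyConnector

def make_proxied_session(http_proxy: str) -> ClientSession:
    return ClientSession(
        connector=ProxyConnector.from_url(http_proxy),  # e.g. "socks5://127.0.0.1:9050"
        raise_for_status=True,                          # non-2xx responses raise automatically
        timeout=ClientTimeout(total=60, connect=30, sock_read=10),
    )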
async def run(self) -> None:
    self._request_count = 0
    self._cookie_jar = aiohttp.CookieJar()
    self._load_cookies()

    async with aiohttp.ClientSession(
        headers={"User-Agent": f"{NAME}/{VERSION}"},
        cookie_jar=self._cookie_jar,
        connector=aiohttp.TCPConnector(
            ssl=ssl.create_default_context(cafile=certifi.where())
        ),
        timeout=ClientTimeout(
            # 30 minutes. No download in the history of downloads was longer than 30 minutes.
            # This is enough to transfer a 600 MB file over a 3 Mib/s connection.
            # Allowing an arbitrary value could be annoying for overnight batch jobs
            total=30 * 60,
            connect=self._http_timeout,
            sock_connect=self._http_timeout,
            sock_read=self._http_timeout,
        ),
    ) as session:
        self.session = session
        try:
            await super().run()
        finally:
            del self.session

    log.explain_topic(f"Total amount of HTTP requests: {self._request_count}")

    # They are saved in authenticate, but a final save won't hurt
    self._save_cookies()
def __init__(self, service_url: str) -> None:
    self.service_url = service_url
    jar = aiohttp.CookieJar(unsafe=True)
    self.session = aiohttp.ClientSession(
        timeout=ClientTimeout(total=10),
        headers={"User-Agent": get_agent()},
        cookie_jar=jar,
    )
async def test_batches(fake_zipkin: Any, loop: asyncio.AbstractEventLoop) -> None:
    endpoint = az.create_endpoint("simple_service", ipv4="127.0.0.1", port=80)
    tr = azt.Transport(
        fake_zipkin.url,
        send_interval=0.01,
        send_max_size=2,
        send_timeout=ClientTimeout(total=1),
    )
    tracer = await az.create_custom(endpoint, tr)

    with tracer.new_trace(sampled=True) as span:
        span.name("root_span")
        span.kind(az.CLIENT)
        with span.new_child("child_1", az.CLIENT):
            pass
        with span.new_child("child_2", az.CLIENT):
            pass

    # close() forces buffered data to be sent regardless of the send interval
    await tracer.close()

    data = fake_zipkin.get_received_data()
    trace_id = span.context.trace_id

    assert len(data[0]) == 2
    assert len(data[1]) == 1
    assert data[0][0]["name"] == "child_1"
    assert data[0][1]["name"] == "child_2"
    assert data[1][0]["name"] == "root_span"
    assert any(s["traceId"] == trace_id for trace in data for s in trace), data
async def async_download_file(self, url: str) -> bytes | None:
    """Download files, and return the content."""
    if url is None:
        return None

    tries_left = 5

    if "tags/" in url:
        url = url.replace("tags/", "")

    self.log.debug("Downloading %s", url)

    while tries_left > 0:
        try:
            request = await self.session.get(url=url, timeout=ClientTimeout(total=60))

            # Make sure that we got a valid result
            if request.status == 200:
                return await request.read()

            raise HacsException(
                f"Got status code {request.status} when trying to download {url}"
            )
        except BaseException as exception:  # lgtm [py/catch-base-exception] pylint: disable=broad-except
            self.log.debug("Download failed - %s", exception)
            tries_left -= 1
            await asyncio.sleep(1)
            continue

    return None
async def put_resource(self, api_path, data, **kwargs):
    """ Helper method for HTTP PUT API requests.

        Args:
            api_path(str): REST API path
            data: JSON data for the PUT request

        Keyword Args:
            kwargs: keyword args used for replacing items in the API path
    """
    self.logger.info('')
    put_headers = {'Content-Type': 'application/json', **self.headers}
    url = self.build_api_url(
        api_path.format(tenant=self.tenant, controllerId=self.controller_id, **kwargs))
    self.logger.debug('PUT {}'.format(url))
    self.logger.debug(json.dumps(data))
    async with self.session.put(url,
                                headers=put_headers,
                                data=json.dumps(data),
                                timeout=ClientTimeout(self.timeout)) as resp:
        await self.check_http_status(resp)
async def main():
    try:
        loop = asyncio.get_running_loop()
        async with aiohttp.ClientSession(timeout=ClientTimeout(total=10 * 60)) as session:
            if not os.path.exists(ALL_LINKS_PICKLE_PATH):
                html = await fetch_article_header_links(session, loop)
                links = await loop.run_in_executor(POOL, get_links, html)
                all_links = await asyncio.gather(
                    *(fetch_article_links(session, link) for link in links)
                )
                all_links = dict(ChainMap(*all_links))
                print(f"{len(all_links)} links found.")
                with open(ALL_LINKS_PICKLE_PATH, "w+", encoding="utf-8") as link_handler:
                    print(f"Writing into file: {ALL_LINKS_PICKLE_PATH}")
                    # pickle.dump(all_links, link_handler)
                    json.dump(all_links, link_handler, ensure_ascii=False, indent=4)
                    print(f"{len(all_links)} links written.")
            else:
                with open(ALL_LINKS_PICKLE_PATH) as ph:
                    all_links = json.load(ph)
                print(f"{len(all_links)} links found by reading the file.")

            # run_until_complete() cannot be used on the already running loop here;
            # awaiting the gather directly is enough.
            await asyncio.gather(
                *(processor(all_links, title) for title in all_links)
            )
            # for title in all_links:
            #     await processor(session, loop, all_links, title)
        # loop.close()
    except Exception as e:
        print(f"failed with error: {e}")
async def get_credential(self) -> Credential:
    """Asynchronously return a credential, either from the local cache or from cooker.

    Raises:
        CookerConnectionError: the client could not connect to the auth or credential endpoints.
        CookerTokenError: the client failed to retrieve a token from the auth endpoint.
        CookerResponseError: the client received an invalid response from the credential endpoint (40x or 50x).

    Returns:
        Credential: credential retrieved from the local cache or from cooker
    """
    if self._timer() > self._cache_expiration:
        # NOTE: this code is not protected by a lock, so several requests could cache-miss at the same time.
        # This is not CPU-costly at the time of writing, and the last one to enter overwrites the cache, which is ok.
        # Warning: this does not decode the policy, so it does not check for a bad cached value.
        async with aiohttp.ClientSession(
                timeout=ClientTimeout(self.client_timeout)) as session:
            token = await self._retrieve_token(session, self.client_id, self.client_secret)
            self._cached_credential = await self._retrieve_credential(session, token)
            self._cache_expiration = self._timer() + self.cache_ttl

    return self._cached_credential
def __init__(self, LOOPHOLES: Loopholes):
    self.LOOPHOLES = LOOPHOLES
    self.timeout = ClientTimeout(total=30, connect=10, sock_connect=10, sock_read=10)
    self.tran_count = 0
async def test_retry(fake_zipkin: Any, loop: asyncio.AbstractEventLoop) -> None:
    endpoint = az.create_endpoint("simple_service", ipv4="127.0.0.1", port=80)
    tr = azt.Transport(
        fake_zipkin.url,
        send_interval=0.01,
        send_max_size=100,
        send_attempt_count=3,
        send_timeout=ClientTimeout(total=1),
    )
    fake_zipkin.next_errors.append("disconnect")
    fake_zipkin.next_errors.append("timeout")
    waiter = fake_zipkin.wait_data(1)
    tracer = await az.create_custom(endpoint, tr)

    with tracer.new_trace(sampled=True) as span:
        span.name("root_span")
        span.kind(az.CLIENT)

    await waiter

    await tracer.close()
    data = fake_zipkin.get_received_data()
    trace_id = span.context.trace_id
    assert any(s["traceId"] == trace_id for trace in data for s in trace), data
async def get_resource(self, api_path, query_params={}, **kwargs):
    """ Helper method for HTTP GET API requests.

        Args:
            api_path(str): REST API path

        Keyword Args:
            query_params: Query parameters to add to the API URL
            kwargs: Other keyword args used for replacing items in the API path

        Returns:
            Response JSON data
    """
    self.logger.info('')
    get_headers = {'Accept': 'application/json', **self.headers}
    url = self.build_api_url(
        api_path.format(tenant=self.tenant, controllerId=self.controller_id, **kwargs))
    self.logger.debug('GET {} {}'.format(url, get_headers))
    async with self.session.get(url,
                                headers=get_headers,
                                params=query_params,
                                timeout=ClientTimeout(self.timeout)) as resp:
        await self.check_http_status(resp)
        json = await resp.json()
        self.logger.debug(json)
        return json
async def send(self, endpoint: str, as_token: str, log: logging.Logger) -> None:
    if not endpoint:
        return
    try:
        headers = {
            "Authorization": f"Bearer {as_token}",
            "User-Agent": HTTPAPI.default_ua,
        }
        async with aiohttp.ClientSession() as sess, sess.post(
            endpoint,
            json={"checkpoints": [self.serialize()]},
            headers=headers,
            timeout=ClientTimeout(30),
        ) as resp:
            if not 200 <= resp.status < 300:
                text = await resp.text()
                text = text.replace("\n", "\\n")
                log.warning(
                    f"Unexpected status code {resp.status} sending checkpoints "
                    f"for {self.event_id}: {text}")
            else:
                log.info(
                    f"Successfully sent checkpoint for {self.event_id} (step: {self.step})"
                )
    except Exception as e:
        log.warning(f"Failed to send checkpoint for {self.event_id}: "
                    f"{type(e).__name__}: {e}")
def __init__(self, hostname):
    self.hostname = hostname
    self.url = f'http://{hostname}:{PORT}'
    self.session = aiohttp.ClientSession(
        timeout=ClientTimeout(total=100),
        headers={"User-Agent": get_agent()},
        cookie_jar=aiohttp.CookieJar(unsafe=True))
class HaVersionSupervisor(HaVersionBase):
    """Handle versions for the Supervisor source."""

    def validate_input(self) -> None:
        """Raise HaVersionInputException if expected input is missing."""
        if self.session is None:
            raise HaVersionInputException("Missing aiohttp.ClientSession")
        if self.image is None:
            self.image = "default"

    async def fetch(self, **kwargs):
        """Logic to fetch new version data."""
        headers = DEFAULT_HEADERS
        if (etag := kwargs.get("etag")) is not None:
            headers[IF_NONE_MATCH] = etag
        request = await self.session.get(
            url=URL.format(channel=self.channel),
            headers=headers,
            timeout=ClientTimeout(total=self.timeout),
        )
        self._etag = request.headers.get("Etag")
        if request.status == 304:
            raise HaVersionNotModifiedException
        self._data = await request.json()
async def test_scicrunch_resolves_all_valid_rrids(
    name: str, rrid: str, settings: SciCrunchSettings
):
    async with ClientSession(timeout=ClientTimeout(total=30)) as client:
        resolved = await resolve_rrid(rrid, client, settings)
        assert resolved
        assert isinstance(resolved, ResolvedItem)

        if resolved.is_unique:
            assert name in resolved.proper_citation
            assert rrid in resolved.proper_citation

            # NOTE: proper_citation does not seem to have a standard format.
            # So far I found four different formats!! :-o
            if not name:
                # only rrid with a prefix
                assert resolved.proper_citation == f"RRID:{rrid}"
            else:
                # includes name and rrid
                #
                # NOTE: why are CELL_LINE_CITATIONS removed from the test parametrization?
                # Since Sep. 2021 this test is not repeatable, since the list order returned by
                # https://scicrunch.org/resolver/RRID:CVCL_0033.json changes per call and
                # sometimes (BCRJ Cat# 0226, RRID:CVCL_0033) appears as the first hit instead
                # of the reference in CELL_LINE_CITATIONS
                #
                assert resolved.proper_citation in (
                    f"({name}, RRID:{rrid})",
                    f"({name},RRID:{rrid})",
                    f"{name} (RRID:{rrid})",
                )
async def _make_request(self, context: FContext, payload):
    """
    Helper method to make a request over the network.

    Args:
        payload: The data to be sent over the network.
    Return:
        The status code and body of the response.
    Throws:
        TTransportException if the request timed out.
    """
    # construct headers for request
    request_headers = {}
    if self._get_request_headers is not None:
        request_headers = self._get_request_headers(context)
    # apply the default headers so their values cannot be modified
    request_headers.update(self._headers)

    timeout = ClientTimeout(total=context.timeout / 1000)
    async with ClientSession(timeout=timeout) as session:
        try:
            async with session.post(self._url,
                                    data=payload,
                                    headers=request_headers) as response:
                return response.status, await response.content.read()
        except ServerTimeoutError:
            raise TTransportException(
                type=TTransportExceptionType.TIMED_OUT,
                message='request timed out')
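The division by 1000 above matters because the frugal FContext timeout is expressed in milliseconds while ClientTimeout expects seconds. A tiny sketch with made-up values:

from aiohttp import ClientTimeout

context_timeout_ms = 5000  # hypothetical FContext timeout in milliseconds
timeout = ClientTimeout(total=context_timeout_ms / 1000)
assert timeout.total == 5.0  # aiohttp interprets this as 5 seconds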
def __init__(self, hostname: str, custom_headers=None):
    self.hostname = hostname
    self.base_url = f'http://{self.hostname}:{PORT}'
    custom_headers = custom_headers or {}
    headers = {}
    headers.update(custom_headers)
    self.session = aiohttp.ClientSession(
        cookie_jar=aiohttp.CookieJar(unsafe=True),
        timeout=ClientTimeout(total=10),
        headers=headers)
async def download(book_id: int, file_type: str, type_: int = 0, retry: int = 3):
    # type 0: flibusta.is
    # type 1: flibustahezeous3.onion
    while retry > 0:
        url: str = ""
        connector = None

        if type_ == 0:
            basic_url = "http://flibusta.is"
        elif type_ == 1:
            basic_url = "http://flibustahezeous3.onion"
        else:
            raise Exception()

        if type_ == 1:
            connector = ProxyConnector.from_url(Config.TOR_PROXIES)

        if file_type in ("fb2", "epub", "mobi"):
            url = basic_url + f"/b/{book_id}/{file_type}"
        else:
            url = basic_url + f"/b/{book_id}/download"

        try:
            async with aiohttp.ClientSession(
                    timeout=ClientTimeout(total=10 * 60, sock_connect=2 * 60),
                    connector=connector) as session:
                async with session.get(
                        url, allow_redirects=True, max_redirects=50
                ) as resp:  # type: aiohttp.ClientResponse
                    if (resp.headers.get("Content-Type")
                            and "text/html" in resp.headers.get("Content-Type")
                            or resp.status != 200):
                        raise NotBookException("NotBookException")
                    if resp.headers.get("Content-Type") == "application/zip":
                        return await asyncio.get_event_loop().run_in_executor(
                            process_pool_executor, unzip, await resp.read(), file_type)
                    return await resp.content.read()
        except (aiohttp.ServerDisconnectedError, aiohttp.ClientOSError,
                aiohttp.ClientPayloadError, aiohttp.ClientConnectorError,
                zipfile.BadZipFile, FileNotFoundError, ProxyTimeoutError,
                NotBookException) as e:
            print(e)
            type_ += 1
            if type_ >= 2:
                type_ = 0
            retry -= 1

    return await manual_convert(book_id, file_type)
async def request(self, method, url, data=None, headers=None, files=None,
                  timeout=5, timeout_retry=3, decode_json=True,
                  return_binary=False, **kwargs):
    for _ in range(timeout_retry):
        try:
            post_data = data
            if files is None:
                if isinstance(data, dict) or isinstance(data, list):
                    post_data = json.dumps(data, ensure_ascii=False)
            else:
                post_data = FormData()
                if data:
                    for k, v in data.items():
                        post_data.add_field(k, v)
                file_args, fileobs = convert_file_args(files)
                for name, (filename, fileobj, mimetype) in file_args.items():
                    post_data.add_field(name, fileobj,
                                        content_type=mimetype,
                                        filename=filename)
            func = getattr(self.session, method)
            async with func(url, data=post_data, headers=headers,
                            timeout=ClientTimeout(timeout), **kwargs) as resp:
                if resp.status != 200:
                    self.logger.error(
                        "[%s] url[%s], data[%s] headers[%s] kwargs[%s] failed,"
                        " code[%d], resp[%s]",
                        method, url, post_data, headers, kwargs, resp.status, resp)
                    return None
                if return_binary:
                    result = await resp.content.read()
                else:
                    result = await resp.text(encoding='UTF-8')
                    if decode_json:
                        result = json.loads(result)
                return result
        except asyncio.TimeoutError:
            self.logger.warning(
                "[%s] url[%s], data[%s] headers[%s] kwargs[%s] timeout",
                method, url, data, headers, kwargs)
    else:
        self.logger.error("[%s] url[%s], timeout after retry [%d] times",
                          method, url, timeout_retry)
async def is_registry_responsive(app: web.Application) -> bool:
    path = "/v2/"
    try:
        await registry_request(app, path, no_cache=True, timeout=ClientTimeout(total=1.0))
        return True
    except (exceptions.DirectorException, asyncio.TimeoutError) as exc:
        logger.debug("Registry not responsive: %s", exc)
        return False
async def fetch(self, **kwargs):
    """Logic to fetch new version data."""
    request = await self.session.get(
        url=kwargs.get("url", URL.format(image=self.image)),
        headers=DEFAULT_HEADERS,
        timeout=ClientTimeout(total=self.timeout),
    )
    self._data = await request.json()
    self.parse()
    if not self.version and (next_url := self.data.get("next")):
        await self.fetch(**{"url": next_url})
async def _multi_update_coordinates(offices: Iterable[ExchangeOffice]):
    async with ClientSession(
            connector=TCPConnector(limit=3),
            timeout=ClientTimeout(
                total=DEFAULT_TIMEOUT * 60,     # connections are queued, so 30 minutes is ok.
                sock_connect=DEFAULT_TIMEOUT,   # 30 seconds to really connect.
                sock_read=DEFAULT_TIMEOUT * 2,  # 1 minute to GET.
            )) as client:
        await asyncio.gather(*(load_and_save_coordinates(x, client) for x in offices),
                             return_exceptions=True)
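A self-contained sketch of the same timeout combination; the DEFAULT_TIMEOUT value of 30 seconds is an assumption inferred from the comments above, since the constant itself is not shown.

from aiohttp import ClientSession, ClientTimeout, TCPConnector

DEFAULT_TIMEOUT = 30  # seconds; assumed value, inferred from the comments above

timeout = ClientTimeout(
    total=DEFAULT_TIMEOUT * 60,     # 30 minutes for the whole request, including queueing
    sock_connect=DEFAULT_TIMEOUT,   # 30 seconds to establish the socket connection
    sock_read=DEFAULT_TIMEOUT * 2,  # 1 minute between socket reads
)

def make_client() -> ClientSession:
    # limit=3 caps concurrent connections, which is why queueing time is folded
    # into the generous total above
    return ClientSession(connector=TCPConnector(limit=3), timeout=timeout)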
async def fetch_all():
    async with ClientSession(timeout=ClientTimeout(40)) as client_session:
        await asyncio.gather(
            fetch(
                "tests/samples/paradiso-amsterdam/events?lang=en&start_time=now&sort=date&order=asc&limit=30&page=1&with=locations",
                client_session,
            ),
            fetch(
                "tests/samples/paradiso-amsterdam/events?lang=en&start_time=now&sort=date&order=asc&limit=30&page=2&with=locations",
                client_session,
            ),
        )
async def async_download_file(self, url: str, *, headers: dict | None = None) -> bytes | None:
    """Download files, and return the content."""
    if url is None:
        return None

    if "tags/" in url:
        url = url.replace("tags/", "")

    self.log.debug("Downloading %s", url)
    timeouts = 0

    while timeouts < 5:
        try:
            request = await self.session.get(
                url=url,
                timeout=ClientTimeout(total=60),
                headers=headers,
            )

            # Make sure that we got a valid result
            if request.status == 200:
                return await request.read()

            raise HacsException(
                f"Got status code {request.status} when trying to download {url}"
            )
        except asyncio.TimeoutError:
            self.log.warning(
                "A timeout of 60! seconds was encountered while downloading %s, "
                "using over 60 seconds to download a single file is not normal. "
                "This is not a problem with HACS but how your host communicates with GitHub. "
                "Retrying up to 5 times to mask/hide your host/network problems to "
                "stop the flow of issues opened about it. "
                "Tries left %s",
                url,
                (4 - timeouts),
            )
            timeouts += 1
            await asyncio.sleep(1)
            continue

        except BaseException as exception:  # lgtm [py/catch-base-exception] pylint: disable=broad-except
            self.log.exception("Download failed - %s", exception)

    return None
def __init__(
    self,
    dsn,
    session_class=ClientSession,
    session_timeout: Optional[Union[float, int, dict]] = None,
    client_class=Client,
    **params,
):
    timeout_params = self.DEFAULT_TIMEOUT.copy()
    if isinstance(session_timeout, dict):
        timeout_params.update(session_timeout)
    else:
        timeout_params['total'] = session_timeout  # type: ignore

    self._session = session_class(timeout=ClientTimeout(**timeout_params))

    params.update(dsn_to_params(dsn))
    self._client = client_class(self._session, **params)
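A minimal, runnable sketch of the float-or-dict normalisation used above; the DEFAULT_TIMEOUT value here is an assumption (the real one is a class attribute not shown in the snippet).

from typing import Optional, Union
from aiohttp import ClientTimeout

DEFAULT_TIMEOUT = {"total": 60}  # illustrative default, not the class's actual value

def build_timeout(session_timeout: Optional[Union[float, int, dict]] = None) -> ClientTimeout:
    params = DEFAULT_TIMEOUT.copy()
    if isinstance(session_timeout, dict):
        params.update(session_timeout)     # e.g. {"connect": 5, "sock_read": 30}
    else:
        params["total"] = session_timeout  # a bare number overrides only total; None disables it
    return ClientTimeout(**params)

assert build_timeout(10).total == 10
assert build_timeout({"connect": 5}).connect == 5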
async def get_binary(self, url, dl_location, mime='application/octet-stream', timeout=3600):
    """ Actual download method with checksum checking.

        Args:
            url(str): URL of item to download
            dl_location(str): storage path for downloaded artifact

        Keyword Args:
            mime: mimetype of content to retrieve (default: 'application/octet-stream')
            timeout: download timeout (default: 3600)

        Returns:
            MD5 hash of downloaded content
    """
    self.logger.info('')
    get_bin_headers = {'Accept': mime, **self.headers}
    hash_md5 = hashlib.md5()
    self.logger.debug('GET binary {}'.format(url))
    # session timeout & single socket read timeout
    timeout = ClientTimeout(timeout, sock_read=60)
    async with self.session.get(url, headers=get_bin_headers, timeout=timeout) as resp:
        await self.check_http_status(resp)
        with dl_location.open('wb') as fd:
            while True:
                chunk, _ = await resp.content.readchunk()
                # an empty chunk means we are at EOF
                if not chunk:
                    break
                fd.write(chunk)
                hash_md5.update(chunk)
    return hash_md5.hexdigest()
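For reference, a minimal sketch of the ClientTimeout fields the snippets above combine; the values are illustrative, and a bare positional argument, as in ClientTimeout(30), sets total.

import aiohttp

# All fields are optional; None disables the corresponding limit.
timeout = aiohttp.ClientTimeout(
    total=60,         # cap for the whole operation, including redirects and reading the body
    connect=10,       # time to acquire a connection, including waiting for a free one in the pool
    sock_connect=10,  # time to establish the TCP connection to the peer
    sock_read=10,     # maximum pause between two reads from the socket
)

async def fetch(url: str) -> str:
    async with aiohttp.ClientSession(timeout=timeout) as session:
        # a per-request timeout= here would override the session-wide default
        async with session.get(url) as resp:
            return await resp.text()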