def test_both_urls(self):
    """When both proxy variables are set, each maps only to its own key."""
    os.environ["http_proxy"] = "http://test1.com:123"
    os.environ["https_proxy"] = "https://test2.com:124"
    proxies = get_proxies()
    self.assertEqual("http://test1.com:123", proxies.get("http://"))
    self.assertEqual("https://test2.com:124", proxies.get("https://"))
    # no catch-all mount should be created when both schemes are explicit
    self.assertNotIn("all://", proxies)
def test_add_scheme(self):
    """Scheme-less proxy URLs get http:// prepended; explicit schemes are kept."""
    # (http_proxy value, https_proxy value, expected http://, expected https://)
    cases = [
        ("test1.com", "test2.com",
         "http://test1.com", "http://test2.com"),
        ("test1.com/path", "test2.com/path",
         "http://test1.com/path", "http://test2.com/path"),
        ("https://test1.com:123", "https://test2.com:124",
         "https://test1.com:123", "https://test2.com:124"),
    ]
    for http_value, https_value, expected_http, expected_https in cases:
        os.environ["http_proxy"] = http_value
        os.environ["https_proxy"] = https_value
        proxies = get_proxies()
        self.assertEqual(expected_http, proxies.get("http://"))
        self.assertEqual(expected_https, proxies.get("https://"))
async def _read_files(urls: List[str], callback: Callable) -> List[ResponseType]:
    """Fetch all URLs concurrently through one shared proxied HTTP client."""
    async with httpx.AsyncClient(timeout=10.0, proxies=get_proxies()) as client:
        tasks = (
            UrlProxyReader._read_file(url, client, callback) for url in urls
        )
        # gather preserves input order, so results line up with urls
        return await asyncio.gather(*tasks)
def _get_proxy_address():
    """
    Return the https proxy URL for NLTK, or None when none is configured.

    NLTK does not honour the https_proxy environment variable, so the
    address is looked up explicitly; the netloc is normalised to
    ``host:port`` (falling back to the scheme's default port).
    """
    proxies = get_proxies() or {}
    # nltk downloads its data over https only
    if "https://" not in proxies:
        return None
    proxy = urlparse(proxies["https://"])
    log.debug(f"Using proxy for NLTK: {proxy}")
    port = proxy.port or DEFAULT_PORTS.get(proxy.scheme)
    netloc = f"{proxy.hostname}:{port}" if port else proxy.netloc
    return ParseResult(
        scheme=proxy.scheme,
        netloc=netloc,
        path=proxy.path,
        params=proxy.params,
        query=proxy.query,
        fragment=proxy.fragment,
    ).geturl()
async def embedd_batch(
    self, data: List[Any],
    proc_callback: Optional[Callable[[bool], None]] = None
) -> List[Optional[List[float]]]:
    """
    Function perform embedding of a batch of data items.

    Parameters
    ----------
    data
        A list of data that must be embedded.
    proc_callback
        A function that is called after each item is fully processed
        by either getting a successful response from the server,
        getting the result from cache or skipping the item.

    Returns
    -------
    List of float list (embeddings) for successfully embedded
    items and Nones for skipped items.

    Raises
    ------
    EmbeddingCancelledException:
        If cancelled attribute is set to True (default=False).
    """
    # NOTE: annotation fixed to Optional[...] — the default was already
    # None, so the accepted values and behavior are unchanged (PEP 484
    # disallows implicit Optional).
    requests = []
    async with AsyncClient(
        timeout=self.timeout, base_url=self.server_url, proxies=get_proxies()
    ) as client:
        for p in data:
            # abort the whole batch as soon as cancellation is requested
            if self._cancelled:
                raise EmbeddingCancelledException()
            requests.append(self._send_to_server(p, client, proc_callback))

        embeddings = await asyncio.gather(*requests)
    self._cache.persist_cache()
    # all in-flight requests must have completed once gather returns
    assert self.num_parallel_requests == 0
    return embeddings
async def embedd_batch(
    self,
    data: List[Any],
    proc_callback: Optional[Callable] = None,
    *,
    callback: Callable = dummy_callback,
) -> List[Optional[List[float]]]:
    """
    Embed a batch of data items, reporting progress through *callback*.

    Parameters
    ----------
    data
        A list of data that must be embedded.
    callback
        Callback for reporting the progress in share of embedded items

    Returns
    -------
    List of float list (embeddings) for successfully embedded
    items and Nones for skipped items.

    Raises
    ------
    EmbeddingCancelledException:
        If cancelled attribute is set to True (default=False).
    """
    # in Orange 3.34 keep content of the if - remove if clause and complete else
    if proc_callback is None:
        progress_shares = iter(linspace(0, 1, len(data)))

        def success_callback():
            """Callback called on every successful embedding"""
            callback(next(progress_shares))
    else:
        warnings.warn(
            "proc_callback is deprecated and will be removed in version 3.34, "
            "use callback instead",
            FutureWarning,
        )
        success_callback = partial(proc_callback, True)

    results = [None] * len(data)

    # every item is queued up front; workers drain the queue concurrently
    queue = asyncio.Queue()
    for index, item in enumerate(data):
        queue.put_nowait(TaskItem(id=index, item=item, no_repeats=0))

    async with AsyncClient(
        timeout=self.timeout, base_url=self.server_url, proxies=get_proxies()
    ) as client:
        workers = self._init_workers(client, queue, results, success_callback)
        try:
            # workers stop when the queue is empty; if one raises,
            # gather propagates the exception
            await asyncio.gather(*workers)
        finally:
            await self._cancel_workers(workers)

    self._cache.persist_cache()
    return results
def test_none(self):
    """
    When no variable is set return None
    """
    result = get_proxies()
    self.assertIsNone(result)
def test_https_only(self):
    """With only https_proxy set, no http:// mount must appear."""
    os.environ["https_proxy"] = "https://test1.com:123"
    proxies = get_proxies()
    self.assertEqual("https://test1.com:123", proxies.get("https://"))
    self.assertNotIn("http://", proxies)