Beispiel #1
0
 def test_both_urls(self):
     """With both proxy variables set, each is reported under its own key."""
     http_url = "http://test1.com:123"
     https_url = "https://test2.com:124"
     os.environ["http_proxy"] = http_url
     os.environ["https_proxy"] = https_url

     proxies = get_proxies()

     self.assertEqual(http_url, proxies.get("http://"))
     self.assertEqual(https_url, proxies.get("https://"))
     # the test expects no catch-all entry in the result
     self.assertNotIn("all://", proxies)
Beispiel #2
0
    def test_add_scheme(self):
        """
        Proxy values without a scheme are expected to get an ``http://``
        prefix (also for https_proxy); values that already carry a scheme
        are expected to be returned unchanged.
        """
        # (http_proxy, https_proxy, expected "http://", expected "https://")
        cases = (
            ("test1.com", "test2.com",
             "http://test1.com", "http://test2.com"),
            ("test1.com/path", "test2.com/path",
             "http://test1.com/path", "http://test2.com/path"),
            ("https://test1.com:123", "https://test2.com:124",
             "https://test1.com:123", "https://test2.com:124"),
        )
        for http_env, https_env, http_expected, https_expected in cases:
            os.environ["http_proxy"] = http_env
            os.environ["https_proxy"] = https_env
            proxies = get_proxies()
            self.assertEqual(http_expected, proxies.get("http://"))
            self.assertEqual(https_expected, proxies.get("https://"))
 async def _read_files(urls: List[str],
                       callback: Callable) -> List[ResponseType]:
     """
     Read all given urls concurrently through one shared httpx client.

     Parameters
     ----------
     urls
         Addresses to read; each one is handed to
         UrlProxyReader._read_file.
     callback
         Passed unchanged to UrlProxyReader._read_file for every url.

     Returns
     -------
     Results gathered from the individual reads, in the order of urls.
     """
     async with httpx.AsyncClient(timeout=10.0,
                                  proxies=get_proxies()) as client:
         # create one awaitable per url, then wait for all of them together
         pending = []
         for address in urls:
             pending.append(
                 UrlProxyReader._read_file(address, client, callback)
             )
         return await asyncio.gather(*pending)
Beispiel #4
0
def _get_proxy_address():
    """
    Build the proxy address to hand to NLTK, since NLTK does not use the
    proxy address from the https_proxy environment variable.

    Returns
    -------
    The https proxy URL from get_proxies() with its netloc rewritten to
    ``hostname:port`` whenever a port is present in the URL or is found in
    DEFAULT_PORTS for the scheme; None when no https proxy is configured.
    """
    proxies = get_proxies() or {}
    # nltk uses https to download data, so only the https proxy matters
    if "https://" not in proxies:
        return None

    parsed = urlparse(proxies["https://"])
    log.debug(f"Using proxy for NLTK: {parsed}")

    port = parsed.port or DEFAULT_PORTS.get(parsed.scheme)
    # NOTE(review): rebuilding the netloc from hostname keeps only host and
    # port - confirm that dropping any userinfo part is intended
    netloc = f"{parsed.hostname}:{port}" if port else parsed.netloc
    return ParseResult(
        parsed.scheme,
        netloc,
        parsed.path,
        parsed.params,
        parsed.query,
        parsed.fragment,
    ).geturl()
    async def embedd_batch(
        self,
        data: List[Any],
        proc_callback: Callable[[bool], None] = None
    ) -> List[Optional[List[float]]]:
        """
        Embed a batch of data items by sending each of them to the server.

        One request per item is created on a shared client and all requests
        are awaited together; the cache is persisted afterwards.

        Parameters
        ----------
        data
            A list of data that must be embedded.
        proc_callback
            Passed to _send_to_server for every item. It is meant to be
            called after each item is fully processed: a successful server
            response, a cache hit, or a skipped item.

        Returns
        -------
        List of float list (embeddings) for successfully embedded
        items and Nones for skipped items.

        Raises
        ------
        EmbeddingCancelledException:
            If cancelled attribute is set to True (default=False).
        """
        async with AsyncClient(timeout=self.timeout,
                               base_url=self.server_url,
                               proxies=get_proxies()) as client:
            pending = []
            for item in data:
                # stop creating requests as soon as cancellation is flagged
                if self._cancelled:
                    raise EmbeddingCancelledException()
                pending.append(
                    self._send_to_server(item, client, proc_callback)
                )

            embeddings = await asyncio.gather(*pending)

        self._cache.persist_cache()
        # every request is expected to be finished at this point
        assert self.num_parallel_requests == 0

        return embeddings
Beispiel #6
0
    async def embedd_batch(
        self,
        data: List[Any],
        proc_callback: Optional[Callable] = None,
        *,
        callback: Callable = dummy_callback,
    ) -> List[Optional[List[float]]]:
        """
        Embed a batch of data items using a queue consumed by workers.

        Parameters
        ----------
        data
            A list of data that must be embedded.
        proc_callback
            Deprecated, use callback instead. When given, a FutureWarning is
            issued and it is wrapped so that it is invoked with True as its
            first argument in place of the progress reporting.
        callback
            Callback for reporting the progress in share of embedded items
            (values are taken from an even spacing between 0 and 1).

        Returns
        -------
        List of float list (embeddings) for successfully embedded
        items and Nones for skipped items.

        Raises
        ------
        EmbeddingCancelledException:
            If cancelled attribute is set to True (default=False).
        """
        # in Orange 3.34 remove the proc_callback branch together with the
        # condition and keep only the content of the else branch
        if proc_callback is not None:
            warnings.warn(
                "proc_callback is deprecated and will be removed in version 3.34, "
                "use callback instead",
                FutureWarning,
            )
            success_callback = partial(proc_callback, True)
        else:
            progress_shares = iter(linspace(0, 1, len(data)))

            def success_callback():
                """Report the next progress share after a successful embedding"""
                callback(next(progress_shares))

        # one result slot per input item; slots left untouched stay None
        results = [None] * len(data)

        # enqueue every item together with its position in data
        queue = asyncio.Queue()
        for position, element in enumerate(data):
            queue.put_nowait(
                TaskItem(id=position, item=element, no_repeats=0)
            )

        async with AsyncClient(timeout=self.timeout,
                               base_url=self.server_url,
                               proxies=get_proxies()) as client:
            workers = self._init_workers(client, queue, results,
                                         success_callback)

            try:
                # workers stop once the queue is empty; an exception raised
                # by any worker is propagated by gather
                await asyncio.gather(*workers)
            finally:
                # always cancel the workers and store the cache, also when
                # a worker failed
                await self._cancel_workers(workers)
                self._cache.persist_cache()

        return results
Beispiel #7
0
 def test_none(self):
     """get_proxies() is expected to return None when no variable is set."""
     proxies = get_proxies()
     self.assertIsNone(proxies)
Beispiel #8
0
 def test_https_only(self):
     """With only https_proxy set, the result has no http entry."""
     https_url = "https://test1.com:123"
     os.environ["https_proxy"] = https_url

     proxies = get_proxies()

     self.assertEqual(https_url, proxies.get("https://"))
     self.assertNotIn("http://", proxies)