Ejemplo n.º 1
0
async def main():
    """Parse the command line and run the URL checker.

    Builds an ``httpx.AsyncClient`` whose transport mounts are:

    * ``all://`` -- an HTTP/2-enabled transport used by default,
    * ``https://<hostname>`` -- a non-verifying transport for every
      ``--disable-certificate-verification`` hostname,
    * ``<url>`` -- an ``AsyncStaticFileTransport`` for every
      ``--mount DIRECTORY URL`` pair.

    Returns whatever ``check_urls`` returns.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--mount', nargs=2, action="append", default=[])
    cli.add_argument('--recurse-in-domain', action="append", default=[])
    cli.add_argument('--user-agent', default=USER_AGENT)
    # The remaining options are all plain repeatable values.
    for flag in ('--ignore', '--ignore-glob', '--ignore-http-code',
                 '--disable-certificate-verification'):
        cli.add_argument(flag, action="append", default=[])
    cli.add_argument('urls', nargs='*', default=[])

    options = cli.parse_args()

    # Later entries overwrite earlier ones that share the same mount key.
    mounts = {"all://": httpx.AsyncHTTPTransport(http2=True)}
    mounts.update({
        f"https://{hostname}": httpx.AsyncHTTPTransport(verify=False)
        for hostname in options.disable_certificate_verification
    })
    mounts.update({
        url: AsyncStaticFileTransport(directory=directory)
        for directory, url in options.mount
    })

    async with httpx.AsyncClient(mounts=mounts) as client:
        check_kwargs = {
            "client": client,
            "urls": options.urls,
            "recurse_in_domains": set(options.recurse_in_domain),
            "user_agent": options.user_agent,
            "ignore": set(options.ignore),
            "ignore_glob": set(options.ignore_glob),
            "ignore_http_codes": {int(code) for code in options.ignore_http_code},
        }
        return await check_urls(**check_kwargs)
    async def process(self, loop, url):
        """Fetch one feed URL and record the outcome on the instance.

        The URL is moved from ``self.todo`` to ``self.busy`` while it is
        being handled.  ``self.done[url]`` ends up ``True`` only when the
        server answered 200 and ``self._parse_feed`` ran without raising;
        otherwise it is ``False`` and ``self.failed[url]`` holds either the
        HTTP status code or a dict describing the exception.
        """
        print(f"processing: {url}")
        self.todo.remove(url)
        self.busy.add(url)

        # NOTE(review): an explicit transport is supplied together with
        # verify=False -- confirm the client-level flag still has an effect.
        http = httpx.AsyncClient(
            verify=False,
            transport=httpx.AsyncHTTPTransport(retries=3),
            timeout=httpx.Timeout(10.0, connect=20.0),
        )
        try:
            reply = await http.get(url)
            if reply.status_code == 200:
                # Parsing runs in the loop's default executor; the parsed
                # value itself is not kept.
                await loop.run_in_executor(None, self._parse_feed, reply.text)
                self.done[url] = True
            else:
                self.done[url] = False
                self.failed[url] = reply.status_code
        except Exception as err:
            self.done[url] = False
            self.failed[url] = {
                'type': 'exception',
                'message': str(err),
                'other': type(err),
            }
        finally:
            await http.aclose()
        self.busy.remove(url)
Ejemplo n.º 3
0
async def query_items(collection,
                      url,
                      params,
                      max_sync_queries=10,
                      item_template="${year}/${month}/${day}"):
    """Query every result page concurrently and gather the results.

    Args:
        collection: Passed through unchanged to ``query_items_page``.
        url: Endpoint queried by ``hits`` and by every page query.
        params: Query parameters; must contain a ``'limit'`` entry, which
            is used as the page size.
        max_sync_queries: Size of the semaphore handed to each page query.
        item_template: Passed through unchanged to ``query_items_page``.

    Returns:
        The list produced by ``asyncio.gather`` over one
        ``query_items_page`` call per page (empty when nothing was found).
    """
    found = hits(url, params)
    limit = params['limit']
    semaphore = asyncio.Semaphore(max_sync_queries)

    total_pages = math.ceil(found / limit)
    logger.info(f"Found {found} items ({total_pages} pages)")

    # Pool limits must be configured on the transport: a client given an
    # explicit transport does not apply client-level limits, and the old
    # ``pool_limits`` keyword is no longer accepted by ``AsyncClient``.
    limits = httpx.Limits(max_keepalive_connections=None, max_connections=5000)
    transport = httpx.AsyncHTTPTransport(retries=3, limits=limits)
    async with httpx.AsyncClient(timeout=None, transport=transport) as client:
        # NOTE(review): ``params`` is merged last, so a 'page' key supplied
        # by the caller overrides the per-page number -- confirm intended.
        queries = [
            query_items_page(collection,
                             url,
                             client,
                             semaphore,
                             params={'page': page, **params},
                             item_template=item_template)
            for page in range(1, total_pages + 1)
        ]
        return await asyncio.gather(*queries)
def _instrument_client(
    client: typing.Union[httpx.Client, httpx.AsyncClient],
    tracer_provider: TracerProvider = None,
    request_hook: typing.Optional[RequestHook] = None,
    response_hook: typing.Optional[ResponseHook] = None,
) -> None:
    """Wrap the client's transport in the matching OpenTelemetry transport.

    The client's current transport (or a fresh default HTTP transport when
    it has none) becomes the inner transport of the telemetry wrapper,
    which then replaces ``client._transport``.

    Raises:
        TypeError: if ``client`` is neither an ``httpx.Client`` nor an
            ``httpx.AsyncClient``.
    """
    # pylint: disable=protected-access
    if isinstance(client, httpx.Client):
        wrapper_cls, fallback_cls = SyncOpenTelemetryTransport, httpx.HTTPTransport
    elif isinstance(client, httpx.AsyncClient):
        wrapper_cls, fallback_cls = AsyncOpenTelemetryTransport, httpx.AsyncHTTPTransport
    else:
        raise TypeError("Invalid client provided")

    inner = client._transport or fallback_cls()
    client._transport = wrapper_cls(
        inner,
        tracer_provider=tracer_provider,
        request_hook=request_hook,
        response_hook=response_hook,
    )
Ejemplo n.º 5
0
async def test_uds():
    """A client using a unix-domain-socket transport is still mocked by respx."""
    async with respx.mock, httpx.AsyncClient(
        transport=httpx.AsyncHTTPTransport(uds="/tmp/foobar.sock")
    ) as client:
        # Register the route, then hit it through the UDS-backed client.
        route = respx.get("https://foo.bar/") % 202
        reply = await client.get("https://foo.bar/")
        assert route.called is True
        assert reply.status_code == 202
Ejemplo n.º 6
0
 def __init__(self, conf: Dict) -> None:
     """Initialise the base class and open a digest-authenticated session.

     Credentials come from ``self._conf.auth``; the session's transport is
     created with certificate verification disabled and three retries.
     """
     super().__init__(conf)
     digest_auth = DigestAuthCached(
         username=self._conf.auth.user,
         password=self._conf.auth.password,
     )
     retrying_transport = httpx.AsyncHTTPTransport(verify=False, retries=3)
     self.session = httpx.AsyncClient(auth=digest_auth,
                                      transport=retrying_transport)
Ejemplo n.º 7
0
 async def client():
     """Issue 40 sequential requests over the unix socket at ``SOCKPATH``.

     One transport object is shared by all iterations; each iteration opens
     its own ``AsyncClient`` on top of it and checks the ``/sleep/0.1``
     response.
     """
     # httpx >= 0.20 ships its own transport class; older versions use the
     # httpcore connection pool directly.
     make_transport = (httpx.AsyncHTTPTransport
                       if httpx_version >= (0, 20)
                       else httpcore.AsyncConnectionPool)
     transport = make_transport(uds=SOCKPATH)
     for _attempt in range(40):
         async with httpx.AsyncClient(transport=transport) as session:
             reply = await session.get("http://localhost/sleep/0.1")
             assert reply.status_code == 200
             assert reply.text == "Slept 0.1 seconds.\n"
Ejemplo n.º 8
0
def get_client() -> httpx.AsyncClient:
    """Returns a httpx client that can be used with get_updates().

    The client sends form-encoded requests with a fixed User-Agent, uses a
    30 second timeout, retries connections up to five times and keeps at
    most five keep-alive connections.
    """
    keepalive_limits = httpx.Limits(max_keepalive_connections=5)
    return httpx.AsyncClient(
        headers={
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': 'packet-windoze',
        },
        timeout=30,
        transport=httpx.AsyncHTTPTransport(limits=keepalive_limits, retries=5),
    )
Ejemplo n.º 9
0
 async def client(app, loop):
     """Request the app once over the unix socket, then stop the app.

     The response body is expected to be the absolute path of ``SOCKPATH``.
     ``app.stop()`` runs whether or not the assertions pass.
     """
     # httpx >= 0.20 ships its own transport class; older versions use the
     # httpcore connection pool directly.
     make_transport = (httpx.AsyncHTTPTransport
                       if httpx_version >= (0, 20)
                       else httpcore.AsyncConnectionPool)
     transport = make_transport(uds=SOCKPATH)
     try:
         async with httpx.AsyncClient(transport=transport) as session:
             reply = await session.get("http://myhost.invalid/")
             assert reply.status_code == 200
             assert reply.text == os.path.abspath(SOCKPATH)
     finally:
         app.stop()
Ejemplo n.º 10
0
def get_transport(verify, http2, local_address, proxy_url, limit, retries):
    """Build an ``httpx.AsyncHTTPTransport`` from the given settings.

    When ``verify`` is exactly ``True`` it is replaced by the value returned
    from ``get_sslcontexts``; any other value is passed through unchanged.
    A proxy is configured only when ``proxy_url`` is truthy.  Entries of
    ``TRANSPORT_KWARGS`` are forwarded as additional keyword arguments.
    """
    if verify is True:
        verify = get_sslcontexts(None, None, True, False, http2)

    # pylint: disable=protected-access
    proxy = httpx._config.Proxy(proxy_url) if proxy_url else None

    return httpx.AsyncHTTPTransport(
        verify=verify,
        http2=http2,
        limits=limit,
        proxy=proxy,
        local_address=local_address,
        retries=retries,
        **TRANSPORT_KWARGS,
    )
Ejemplo n.º 11
0
 def create_transport(
     self,
     tracer_provider: typing.Optional["TracerProvider"] = None,
     request_hook: typing.Optional["AsyncRequestHook"] = None,
     response_hook: typing.Optional["AsyncResponseHook"] = None,
 ):
     """Return an async telemetry transport wrapping a default HTTP transport.

     The provider and hooks are forwarded unchanged to
     ``AsyncOpenTelemetryTransport``.
     """
     return AsyncOpenTelemetryTransport(
         httpx.AsyncHTTPTransport(),
         tracer_provider=tracer_provider,
         request_hook=request_hook,
         response_hook=response_hook,
     )
    async def instrumented_async_send(wrapped, instance, args, kwargs):
        """Wrapper for an async ``send`` that installs a telemetry transport.

        Unless the ``suppress_instrumentation`` context value is set, the
        instance's transport (or a new default one) is wrapped in an
        ``AsyncOpenTelemetryTransport`` before the wrapped call is awaited.
        """
        if not context.get_value("suppress_instrumentation"):
            # NOTE(review): this wraps whatever transport is currently set,
            # so repeated calls nest one wrapper per call -- confirm intended.
            inner = instance._transport or httpx.AsyncHTTPTransport()
            instance._transport = AsyncOpenTelemetryTransport(
                inner,
                tracer_provider=tracer_provider,
                request_hook=request_hook,
                response_hook=response_hook,
            )

        return await wrapped(*args, **kwargs)
Ejemplo n.º 13
0
    def instrument_client(
        client: typing.Union[httpx.Client, httpx.AsyncClient],
        tracer_provider: TracerProvider = None,
        request_hook: typing.Optional[RequestHook] = None,
        response_hook: typing.Optional[ResponseHook] = None,
    ) -> None:
        """Instrument httpx Client or AsyncClient

        The client's transport is replaced by an OpenTelemetry transport
        wrapping the previous one (or a default transport when none was
        set).  The previous transport is remembered on
        ``client._original_transport`` for both client flavours.  A client
        that is already marked as instrumented is left untouched and a
        warning is logged.

        Args:
            client: The httpx Client or AsyncClient instance
            tracer_provider: A TracerProvider, defaults to global
            request_hook: A hook that receives the span and request that is called
                right after the span is created
            response_hook: A hook that receives the span, request, and response
                that is called right before the span ends
        """
        # pylint: disable=protected-access
        if not hasattr(client, "_is_instrumented_by_opentelemetry"):
            client._is_instrumented_by_opentelemetry = False

        if client._is_instrumented_by_opentelemetry:
            _logger.warning(
                "Attempting to instrument Httpx client while already instrumented"
            )
            return

        if isinstance(client, httpx.Client):
            client._original_transport = client._transport
            transport = client._transport or httpx.HTTPTransport()
            client._transport = SyncOpenTelemetryTransport(
                transport,
                tracer_provider=tracer_provider,
                request_hook=request_hook,
                response_hook=response_hook,
            )
            client._is_instrumented_by_opentelemetry = True
        elif isinstance(client, httpx.AsyncClient):
            # Keep the pre-instrumentation transport here too, mirroring the
            # sync branch, so it can be restored later.
            client._original_transport = client._transport
            transport = client._transport or httpx.AsyncHTTPTransport()
            client._transport = AsyncOpenTelemetryTransport(
                transport,
                tracer_provider=tracer_provider,
                request_hook=request_hook,
                response_hook=response_hook,
            )
            client._is_instrumented_by_opentelemetry = True
Ejemplo n.º 14
0
    def __init__(
            self,
            base_url,
            portal_name,
            save_metadata,
            download_only_new_data,
            base_path=Path(),
    ):
        """Set up paths, API URLs and the HTTP client for one portal.

        Args:
            base_url: Root URL of the portal; the ``package_list`` and
                ``package_show`` API URLs are derived from it.
            portal_name: Name of the portal; also used as the name of the
                working directory created under ``base_path``.
            save_metadata: Stored on the instance as-is.
            download_only_new_data: Stored on the instance as-is.
            base_path: Directory under which the portal directory is
                created (defaults to the current directory).

        Side effects:
            Creates ``<base_path>/<portal_name>`` and its ``files`` and
            ``metadata`` sub-directories when missing, and reads
            ``internal_metadata.json`` (one JSON document per line) to
            collect the already known resource ids.
        """
        # TODO: try/validate the base url
        self.base_url = base_url
        # TODO: check valid portal_name (only letters/digits . - _)
        # This is also going to be a directory name, so it should be tidy,
        # without spaces, and start with a letter just in case.
        self.portal_name = portal_name
        self.save_metadata = save_metadata
        self.download_only_new_data = download_only_new_data
        self.client = httpx.AsyncClient(transport=httpx.AsyncHTTPTransport(
            retries=10))

        self.p_base = base_path / self.portal_name
        self.p_items_md = self.p_base / "items_metadata.json"
        self.p_files = self.p_base / "files"
        self.p_metadata = self.p_base / "metadata"
        self.p_internal_md = self.p_base / "internal_metadata.json"

        self.p_base.mkdir(exist_ok=True)
        self.p_files.mkdir(exist_ok=True)
        self.p_metadata.mkdir(exist_ok=True)

        self.url_package_list = urljoin(self.base_url,
                                        "/api/3/action/package_list")
        self.url_package_show = urljoin(self.base_url,
                                        "/api/3/action/package_show")

        if not self.p_internal_md.exists():
            self.know_resoureces = set()
        else:
            # Read inside a ``with`` block so the file handle is closed
            # deterministically instead of being left to the garbage
            # collector.
            with self.p_internal_md.open() as internal_md:
                self.know_resoureces = {
                    json.loads(line).get("resource_id")
                    for line in internal_md
                }

        # TODO: add valid formats from config file
        self.valid_formats = ["csv"]
Ejemplo n.º 15
0
    async def _query(self, query, variables=None, **kwargs) -> Dict:
        """POST a GraphQL query to ``{API_BASE}/graphql`` and return the JSON body.

        ``variables`` is included in the payload only when truthy.  Any
        ``headers`` entry in ``kwargs`` is replaced by the fixed JSON
        headers below; the remaining keyword arguments are forwarded to
        ``client.post``.
        """
        # for graphql queries
        kwargs["headers"] = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Accept-Encoding": "gzip, deflate, br",
        }

        body = ({"query": query, "variables": variables}
                if variables else {"query": query})
        endpoint = f"{self.API_BASE}/graphql"

        async with httpx.AsyncClient(
                proxies=self._proxy,
                transport=httpx.AsyncHTTPTransport(retries=3)) as session:
            reply = await session.post(endpoint, json=body, **kwargs)
            return reply.json()
Ejemplo n.º 16
0
async def test_gateway_endpoint(
    endpoint: AnyUrl, authentication: ClusterAuthentication
) -> None:
    """Try to reach a gateway endpoint, raising ConfigurationError on failure.

    The endpoint's ``/api/version`` route is fetched with a short timeout
    first, then the gateway is asked to list its clusters.

    :raises ConfigurationError: contains some information as to why the connection failed
    """
    try:
        async with dask_gateway.Gateway(
            address=f"{endpoint}",
            auth=await get_gateway_auth_from_params(authentication),
            asynchronous=True,
        ) as gateway:
            # Entering the gateway context does not open a connection yet,
            # and the dask-gateway calls only time out after a long while,
            # so ping the API ourselves with a short timeout first.
            retrying_transport = httpx.AsyncHTTPTransport(retries=2)
            async with httpx.AsyncClient(transport=retrying_transport) as http:
                # an endpoint the API is expected to answer quickly
                version_reply = await http.get(
                    f"{endpoint}/api/version", timeout=_PING_TIMEOUT_S
                )
                version_reply.raise_for_status()
                # listing the clusters checks the gateway answers sensibly
                await gateway.list_clusters()

            logger.debug("Pinging %s, succeeded", f"{endpoint=}")
    except (
        dask_gateway.GatewayServerError,
        ClientConnectionError,
        ClientResponseError,
        httpx.HTTPError,
    ) as exc:
        logger.debug("Pinging %s, failed: %s", f"{endpoint=}", f"{exc=!r}")
        raise ConfigurationError(
            f"Could not connect to cluster in {endpoint}: error: {exc}"
        ) from exc
Ejemplo n.º 17
0
async def https(
    q: dns.message.Message,
    where: str,
    timeout: Optional[float] = None,
    port: int = 443,
    source: Optional[str] = None,
    source_port: int = 0,
    one_rr_per_rrset: bool = False,
    ignore_trailing: bool = False,
    client: Optional["httpx.AsyncClient"] = None,
    path: str = "/dns-query",
    post: bool = True,
    verify: Union[bool, str] = True,
) -> dns.message.Message:
    """Return the response obtained after sending a query via DNS-over-HTTPS.

    *client*, a ``httpx.AsyncClient``.  If provided, the client to use for
    the query.

    Unlike the other dnspython async functions, a backend cannot be provided
    in this function because httpx always auto-detects the async backend.

    *where* may be an IPv4/IPv6 address literal (the URL is then built from
    *where*, *port* and *path*) or anything else, which is used as the URL
    verbatim.  *source*, when given, is used as the local address of the
    transport of a newly created client; *source_port* is accepted but not
    used by this function.

    See :py:func:`dns.query.https()` for the documentation of the other
    parameters, exceptions, and return type of this method.
    """

    if not _have_httpx:
        raise NoDOH("httpx is not available.")  # pragma: no cover

    wire = q.to_wire()
    try:
        af = dns.inet.af_for_address(where)
    except ValueError:
        af = None
    transport = None
    headers = {"accept": "application/dns-message"}
    if af is not None:
        if af == socket.AF_INET:
            url = "https://{}:{}{}".format(where, port, path)
        elif af == socket.AF_INET6:
            url = "https://[{}]:{}{}".format(where, port, path)
    else:
        url = where
    if source is not None:
        # ``source`` is an address string; indexing it would pass only its
        # first character as the local address.  An (address, port) pair is
        # still accepted for callers that relied on the old indexing.
        local_address = source if isinstance(source, str) else source[0]
        transport = httpx.AsyncHTTPTransport(local_address=local_address)

    if client:
        # A caller-supplied client must not be closed here.
        cm: contextlib.AbstractAsyncContextManager = NullContext(client)
    else:
        cm = httpx.AsyncClient(http1=True,
                               http2=_have_http2,
                               verify=verify,
                               transport=transport)

    async with cm as the_client:
        # see https://tools.ietf.org/html/rfc8484#section-4.1.1 for DoH
        # GET and POST examples
        if post:
            headers.update({
                "content-type": "application/dns-message",
                "content-length": str(len(wire)),
            })
            response = await the_client.post(url,
                                             headers=headers,
                                             content=wire,
                                             timeout=timeout)
        else:
            wire = base64.urlsafe_b64encode(wire).rstrip(b"=")
            twire = wire.decode()  # httpx does a repr() if we give it bytes
            response = await the_client.get(url,
                                            headers=headers,
                                            timeout=timeout,
                                            params={"dns": twire})

    # see https://tools.ietf.org/html/rfc8484#section-4.2.1 for info about DoH
    # status codes
    if response.status_code < 200 or response.status_code > 299:
        raise ValueError("{} responded with status code {}"
                         "\nResponse body: {!r}".format(
                             where, response.status_code, response.content))
    r = dns.message.from_wire(
        response.content,
        keyring=q.keyring,
        request_mac=q.request_mac,
        one_rr_per_rrset=one_rr_per_rrset,
        ignore_trailing=ignore_trailing,
    )
    r.time = response.elapsed.total_seconds()
    if not q.is_response(r):
        raise BadResponse
    return r
Ejemplo n.º 18
0
 def __init__(self):
     """Create the shared async client; its transport retries connections 3 times."""
     self.client = httpx.AsyncClient(
         transport=httpx.AsyncHTTPTransport(retries=3))
Ejemplo n.º 19
0
async def https(q,
                where,
                timeout=None,
                port=443,
                source=None,
                source_port=0,
                one_rr_per_rrset=False,
                ignore_trailing=False,
                client=None,
                path='/dns-query',
                post=True,
                verify=True):
    """Return the response obtained after sending a query via DNS-over-HTTPS.

    *client*, a ``httpx.AsyncClient``.  If provided, the client to use for
    the query.

    Unlike the other dnspython async functions, a backend cannot be provided
    in this function because httpx always auto-detects the async backend.

    *where* may be an IPv4/IPv6 address literal (the URL is then built from
    *where*, *port* and *path*) or anything else, which is used as the URL
    verbatim.  *source*, when given, is used as the local address of the
    transport of a newly created client; *source_port* is accepted but not
    used by this function.

    See :py:func:`dns.query.https()` for the documentation of the other
    parameters, exceptions, and return type of this method.
    """

    if not _have_httpx:
        raise NoDOH('httpx is not available.')  # pragma: no cover

    wire = q.to_wire()
    try:
        af = dns.inet.af_for_address(where)
    except ValueError:
        af = None
    transport = None
    headers = {"accept": "application/dns-message"}
    if af is not None:
        if af == socket.AF_INET:
            url = 'https://{}:{}{}'.format(where, port, path)
        elif af == socket.AF_INET6:
            url = 'https://[{}]:{}{}'.format(where, port, path)
    else:
        url = where
    if source is not None:
        # A plain address string must be passed whole; indexing it would
        # keep only its first character.  An (address, port) pair is still
        # accepted for callers that relied on the old indexing.
        local_address = source if isinstance(source, str) else source[0]
        transport = httpx.AsyncHTTPTransport(local_address=local_address)

    # After 3.6 is no longer supported, this can use an AsyncExitStack
    client_to_close = None
    try:
        if not client:
            client = httpx.AsyncClient(http1=True,
                                       http2=_have_http2,
                                       verify=verify,
                                       transport=transport)
            # Only a client created here is closed here.
            client_to_close = client

        # see https://tools.ietf.org/html/rfc8484#section-4.1.1 for DoH
        # GET and POST examples
        if post:
            headers.update({
                "content-type": "application/dns-message",
                "content-length": str(len(wire))
            })
            response = await client.post(url,
                                         headers=headers,
                                         content=wire,
                                         timeout=timeout)
        else:
            encoded = base64.urlsafe_b64encode(wire).rstrip(b"=")
            # httpx does a repr() if we give it bytes
            response = await client.get(url,
                                        headers=headers,
                                        timeout=timeout,
                                        params={"dns": encoded.decode()})
    finally:
        if client_to_close:
            await client_to_close.aclose()

    # see https://tools.ietf.org/html/rfc8484#section-4.2.1 for info about DoH
    # status codes
    if response.status_code < 200 or response.status_code > 299:
        raise ValueError('{} responded with status code {}'
                         '\nResponse body: {}'.format(where,
                                                      response.status_code,
                                                      response.content))
    r = dns.message.from_wire(response.content,
                              keyring=q.keyring,
                              request_mac=q.request_mac,
                              one_rr_per_rrset=one_rr_per_rrset,
                              ignore_trailing=ignore_trailing)
    # Store the elapsed time as float seconds rather than a timedelta.
    r.time = response.elapsed.total_seconds()
    if not q.is_response(r):
        raise BadResponse
    return r
Ejemplo n.º 20
0
async def worker(args):
    """Post one user profile per row and log failures to ``log.csv``.

    ``args`` is a two-item sequence: ``args[0]`` is the relative URL that is
    appended to the module-level ``org``, and ``args[1]`` is an object used
    both as an async context manager and as an async iterator of rows
    (presumably a trio receive channel -- TODO confirm against the caller).

    For every row the profile built by ``build_profile`` is POSTed; a 429
    answer is retried after sleeping, a connection error is logged and
    terminates the process with exit status 1, and any other non-200 answer
    is appended to ``log.csv``.

    Relies on module-level names not defined in this block: ``org``,
    ``headers``, ``attributes``, ``reset_time_in_seconds``, ``notify``,
    ``speed``, ``N``, ``num_users``, ``spinner`` and ``start_time``.
    """
    global num_users, spinner
    rel = args[0]
    rows = args[1]
    global start_time
    async with rows:
        async for row in rows:
            user_profile_complete = build_profile(row)
            # A fresh client (120 s timeout, 5 connection retries) per row.
            async with httpx.AsyncClient(
                    timeout=120,
                    transport=httpx.AsyncHTTPTransport(retries=5)) as client:
                try:
                    r = await client.post(
                        org + rel,
                        headers=headers,
                        data=json.dumps(user_profile_complete))
                except httpx.ConnectError as exc:
                    # Connection failure: report, record the row and abort.
                    spinner.info(
                        f"yamit encountered a name resolution error and is logging your progress. Process failed on {row[attributes.index('login')]}"
                    )
                    with open('log.csv', 'a', newline='') as logger:
                        w = csv.writer(logger)
                        w.writerow([
                            'Failure', row[attributes.index('login')],
                            "Connect error, unable to resolve name.",
                            exc.request.url
                        ])
                        # Redundant: the ``with`` block closes the file too.
                        logger.close()
                    sys.exit(1)

                # Rate limited: wait, then retry the same request until the
                # answer is no longer 429.
                while r.status_code == 429:
                    if reset_time_in_seconds != 0:
                        await trio.sleep(reset_time_in_seconds)
                    else:
                        # Sleep until the reset time announced by the server
                        # plus a 10 second margin.
                        await trio.sleep(
                            int(r.headers['x-rate-limit-reset']) -
                            int(time()) + 10)

                    try:
                        r = await client.post(
                            org + rel,
                            headers=headers,
                            data=json.dumps(user_profile_complete))
                    except httpx.ConnectError as exc:
                        with open('log.csv', 'a', newline='') as logger:
                            w = csv.writer(logger)
                            w.writerow([
                                'Failure', row[attributes.index('login')],
                                "Connect error, unable to resolve name.",
                                exc.request.url
                            ])
                            # Redundant: the ``with`` block closes the file too.
                            logger.close()
                        sys.exit(1)

                # Progress reporting every ``notify`` processed rows.
                num_users += 1
                if num_users % notify == 0:
                    spinner.text = f"Last imported {row[attributes.index('login')]} \t total {num_users} \t runtime {int(int(time() - start_time)/60)} minutes \t status {r.status_code}"

                # Throttling: when ``speed`` is not 100, sleep until the
                # rate-limit reset once the remaining quota is at or below
                # ``limit + N - limit * speed / 100`` and the reset time is
                # still in the future.
                if speed != 100:
                    limit = int(r.headers['x-rate-limit-limit'])
                    remaining = int(r.headers['x-rate-limit-remaining'])
                    if (remaining <= (limit + N - (limit * speed / 100))
                        ) and (int(r.headers['x-rate-limit-reset']) -
                               int(time())) > 0:
                        await trio.sleep(
                            int(r.headers['x-rate-limit-reset']) - int(time()))

                # Any other non-200 answer is recorded in the failure log.
                if r.status_code != 200 and r.status_code != 429:
                    with open('log.csv', 'a', newline='') as logger:
                        w = csv.writer(logger)
                        try:
                            w.writerow([
                                'Failure', row[attributes.index('login')],
                                r.json()['errorSummary'], r.status_code
                            ])
                        except json.decoder.JSONDecodeError:
                            # Body was not JSON: log a marker row followed
                            # by the raw response text.
                            w.writerow([
                                'Failure', "",
                                "Error decoding JSON. More information on next line.",
                                r.status_code
                            ])
                            w.writerow([
                                'Failure', row[attributes.index('login')],
                                r.text, r.status_code
                            ])
                        # Redundant: the ``with`` block closes the file too.
                        logger.close()
Ejemplo n.º 21
0
 async def _fetch_binary_data(self, url: str) -> bytes:
     """GET ``url`` through the configured proxy and return the raw body bytes.

     A short-lived client is created per call; its transport retries
     connections up to three times.
     """
     async with httpx.AsyncClient(
             proxies=self._proxy,
             transport=httpx.AsyncHTTPTransport(retries=3)) as session:
         return (await session.get(url)).read()
Ejemplo n.º 22
0
# Expose the loaded configuration on the application state.
app.state.config = app_config

# Timeout for origin requests, with a separate value for the connect phase.
timeout = httpx.Timeout(
    app_config.origin_request_timeout_seconds(),
    connect=app_config.origin_request_connect_timeout_seconds(),
)

# Connection-pool sizing for origin requests.
limits = httpx.Limits(
    max_keepalive_connections=app_config.
    origin_request_max_keepalive_connections(),
    max_connections=app_config.origin_request_max_connections(),
)

# The pool limits, HTTP/2 switch and local bind address are set on the
# transport, which is then handed to the client below.
transport = httpx.AsyncHTTPTransport(
    http2=app_config.origin_request_http2(),
    limits=limits,
    local_address=app_config.origin_request_local_address(),
)

# One shared client stored on the application state.
app.state.httpx_client = httpx.AsyncClient(timeout=timeout,
                                           transport=transport)

# Sentry is initialised only when a DSN is configured; tracing is enabled
# only when a truthy traces sample rate is configured as well.
if app_config.sentry_dsn():
    if app_config.sentry_traces_sample_rate():
        sentry_sdk.init(  # pylint: disable=abstract-class-instantiated; see https://github.com/getsentry/sentry-python/issues/1081
            dsn=app_config.sentry_dsn(),
            traces_sample_rate=app_config.sentry_traces_sample_rate(),
        )
    else:
        sentry_sdk.init(  # pylint: disable=abstract-class-instantiated
            dsn=app_config.sentry_dsn())