예제 #1
0
 async def use_proxy_request(
     self,
     url: str,
     method: str,
     data: dict,
     headers: dict,
     timeout: int,
     proxy_dict: dict,
     resp_encoding: str,
 ) -> Union[HTTPResponse, int, None]:
     """
     Check the proxy status by sending one request through it.

     A dedicated ``CurlAsyncHTTPClient`` is created for the call and is
     always closed before returning.

     :param url: target URL, forwarded to ``self.make_request``
     :param method: HTTP method, forwarded to ``self.make_request``
     :param data: request payload, forwarded to ``self.make_request``
     :param headers: request headers, forwarded to ``self.make_request``
     :param timeout: timeout value, forwarded to ``self.make_request``
     :param proxy_dict: proxy settings, forwarded to ``self.make_request``;
         its ``host`` and ``port`` entries are also used in log messages
     :param resp_encoding: encoding handed to ``self.get_response_body``
     :return: the result of ``self.get_response_body`` when the fetch
         completes, ``599`` when ``CurlError`` is raised, or ``None`` when
         any other exception is raised
     """
     # Build the request before creating the client: if make_request raises,
     # no client has been opened yet, so nothing is left unclosed.
     request = self.make_request(url, method, data, headers, timeout,
                                 proxy_dict)
     client = CurlAsyncHTTPClient(force_instance=True)
     try:
         resp = await client.fetch(request, raise_error=False)
         msg = ('proxy: {}:{}, url: {} ,result: {}'.format(
             proxy_dict.get('host'), proxy_dict.get('port'), url,
             resp.code))
         self.logger.debug(msg)
     except CurlError as e:
         self.logger.error(e)
         # Callers receive the bare integer 599 for curl-level failures.
         resp = 599
     except Exception as e:
         self.logger.error('proxy: {}:{}, url: {}, result: {}'.format(
             proxy_dict.get('host'), proxy_dict.get('port'), url, str(e)))
         resp = None
     else:
         # Only reached when neither the fetch nor the logging raised.
         resp = self.get_response_body(resp, resp_encoding)
     finally:
         client.close()
     return resp
예제 #2
0
class HTTPClient:
    """Wrapper around an ``AsyncHTTPClient`` with JSON bodies and status checks.

    ``dict``/``list`` bodies are JSON-encoded, and any response whose status
    code is outside 200-299 is turned into an ``HTTPError``.
    """

    def __init__(self,
                 *,
                 max_clients=100,
                 connect_timeout=20.0,
                 verify_ssl=True,
                 **kwargs):
        """Create the underlying client and remember per-request settings.

        :param max_clients: forwarded to ``AsyncHTTPClient``.
        :param connect_timeout: sent as ``connect_timeout`` on every fetch.
        :param verify_ssl: sent as ``validate_cert`` on every fetch.
        :param kwargs: extra keyword arguments forwarded to
            ``AsyncHTTPClient``.
        """
        self._httpclient = AsyncHTTPClient(max_clients=max_clients, **kwargs)
        self._connect_timeout = connect_timeout
        self._verify_ssl = verify_ssl

    async def fetch(self,
                    url,
                    *,
                    method="GET",
                    headers=None,
                    body=None,
                    request_timeout=30.0):
        """Perform one HTTP request and return its status code and body.

        :param url: request URL.
        :param method: HTTP method name.
        :param headers: optional mapping of request headers; it is never
            modified by this method.
        :param body: request body; a ``dict`` or ``list`` is JSON-encoded and
            a ``Content-Type: application/json`` header is added unless a
            ``Content-Type`` key is already present.
        :param request_timeout: sent as ``request_timeout`` to the client.
        :return: ``HTTPResponse(resp.code, resp.body)``.
        :raises HTTPError: when the status code is below 200 or at least 300.
        """
        import copy

        if isinstance(body, (dict, list)):
            if headers is None:
                headers = {'Content-Type': "application/json"}
            elif 'Content-Type' not in headers:
                # Add the header to a copy so the caller's headers object
                # is left untouched and can be reused for other requests.
                headers = copy.copy(headers)
                headers['Content-Type'] = "application/json"
            body = tornado.escape.json_encode(body)
        # raise_error=False: the status code is checked explicitly below.
        resp = await self._httpclient.fetch(
            url,
            method=method,
            headers=headers,
            body=body,
            validate_cert=self._verify_ssl,
            request_timeout=request_timeout,
            connect_timeout=self._connect_timeout,
            raise_error=False)
        if resp.code < 200 or resp.code >= 300:
            raise HTTPError(resp.code, message=resp.reason)
        return HTTPResponse(resp.code, resp.body)

    async def close(self):
        """Close the underlying client."""
        self._httpclient.close()
예제 #3
0
class Downloader:
    """Fetch requests through a curl-based HTTP client or a renderer.

    Requests with a truthy ``render`` attribute go to the renderer, with
    concurrency bounded by a semaphore; all others go to a dedicated
    ``CurlAsyncHTTPClient``.
    """

    def __init__(self, max_clients=100, renderer=None, renderer_cores=None):
        """Set up the HTTP client and the renderer semaphore.

        :param max_clients: forwarded to ``CurlAsyncHTTPClient``.
        :param renderer: optional object exposing ``fetch(request)`` and
            ``close()``; used for requests that ask for rendering.
        :param renderer_cores: size of the renderer semaphore; defaults to
            ``max_clients`` when ``None``.
        """
        self._max_clients = max_clients
        self._http_client = CurlAsyncHTTPClient(max_clients=max_clients,
                                                force_instance=True)
        self._renderer = renderer
        if renderer_cores is None:
            renderer_cores = self._max_clients
        self._renderer_semaphore = Semaphore(renderer_cores)

    @classmethod
    def from_crawler(cls, crawler):
        """Build a downloader from ``crawler.config``.

        Reads ``chrome_renderer_options``, ``downloader_clients`` and
        ``renderer_cores`` from the config, and subscribes ``close`` to the
        ``crawler_shutdown`` event on ``crawler.event_bus``.

        :param crawler: object providing ``config`` and ``event_bus``.
        :return: the new downloader instance.
        """
        config = crawler.config
        renderer = ChromeRenderer(
            options=config.get('chrome_renderer_options'))
        # NOTE(review): with_not_none_params presumably drops None-valued
        # entries so the constructor defaults apply -- confirm.
        downloader = cls(**with_not_none_params(
            max_clients=config.getint('downloader_clients'),
            renderer=renderer,
            renderer_cores=config.getint('renderer_cores')))
        crawler.event_bus.subscribe(downloader.close, events.crawler_shutdown)
        return downloader

    @property
    def max_clients(self):
        """The ``max_clients`` value given at construction."""
        return self._max_clients

    async def fetch(self, request):
        """Fetch ``request`` and return the resulting response.

        :param request: request object; ``request.render`` selects the
            renderer path, otherwise the HTTP client is used.
        :return: the renderer's response, or an ``HttpResponse`` built from
            the HTTP client's response.
        :raises HttpError: when the HTTP client reports an error that has a
            response whose code is not 599.
        :raises ClientError: for any other failure except cancellation.
        """
        log.debug("HTTP request: %s", request)
        try:
            if request.render:
                async with self._renderer_semaphore:
                    response = await self._renderer.fetch(request)
            else:
                req = self._make_request(request)
                resp = await self._http_client.fetch(req)
                response = self._make_response(resp)
        except CancelledError:
            # Cancellation must propagate untouched, not become ClientError.
            raise
        except HTTPClientError as e:
            if e.response is not None and e.response.code != 599:
                raise HttpError('{} {}'.format(e.response.code, e.message),
                                response=self._make_response(e.response)
                                ) from e
            raise ClientError(e.message) from e
        except Exception as e:
            raise ClientError(e) from e
        log.debug("HTTP response: %s", response)
        return response

    def _make_request(self, request):
        """Translate ``request`` into an ``HTTPRequest``.

        ``request.proxy`` may be a bare ``host:port`` string or a URL whose
        scheme is ``http``, ``socks4`` or ``socks5``.

        :raises ValueError: for an unsupported proxy scheme, a proxy URL
            without a port, or a malformed ``host:port`` string.
        """
        kwargs = {
            'method': request.method,
            'headers': request.headers,
            'body': request.body,
            'connect_timeout': request.timeout,  # FIXME
            'request_timeout': request.timeout,  # FIXME
            'follow_redirects': request.allow_redirects,
            'validate_cert': request.verify_ssl
        }
        if request.auth is not None:
            auth_username, auth_password = request.auth
            kwargs['auth_username'] = auth_username
            kwargs['auth_password'] = auth_password
        if request.proxy is not None:
            # Detect a scheme by the "://" separator instead of trusting
            # urlsplit().scheme: for a bare "localhost:8080" urlsplit can
            # report "localhost" as the scheme, which would be rejected below.
            if '://' in request.proxy:
                s = urlsplit(request.proxy)
                if s.scheme not in ('http', 'socks4', 'socks5'):
                    raise ValueError('Unsupported proxy scheme: {}'.format(
                        s.scheme))
                proxy_host, proxy_port = s.hostname, s.port
                if proxy_port is None:
                    # Fail with a clear message rather than int(None).
                    raise ValueError('Proxy port is missing: {}'.format(
                        request.proxy))
                if s.scheme == 'socks5':
                    kwargs['prepare_curl_callback'] = prepare_curl_socks5
                elif s.scheme == 'socks4':
                    kwargs['prepare_curl_callback'] = prepare_curl_socks4
            else:
                proxy_host, proxy_port = request.proxy.split(':')
            kwargs['proxy_host'] = proxy_host
            kwargs['proxy_port'] = int(proxy_port)
        if request.proxy_auth is not None:
            proxy_username, proxy_password = request.proxy_auth
            kwargs['proxy_username'] = proxy_username
            kwargs['proxy_password'] = proxy_password
        return HTTPRequest(request.url, **kwargs)

    def _make_response(self, resp):
        """Wrap an HTTP client response in an ``HttpResponse``."""
        return HttpResponse(resp.effective_url,
                            resp.code,
                            headers=resp.headers,
                            body=resp.body)

    def close(self):
        """Close the HTTP client and, when one was configured, the renderer."""
        self._http_client.close()
        # renderer defaults to None in __init__, so it may be absent.
        if self._renderer is not None:
            self._renderer.close()
예제 #4
0
__author__ = 'TzAnAnY'

from monkey import patch_socks
patch_socks()

from tornado.curl_httpclient import CurlAsyncHTTPClient
from tornado.ioloop import IOLoop
from pycurl import PROXYTYPE_SOCKS5


def handle_request(response):
    """Print the outcome of a fetch, then stop the IOLoop.

    :param response: object with ``error`` and ``body`` attributes; the
        error is printed when it is truthy, otherwise the body is printed.
    """
    # Single-argument print(...) parses as both the Python 2 print statement
    # and the Python 3 print function, and produces the same output in each.
    if response.error:
        print("Error: %s" % (response.error,))
    else:
        print(response.body)
    IOLoop.instance().stop()


if __name__ == '__main__':
    # Extra fetch options: send the request through a SOCKS5 proxy at
    # 127.0.0.1:9050 and skip certificate validation.
    fetch_options = dict(
        proxy_type=PROXYTYPE_SOCKS5,
        proxy_host='127.0.0.1',
        proxy_port=9050,
        validate_cert=False,
    )
    http_client = CurlAsyncHTTPClient()
    http_client.fetch("https://www.dyndns.org/", handle_request,
                      **fetch_options)
    # start() returns once handle_request has stopped the loop.
    io_loop = IOLoop.instance()
    io_loop.start()
    http_client.close()