Example #1
    def post(self):
        protocol = options.api_protocol
        host = options.api_host
        port = options.api_port

        # port suffix
        port = "" if port == "80" else ":%s" % port

        uri = self.request.uri
        url = "%s://%s%s%s" % (protocol, host, port, uri)

        # update host to destination host
        headers = dict(self.request.headers)
        headers["Host"] = host
        if self.request.path in options.api_setting.keys():
            headers["If-Appflood-Api"] = "true"
        try:
            a = AsyncHTTPClient().fetch(
                HTTPRequest(url=url,
                            method="POST",
                            body=self.request.body,
                            headers=headers,
                            follow_redirects=False), self._on_proxy)

        except tornado.httpclient.HTTPError as x:
            if hasattr(x, "response") and x.response:
                self._on_proxy(x.response)
            else:
                logging.error("Tornado signalled HTTPError %s", x)
Example #2
    def async_http(self, proxy, idx, requests_proxies):
        success_count = self.SUCCESS_RATIO * self.CONNECTIVITY_TEST
        fail_count = self.CONNECTIVITY_TEST - success_count
        # handle_request (the async HTTP client callback) closes over this
        # local dict, so concurrent coroutines do not interfere with each other.
        count = {}
        count[idx] = [success_count, fail_count]

        for protocol, pro_ip_port in proxy.items():
            pro_type, host, port = pro_ip_port
            if protocol == pycurl.PROXYTYPE_HTTP:
                pass
            elif protocol == pycurl.PROXYTYPE_SOCKS5:
                pass
            elif protocol == pycurl.PROXYTYPE_SOCKS4:
                pass
            else:
                yield

        def prepare_curl_type(curl):
            curl.setopt(pycurl.PROXYTYPE, protocol)

        @gen.coroutine
        def handle_request(response):
            if response.code == 200:
                count[idx][0] -= 1
            else:
                count[idx][1] -= 1

        AsyncHTTPClient.configure(
            'tornado.curl_httpclient.CurlAsyncHTTPClient')
        http_client = AsyncHTTPClient()
        http_request = httpclient.HTTPRequest(
            url=self.TESTURL,
            method='HEAD',
            proxy_host=host,
            proxy_port=int(port),
            prepare_curl_callback=prepare_curl_type,
            follow_redirects=False,
        )

        for i in range(self.CONNECTIVITY_TEST):
            try:
                response = yield http_client.fetch(http_request,
                                                   handle_request)
            except CurlError as e:
                print('Curl Error: ', e)

        while True:
            yield gen.sleep(0.01)  # must be yielded, otherwise this loop busy-waits
            if count[idx][0] <= 0:
                requests_proxies.add('{}://{}:{}'.format(pro_type, host, port))
                raise gen.Return(True)
            if count[idx][1] <= 0:
                raise gen.Return(False)
Example #3
 def __init__(self,
              *,
              max_clients=100,
              connect_timeout=20.0,
              verify_ssl=True,
              **kwargs):
     self._httpclient = AsyncHTTPClient(max_clients=max_clients, **kwargs)
     self._connect_timeout = connect_timeout
     self._verify_ssl = verify_ssl
Example #4
def async_request(url):
    future = Future()
    def handle_response(response):
        if response.error:
            future.set_result('')
        else:
            future.set_result(len(response.body))
    curl_client = AsyncHTTPClient()
    request = HTTPRequest(url=url,headers={})
    curl_client.fetch(request, handle_response)
    return future
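The helper above bridges a callback-style fetch to a Future by hand. A minimal usage sketch for it (hypothetical caller code, assuming a Tornado IOLoop and that async_request is importable) might look like this:

from tornado import gen, ioloop

@gen.coroutine
def main():
    # resolves to the response body length, or '' on error, per handle_response above
    size = yield async_request('http://example.com/')
    print(size)

ioloop.IOLoop.current().run_sync(main)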
Example #5
    def _do_fetch(self, method):
        uri = self.request.uri
        # figure out which site this request targets
        result, url, host = self._parse_site(uri)
        self._backend_url = url  # backend site
        # site not found
        if not result:
            raise HTTPError(404)

        logger.debug(u'Backend site for this request: %s' % url)

        headers = dict(self.request.headers)
        # update the Host header to the backend site's host
        headers['Host'] = host
        # strip the .site field from the cookie
        if 'Cookie' in headers:
            cookies = headers['Cookie'].split(';')
            for i in range(len(cookies)):
                if cookies[i].strip() == '.site=' + self._site_name:
                    cookies.pop(i)
                    break
            headers['Cookie'] = ';'.join(cookies)

        if 'Authorization' in headers:
            auth_header_value = headers['Authorization']
            m = re.match(r'(NTLM [A-Za-z0-9+\-/=]+)', auth_header_value)
            if m:
                if len(auth_header_value) < 100:
                    pass
                else:
                    # TODO: parse the NTLM Authorization data and rewrite the IP
                    pass

        logger.debug(u'Modified headers: %s' % headers)

        try:
            if method == 'POST':
                body = self.request.body
            else:
                body = None

            AsyncHTTPClient(
                max_clients=config.async_http_client_max_clients).fetch(
                    HTTPRequest(url=url,
                                method=method,
                                body=body,
                                headers=headers,
                                follow_redirects=False), self._on_proxy)
        except tornado.httpclient.HTTPError as x:
            if hasattr(x, "response") and x.response:
                self._on_proxy(x.response)
            else:
                logger.error("Tornado signalled HTTPError %s", x)
Example #6
    def post(self, *args, **kwargs):
        logger.debug('AuthLoginHandler')
        login_auth_url = self.client.config.get('login_auth_url')
        logger.debug(login_auth_url)
        if login_auth_url is None:
            raise AuthRequestException(
                'Missing Login Auth Url in Client Config')
        # number of seconds until the access_token expires
        access_token_ex = self.client.config.get('access_token_ex')
        refresh_token_ex = self.client.config.get('refresh_token_ex')

        # set timeout values
        async_http_connect_timeout = ASYNC_HTTP_CONNECT_TIMEOUT
        async_http_request_timeout = ASYNC_HTTP_REQUEST_TIMEOUT
        headers = {'Content-Type': 'application/json; charset=utf-8'}
        try:
            response = yield AsyncHTTPClient().fetch(
                HTTPRequest(url=login_auth_url,
                            method=self.request.method,
                            body=self.request.body,
                            headers=headers,
                            connect_timeout=async_http_connect_timeout,
                            request_timeout=async_http_request_timeout))
        except Exception as e:
            logger.error(e)
            logger.error(traceback.format_exc())
            raise AuthRequestException('Fail to Request Login Auth Url')

        json_data = json.loads(response.body)
        if json_data['code'] == APIStatusCode.SUCCESS:
            user_info = json_data['data']
            token_info = {
                'access_key': self.client.access_key,
                'user_info': user_info
            }

            token_info = RedisHelper.set_token_info(token_info,
                                                    access_token_ex,
                                                    refresh_token_ex)
            if token_info is None:
                self.error(msg='Save Access Token Error')
            else:
                data = {
                    'access_token': token_info['access_token'],
                    'refresh_token': token_info['refresh_token'],
                    # access_token expiry time
                    'expires_in': int(time.time()) + access_token_ex,
                    'user_info': user_info
                }
                self.success(data)
        else:
            self.fail(msg=json_data['msg'])
Example #7
    def _http_call(self, url, method, **kwargs):
        """Makes a http call. Logs response information.
        """
        logging.info("Request[%s]: %s" % (method, url))
        start_time = datetime.datetime.now()

        response = yield AsyncHTTPClient().fetch(
            HTTPRequest(url=url, method=method, **kwargs))
        duration = datetime.datetime.now() - start_time
        logging.info("Response[%d]: %s, Duration: %s.%ss." %
                     (response.code, response.reason, duration.seconds,
                      duration.microseconds))
        raise gen.Return(
            self._handle_response(response, response.body.decode("utf-8")))
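raise gen.Return(...) is only needed in generator-based coroutines. For comparison, a rough native-coroutine sketch of the same request/response logging (an assumption-laden rewrite: Python 3, a recent Tornado, and returning the decoded body because _handle_response is not shown here) could be:

import datetime
import logging

from tornado.httpclient import AsyncHTTPClient, HTTPRequest

async def http_call(url, method, **kwargs):
    # same logging as above, but the result is returned directly
    # instead of being raised via gen.Return
    logging.info("Request[%s]: %s", method, url)
    start_time = datetime.datetime.now()
    response = await AsyncHTTPClient().fetch(
        HTTPRequest(url=url, method=method, **kwargs))
    duration = datetime.datetime.now() - start_time
    logging.info("Response[%d]: %s, Duration: %s.%ss.",
                 response.code, response.reason,
                 duration.seconds, duration.microseconds)
    return response.body.decode("utf-8")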
Example #8
 def post(self):
     print("Post->", self.request.uri)
     print(str(self.request))
     method = self.request.method
     uri = self.request.uri
     headers = self.request.headers
     body = self.request.body
     proxy = get_proxy()
     AsyncHTTPClient().fetch(
         HTTPRequest(url=uri,
                     method=method,
                     headers=headers,
                     body=body,
                     validate_cert=False,
                     proxy_host=proxy[0],
                     proxy_port=proxy[1]), self.on_response)
Example #9
    def fetch(self, *args, **kwargs):
        ret = []
        if not args:
            return ret
        
        urls = self._fetching_urls(*args, **kwargs)

        http = AsyncHTTPClient()
        i = 0
        for url in urls:
            callback = self._callback(args[i], **kwargs)
            logging.info("start urlfetch %s" % url)
            http.fetch(url, callback)
            self.queue_len = self.queue_len + 1
            i += 1

        ioloop.IOLoop.instance().start()
        return ret
Example #10
    def post(self):
        url = self.request.uri

        # update host to destination host
        headers = dict(self.request.headers)

        try:
            AsyncHTTPClient().fetch(
                HTTPRequest(url=url,
                            method="POST",
                            body=self.request.body,
                            headers=headers,
                            follow_redirects=False), self._on_proxy)
        except tornado.httpclient.HTTPError as x:
            if hasattr(x, "response") and x.response:
                self._on_proxy(x.response)
            else:
                logging.error("Tornado signalled HTTPError %s", x)
Example #11
    def _do_fetch(self, method):
        forward_url = self.client.request['forward_url']
        logger.debug('Backend site for this request: %s' % forward_url)
        logger.debug('Original headers: %s' % self.request.headers)
        # clean up and normalize the headers
        headers = self._clean_headers()
        logger.debug('Modified headers: %s' % headers)

        try:
            if method == 'GET':
                # for GET requests the body must be None, otherwise an exception is raised
                body = None
            else:
                body = self.request.body

            # set timeouts
            async_http_connect_timeout = self.client.config.get(
                'async_http_connect_timeout',
                settings.ASYNC_HTTP_CONNECT_TIMEOUT)
            async_http_request_timeout = self.client.config.get(
                'async_http_request_timeout',
                settings.ASYNC_HTTP_REQUEST_TIMEOUT)

            response = yield AsyncHTTPClient().fetch(
                HTTPRequest(url=forward_url,
                            method=method,
                            body=body,
                            headers=headers,
                            decompress_response=True,
                            connect_timeout=async_http_connect_timeout,
                            request_timeout=async_http_request_timeout,
                            follow_redirects=False))
            self._on_proxy(response)
        except tornado.httpclient.HTTPError as x:
            if hasattr(x, 'response') and x.response:
                self._on_proxy(x.response)
            else:
                self.analytics.result_code = ResultCode.REQUEST_ENDPOINT_ERROR
                logger.error(u'proxy failed for %s, error: %s' %
                             (forward_url, x))
        except Exception as e:
            logger.error(e)
            logger.error(traceback.format_exc())
            self.analytics.result_code = ResultCode.REQUEST_ENDPOINT_ERROR
Example #12
 def get(self):
     global change_flag
     global start_get_proxy
     print("Get->", self.request.uri)
     print(str(self.request))
     protocol = self.request.protocol
     host = self.request.host
     method = self.request.method
     uri = self.request.uri
     headers = self.request.headers
     if host in "%s:%d" % (host_ip, host_port) and "next" in uri:
         change_flag = True
     proxy = get_proxy()
     AsyncHTTPClient().fetch(
         HTTPRequest(url=uri,
                     method=method,
                     headers=headers,
                     validate_cert=False,
                     proxy_host=proxy[0],
                     proxy_port=proxy[1]), self.on_response)
Example #13
    def fetch(self, blob_cache, callback=None):
        if self._data is None:
            # Fetch data
            parts = urlparse(self.uri)
            if parts.scheme == 'blob':
                try:
                    data = blob_cache[parts.path]
                except KeyError:
                    raise HTTPError(400, 'Blob missing from blob cache')
            elif parts.scheme == 'http' or parts.scheme == 'https':
                client = AsyncHTTPClient()
                if options.http_proxy is not None:
                    proxy_host, proxy_port = options.http_proxy.split(':', 1)
                    proxy_port = int(proxy_port)
                else:
                    proxy_host = proxy_port = None
                response = yield gen.Task(client.fetch,
                                          self.uri,
                                          user_agent='JSONBlaster/%s' %
                                          opendiamond.__version__,
                                          proxy_host=proxy_host,
                                          proxy_port=proxy_port,
                                          validate_cert=False)
                if response.error:
                    raise HTTPError(
                        400, 'Error fetching <%s>: %s' %
                        (self.uri, str(response.error)))
                data = response.body
            else:
                raise HTTPError(400, 'Unacceptable blob URI scheme')

            # Check hash if requested
            if self._expected_sha256 is not None:
                if sha256(data).hexdigest() != self._expected_sha256:
                    raise HTTPError(400, 'SHA-256 mismatch on %s' % self.uri)

            # Commit
            self._data = data

        if callback is not None:
            callback()
Example #14
 def _do_fetch(self, method):
     uri = self.request.uri
     url = "%s://%s:%s%s" % (
         options.backend_scheme,
         options.backend_host,
         options.backend_port,
         uri,
     )
     headers = dict(self.request.headers)
     try:
         AsyncHTTPClient(max_clients=options.max_clients).fetch(
             tornado.httpclient.HTTPRequest(url=url,
                                            method=method,
                                            body=None,
                                            headers=headers,
                                            follow_redirects=False),
             self._on_proxy)
     except HTTPError as x:
         if hasattr(x, "response") and x.response:
             self._on_proxy(x.response)
         else:
             logger.error("Tornado signalled HTTPError %s", x)
Example #15
        async def fetch_proxyrotator():
            result = []
            client = AsyncHTTPClient()

            async def fetch_proxy(i):
                logging.info(f'fetching proxy #{i} from proxyrotator')
                resp = await client.fetch(
                    HTTPRequest(
                        method='GET',
                        url=
                        'http://falcon.proxyrotator.com:51337/?apiKey=9EKVT48tBSANFXkxWbeMhCUZqwzypfPa&get=true&post=true'
                    ))
                data = json.loads(resp.body.decode())
                result.append(
                    Proxy(ip=data['ip'],
                          port=int(data['port']),
                          c_code=data['country'],
                          country=data['country']))

            for chunk in chunks(range(300), 10):
                await asyncio.wait([fetch_proxy(i) for i in chunk])
            logging.info(f"Found {len(result)} proxies with proxyrotator")
            return result
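Note that awaiting asyncio.wait on bare coroutines, as done above, is deprecated in recent Python versions. A minimal sketch of the same chunked fan-out (hypothetical helper; fetch_proxy and the index chunks are assumed to be supplied by the caller) could use gather instead:

import asyncio

async def fetch_in_chunks(fetch_proxy, index_chunks):
    # gather() accepts coroutines directly, avoiding the deprecated
    # bare-coroutine form of asyncio.wait(); per-fetch errors are
    # collected instead of aborting the whole chunk
    for chunk in index_chunks:
        await asyncio.gather(*(fetch_proxy(i) for i in chunk),
                             return_exceptions=True)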
Example #16
class AsyncProxyClient(object):
    AsyncHTTPClient.configure('tornado.curl_httpclient.CurlAsyncHTTPClient')
    METRIC_RETRIED_REQUESTS: Counter = None

    def __init__(self,
                 enable_proxy=True,
                 penalty_fn=None,
                 promote_fn=None,
                 max_clients=50,
                 before_retry_callback=None,
                 monitoring=False) -> None:
        super().__init__()

        self.shuffle_proxy_for_each_request = True
        self.fetch_opts = {}
        self.enable_proxy = enable_proxy
        self.before_retry_callback = before_retry_callback
        if monitoring:
            from prometheus_client import Counter
            from urllib.parse import urlparse
            if not AsyncProxyClient.METRIC_RETRIED_REQUESTS:
                AsyncProxyClient.METRIC_RETRIED_REQUESTS = Counter(
                    "async_proxy_client_retried_requests",
                    "Number of retried requests", ['host'])

            def _before_retry_callback(*args, **kwargs):
                AsyncProxyClient.METRIC_RETRIED_REQUESTS.labels(
                    urlparse(args[0].url).hostname).inc()
                before_retry_callback and before_retry_callback(
                    *args, **kwargs)

            self.before_retry_callback = _before_retry_callback

        if self.enable_proxy:
            self.proxy_manager = ProxyManager(penalty_fn, promote_fn)
        self._client = CurlAsyncHTTPClient(max_clients=max_clients,
                                           defaults=dict(validate_cert=True))

    async def patient_fetch(self,
                            request,
                            proxy=None,
                            use_proxy_for_request=True,
                            redundancy=1,
                            **kwargs):
        impatient_fetch = nofail_async(
            before_retry_callback=self.before_retry_callback)(
                self.impatient_fetch)
        return await impatient_fetch(request, proxy, use_proxy_for_request,
                                     redundancy, **kwargs)

    async def impatient_fetch(self,
                              request,
                              proxy=None,
                              use_proxy_for_request=True,
                              redundancy=1,
                              **kwargs):
        res = await asyncio.wait([
            self.fetch(request,
                       proxy=proxy,
                       use_proxy_for_request=use_proxy_for_request,
                       **kwargs) for _ in range(redundancy)
        ],
                                 return_when=asyncio.FIRST_COMPLETED)
        for task in [j for e in res for j in e if not j.done()]:
            task.cancel()
        result = [j for e in res for j in e if j.done()][0].result()
        return result

    async def fetch(self,
                    request: HTTPRequest,
                    proxy=None,
                    use_proxy_for_request=True,
                    **kwargs):
        ok_statuses = set([200] + kwargs.get('ok_statuses', []))
        logging.debug(f"Sending {request.method} : {request.url}")
        if kwargs.get('cookies'):
            cookies = ';'.join([
                f'{i[0]}={i[1]}' for i in {
                    **parse_cookie(request.headers.get('Cookie', '')),
                    **kwargs.get('cookies')
                }.items()
            ])
            request.headers['Cookie'] = cookies
        is_proxying = self.enable_proxy and use_proxy_for_request
        curr_proxy = None
        try:
            if is_proxying:
                while not self.proxy_manager.has_proxies():
                    await asyncio.sleep(1)
                self.shuffle_proxy_for_each_request and self.proxy_manager.shuffle_proxy(
                )
                curr_proxy: Proxy = self.proxy_manager.current_proxy if not proxy else proxy
                request.proxy_host = curr_proxy.ip
                request.proxy_port = curr_proxy.port
                if curr_proxy.username:
                    request.proxy_username = curr_proxy.username
                if curr_proxy.password:
                    request.proxy_password = curr_proxy.password

            request.connect_timeout = kwargs.get('connect_timeout', 10)
            request.request_timeout = kwargs.get('request_timeout', 60)
            if is_proxying and curr_proxy:
                logging.debug(f"using proxy: {curr_proxy.ip}")

            res = await self._client.fetch(request, raise_error=False)
            if res.code not in ok_statuses:
                # not self.shuffle_proxy_for_each_request and self.proxy_manager.shuffle_proxy()
                logging.error(f"BadResponseCodeException: {res.code}")
                raise BadResponseCodeException(res.code)
            if is_proxying:
                self.proxy_manager.promote_proxy(curr_proxy)
            return self.enhance_response(res)
        except CancelledError:
            pass
        except Exception as e:
            if kwargs.get('error_handler'):
                kwargs.get('error_handler')(e, self.proxy_manager, curr_proxy)
            if is_proxying:
                await self.proxy_manager.punish_proxy(curr_proxy, e)
            raise e

    def enhance_response(self, res):
        import json as JSON

        def json():
            return JSON.loads(res.body.decode())

        def text():
            return res.body.decode('utf-8')

        res.json = json
        return res
Example #17
            self.write(response.get("response"))
            self.finish()
        else:
            try:
                serverIP, serverPort = getServerHost(serviceName)
            except Exception as e:
                self.set_status(500, "Internal Server Error!")
                logger.error(
                    e.message, 'line %d (/apps/api/handlers.py)' %
                    (sys._getframe().f_lineno - 2))
                self.finish()
            else:
                serverhost = "%s://%s:%s" % (protocol, serverIP, serverPort)
                AsyncHTTPClient().fetch(
                    HTTPRequest(url=serverhost + uri,
                                method=method,
                                headers=headers_o,
                                body=body_o,
                                validate_cert=False), self.on_response)

    @tornado.web.asynchronous
    def get(self):
        try:
            req = self.request
            protocol = req.protocol
            host = req.host
            method = req.method
            uri = req.uri
            urigroups = uri.split('/')
            if urigroups[1] == "errorpage":
                path = self.get_argument("path")
                sc = urigroups[2]
Example #18
 def __init__(self):
     if not hasattr(self, "client"):
         self.client = AsyncHTTPClient()
Example #19
 def __init__(self, url):
     self.url = url.strip('/')
     self.update_url = self.url + '/update'
     self.update_url_with_commit = self.url + '/update?commit=true'
     self.select_url = self.url + '/select'
     self.client = AsyncHTTPClient()
Example #20
CWD = os.path.abspath(os.getcwd())
if '.' not in sys.path and CWD not in sys.path:
    sys.path.insert(0, CWD)

if six.PY2 and sys.getdefaultencoding() != 'utf8':
    reload(sys)
    sys.setdefaultencoding('utf8')

from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin
try:
    from tornado.curl_httpclient import CurlAsyncHTTPClient as AsyncHTTPClient
except ImportError:
    from tornado.httpclient import AsyncHTTPClient
from tornado import gen, ioloop

HTTP_CLIENT = AsyncHTTPClient()
THIS_DIRNAME = os.path.abspath(os.getcwd())
PORT = 8888
PINTU_FILE = './data/test.pintu__20180504.json'
PINTU = os.path.exists(PINTU_FILE)
cmd = {
    'sys': ['http://%s:%s/sys', [], {}],
    'ports': ['http://%s:%s/sys?op=ports', [], {
        'method': "POST",
        'body': ''
    }],
    'pintu': [
        'http://%s:%s/finup?model=pintu__2018051116', [], {
            'method': "POST",
            'body': open(PINTU_FILE).read() if PINTU else None
        }
 def async_get(self, url):
     client = AsyncHTTPClient()
     print('to connect ...', url)
     client.fetch(url, self.handle_async_reponse)