def _connect(self):
    """Open a websocket connection to ``self._url`` and register the
    connect-done callback."""
    json_headers = httputil.HTTPHeaders({'Content-Type': APPLICATION_JSON})
    ws_request = httpclient.HTTPRequest(
        url=self._url,
        connect_timeout=self.connect_timeout,
        request_timeout=self.request_timeout,
        headers=json_headers,
    )
    # Keep the connection on the instance so other methods can reach it.
    self._ws_connection = PadchatSocketClientConnection(
        ws_request, ping_interval=self.ping_interval)
    self._ws_connection.connect_future.add_done_callback(self._connect_callback)
def open(self) -> None:
    """Proxy the incoming request by opening a websocket to the target server."""
    # Mirror the incoming request's URI on the backend host/port.
    backend_url = "ws://{host}:{port}{uri}".format(
        host=self.target_host,
        port=self.target_port,
        uri=self.request.uri,
    )
    upstream_request = httpclient.HTTPRequest(backend_url)
    # Forward the client's headers, excluding the extensions header so the
    # upstream negotiation is not affected by it.
    upstream_request.headers = self._add_request_headers(
        ['sec-websocket-extensions'])
    websocket_connect(
        upstream_request,
        callback=self.open_callback,
        on_message_callback=self.on_client_message,
    )
def mock_ok_response():
    """Build a canned 200 HTTPResponse carrying AWS-style JSON headers."""
    payload = b'{"foo": "bar"}'
    response_headers = httputil.HTTPHeaders({
        'x-amzn-RequestId': '3840c615-0503-4a53-a2f6-07afa795a5d6',
        'Content-Type': 'application/x-amz-json-1.0',
        'Server': 'Server',
        'Date': 'Tue, 06 Jun 2017 18:31:47 GMT',
    })
    return httpclient.HTTPResponse(
        httpclient.HTTPRequest('/'), 200, response_headers, io.BytesIO(payload))
async def _get(self, api_method, args=None):
    """GET ``api_method`` (optionally with query ``args``) and parse the reply."""
    target = self._get_api_url(api_method)
    if args is not None:
        # Normalise the arguments before appending them as a query string.
        target = thu.url_concat(target, util.normalize_args(args))
    client = thc.AsyncHTTPClient()
    response = await client.fetch(thc.HTTPRequest(target))
    return self._parse_response(response)
def tornado():
    """Fetch http://localhost:8080/sleep with a synchronous client.

    Returns:
        The response body on success, or the caught exception object on
        failure (original contract — callers inspect the returned value).
    """
    http_client = httpclient.HTTPClient()
    try:
        response = http_client.fetch(
            httpclient.HTTPRequest(url="http://localhost:8080/sleep",
                                   request_timeout=70))
        return response.body
    except Exception as e:
        # Deliberate best-effort: the exception object is returned, not raised.
        return e
    finally:
        # Always release the client; the original leaked it.
        http_client.close()
    # The original ended with ``else: return 'else from tornado request'``,
    # which was unreachable because the ``try`` body always returns.
def put(self, *args, **kwargs):
    """Forward the JSON body to the pwds service, then persist it locally.

    Fixes: the original called ``client.fetch(request)`` without ``yield``,
    so the returned Future was discarded and the POST was never awaited.
    """
    client = httpclient.AsyncHTTPClient()
    h = {'Content-Type': 'application/json; charset=UTF-8'}
    request = httpclient.HTTPRequest(
        url='http://172.24.41.152:8888/pwds',
        method='POST',
        body=json.dumps(self.json_args),
        headers=h)
    print(self.json_args)
    response = yield client.fetch(request)  # actually await the upstream POST
    # Attach a unique key before saving to the local database.
    k = str(uuid.uuid1().int)
    self.json_args['key'] = k
    yield self.application.db.insert(bucket, self.json_args)
    self.set_header('Content-Type', 'text/javascript;charset=utf-8')
    self.write(self.json_args)
def start(self):
    """Begin streaming server-sent events from ``self.url``.

    Marks the listener active, issues a GET with an unlimited request
    timeout (``request_timeout=0``) so the stream stays open, and routes
    incoming chunks to ``self._on_stream``.
    """
    self.active = True
    headers = {"Content-Type": "text/event-stream"}
    # request_timeout=0 disables the timeout for the long-lived stream;
    # each received chunk is delivered to _on_stream as it arrives.
    req = httpclient.HTTPRequest(url=self.url, method='GET', headers=headers, request_timeout=0, streaming_callback=self._on_stream)
    self.client.fetch(req, self._on_request)
    # NOTE(review): tornado's IOLoop.start() takes no arguments — this call
    # presumably targets a custom loop wrapper; confirm _io_loop's type.
    self._io_loop.start(False)
def clone_request(request, url, validate_cert=True):
    """Build an ``httpclient.HTTPRequest`` that mirrors ``request``.

    ``url`` replaces the original target. ``request`` is typically a
    ``tornado.httpserver.HTTPRequest`` instance.
    """
    # An empty body is passed as None so tornado does not reject it.
    cloned = httpclient.HTTPRequest(
        url,
        method=request.method,
        headers=request.headers,
        body=request.body or None,
        validate_cert=validate_cert,
    )
    return cloned
def websocket_connect(url, io_loop=None, callback=None, connect_timeout=None,
                      on_message_callback=None, compression_options=None):
    """Client-side websocket support.

    Takes a url and returns a Future whose result is a
    `WebSocketClientConnection`.

    ``compression_options`` is interpreted in the same way as the
    return value of `.WebSocketHandler.get_compression_options`.

    The connection supports two styles of operation. In the coroutine
    style, the application typically calls
    `~.WebSocketClientConnection.read_message` in a loop::

        conn = yield websocket_connection(loop)
        while True:
            msg = yield conn.read_message()
            if msg is None: break
            # Do something with msg

    In the callback style, pass an ``on_message_callback`` to
    ``websocket_connect``. In both styles, a message of ``None``
    indicates that the connection has been closed.

    .. versionchanged:: 3.2
       Also accepts ``HTTPRequest`` objects in place of urls.

    .. versionchanged:: 4.1
       Added ``compression_options`` and ``on_message_callback``.
       The ``io_loop`` argument is deprecated.
    """
    if io_loop is None:
        io_loop = IOLoop.current()
    if isinstance(url, httpclient.HTTPRequest):
        # A pre-built request cannot also carry a separate connect_timeout.
        assert connect_timeout is None
        request = url
        # Copy and convert the headers dict/object (see comments in
        # AsyncHTTPClient.fetch)
        request.headers = httputil.HTTPHeaders(request.headers)
    else:
        request = httpclient.HTTPRequest(url, connect_timeout=connect_timeout)
    # Wrap in a proxy so unset attributes fall back to HTTPRequest defaults.
    request = httpclient._RequestProxy(
        request, httpclient.HTTPRequest._DEFAULTS)
    conn = WebSocketClientConnection(io_loop, request,
                                     on_message_callback=on_message_callback,
                                     compression_options=compression_options)
    if callback is not None:
        # Legacy callback style: invoke callback when the connect completes.
        io_loop.add_future(conn.connect_future, callback)
    return conn.connect_future
def raw_fetch(self, headers, body):
    """Send a hand-assembled HTTP request over a raw connection and return
    the parsed response.

    ``headers`` is a list of raw header byte-lines (no trailing CRLF);
    ``body`` is the raw payload. A Content-Length header is appended
    automatically before the blank separator line.
    """
    conn = RawRequestHTTPConnection(
        self.io_loop, self.http_client,
        httpclient.HTTPRequest(self.get_url("/")), self.stop)
    # Join header lines with CRLF, append Content-Length, the blank
    # header-terminating line, then the body bytes.
    conn.set_request(
        b("\r\n").join(headers + [utf8("Content-Length: %d\r\n" % len(body))]) + b("\r\n") + body)
    response = self.wait()
    # Re-raise any stored error so test failures surface as exceptions.
    response.rethrow()
    return response
def communication(self, request):
    """POST a serialized protobuf ``request`` and return the parsed response.

    The response class is resolved dynamically as
    ``ProtobufAPI.<handlerName>Response``.

    Fixes: replaced ``eval`` on a string built from ``self.handlerName``
    with ``getattr`` (same lookup, no arbitrary code execution), and the
    synchronous HTTPClient is now closed after use instead of leaked.
    """
    data = request.SerializeToString()
    url = self.baseUrl + self.handlerName
    http_client = httpclient.HTTPClient()
    try:
        ret = http_client.fetch(httpclient.HTTPRequest(url, method='POST', body=data))
    finally:
        http_client.close()
    response = getattr(ProtobufAPI, self.handlerName + 'Response')()
    response.ParseFromString(ret.body)
    return response
def test_process_bogus_response(self):
    """A 503 with a non-JSON body yields (False, None) from _process_error."""
    raw = b'Slow Down'
    bogus_headers = httputil.HTTPHeaders({
        'x-amzn-RequestId': '3840c615-0503-4a53-a2f6-07afa795a5d6',
        'Date': 'Tue, 06 Jun 2017 18:31:47 GMT',
    })
    response = httpclient.HTTPResponse(
        httpclient.HTTPRequest('/'), 503, bogus_headers, io.BytesIO(raw))
    error = httpclient.HTTPError(503, 'Bad Request', response)
    with self.client_with_default_creds('s3') as obj:
        self.assertEqual(obj._process_error(error), (False, None))
def fetch_request(url, callback, **kwargs):
    # type: (str, Any, **Any) -> Generator[Callable[..., Any], Any, None]
    """Fetch ``url`` asynchronously and hand the response to ``callback``.

    Generous 240-second timeouts accommodate long-polling endpoints.
    """
    request = httpclient.HTTPRequest(
        url, connect_timeout=240.0, request_timeout=240.0, **kwargs)
    # Wait for the fetch to complete before invoking the callback.
    response = yield gen.Task(httpclient.AsyncHTTPClient().fetch, request)
    callback(response)
def translator_future(self, translate_from, translate_to):
    """Open a websocket to the Microsoft speech-translation API.

    Returns the connect future; translated messages are delivered to
    ``self.speech_to_translation_completed``.
    """
    # The API expects a two-letter source language code.
    endpoint = "wss://dev.microsofttranslator.com/speech/translate?from={0}&to={1}&api-version=1.0".format(
        translate_from[:2], translate_to)
    auth_headers = {
        'Authorization': 'Bearer ' + azure_auth_client.get_access_token(),
    }
    return websocket.websocket_connect(
        httpclient.HTTPRequest(endpoint, headers=auth_headers),
        on_message_callback=self.speech_to_translation_completed)
def go(self):
    """Worker coroutine: pull patent-detail tasks off the queue, fetch and
    parse each page, and persist new patents (plus their citations).

    Runs until the search is done and the queue is drained. Failed fetches
    are re-queued with a retry count; a captcha page pauses the worker.
    """
    print u"子爬虫 %s 启动" % self.name
    while not self.search_done or self.queue.qsize() > 0:
        # Get the next patent-detail task to crawl.
        task = yield self.queue.get()
        if task is None:
            continue
        print u"%s 开始爬取 %s[%s]" % (self.name, task.r_url, task.retries)
        country = task.country
        url = self.make_url(task.r_url)
        req = httpclient.HTTPRequest(
            url=url,
            headers={'Cookie': self.search.cookies},
            follow_redirects=False)
        res = yield self.client.fetch(req, raise_error=False)
        if res.code != 200:
            print u"%s 获取 %s 失败, 错误码为 %s" % (self.name, task.r_url, res.code)
            # On failure, re-queue the task and put this worker to
            # sleep for five seconds.
            task.retries += 1
            yield self.queue.put(task)
            yield gen.sleep(5)
            continue
        parser = DetailResultParser(content=res.body, url=url)
        p, created = self.get_or_create_patent(parser.get_p_id())
        p.country = country
        if created:
            try:
                parser.analyze()(p)
            except AttributeError as e:
                # A captcha page breaks the parser; back off for a
                # minute and retry the task.
                if self.come_to_validate_image(parser.soup):
                    yield gen.sleep(60)
                    yield self.queue.put(task)
                    continue
                # Unknown parse failure: dump the page, stop the loop,
                # and re-raise for diagnosis.
                self.write_log_file(parser.soup.prettify("utf8"))
                ioloop.IOLoop.current().stop()
                raise e
            self.session.add(p)
            parser.debug(self.name)
            self.session.commit()
            citations = parser.cited_patents()
            if len(citations) > 0:
                print u"======%s 发现了%s条引用数据" % (self.name, len(citations))
                for url in citations:
                    # Synchronously pull in the patents cited on this page.
                    # The citation link from the detail page ends in the p_id.
                    p_id = url.split("/")[-1]
                    if self.patent_exists(p_id):
                        continue
                    yield self.fetch_citation(url, patent=p)
        print u"%s 完成爬取 %s[%s]" % (self.name, task.r_url, task.retries)
        self.queue.task_done()
def download_pages():
    """Queue every URL in ``json_urls`` for async download, then start the
    IO loop. Tracks the total queued count in the module-level ``total``."""
    global i, json_urls, total, http_client
    http_client = httpclient.AsyncHTTPClient(
        force_instance=True, max_clients=threads)
    i = 0
    for json_url in json_urls:
        http_client.fetch(
            httpclient.HTTPRequest(json_url.strip(), method='GET',
                                   connect_timeout=4, request_timeout=4),
            handle_json_response)
        i += 1
    total = i
    print("Starting Download Of Pages...")
    ioloop.IOLoop.instance().start()
async def asynchronous_fetch(url, sleep=0):
    """Fetch ``url`` after an optional delay.

    Returns the response body, or None if the fetch failed for any reason.
    """
    await gen.sleep(sleep)
    print(url)
    try:
        response = await http_client.fetch(
            httpclient.HTTPRequest(url, headers=headers,
                                   request_timeout=MAX_REQUEST_TIME))
    except Exception as e:
        # Best-effort: log the failure and signal it with None.
        print("Unexpected error:", str(e), sys.exc_info()[0])
        return None
    return response.body
def check_user(self, username):
    """Asynchronously probe the webshots stats page for ``username``.

    Names containing characters outside ``[-_a-zA-Z0-9]`` are silently
    skipped.
    """
    if not re.match(r'^[-_a-zA-Z0-9]+$', username):
        return
    self.running += 1
    stats_req = httpclient.HTTPRequest(
        ("http://community.webshots.com/user/%s/stats" % username),
        connect_timeout=10,
        request_timeout=30,
        use_gzip=True,
        user_agent=USER_AGENT)
    # Stash the username on the request so the response handler can
    # recover which user the reply belongs to.
    stats_req.username = username
    self.http_client.fetch(stats_req, self.handle_response)
async def send_create(cookies, data):
    """POST a new-ad form to kijiji.

    Returns ``(True, adId)`` on success, or ``(False, None)`` when the site
    bounced us back to the submit page (rejection).
    """
    client = httpclient.AsyncHTTPClient()
    httpmethod = "POST"
    url = "https://www.kijiji.ca/p-submit-ad.html"
    # NOTE(review): despite the name, this dict carries the Content-Type
    # header as well as the Cookie header.
    nativeCookies = {
        'Content-Type': 'application/x-www-form-urlencoded',
        "Cookie": generate_cookie_str(cookies)
    }
    request = httpclient.HTTPRequest(url, method=httpmethod, body=data, headers=nativeCookies)
    # NOTE(review): passing a callback (handle_create) alongside ``await``
    # only works on Tornado < 6 — confirm the pinned tornado version.
    response = await client.fetch(request, handle_create)
    # Landing back on the submit page means the ad was not created.
    if response.effective_url == "https://www.kijiji.ca/p-submit-ad.html":
        return (False, None)
    # On success the ad id is carried in the redirect URL's query string.
    adId = urllib.parse.parse_qs(urllib.parse.urlparse(response.effective_url).query)['adId'][0]
    return (True, adId)
def make_request(self, url, method, body=None):
    """Build an HTTPRequest for ``url``.

    GET requests carry no body; any other method gets a form-encoded body
    (dicts are urlencoded automatically).

    Fixes: the original left ``request`` unbound (UnboundLocalError) when
    ``method`` was not GET and ``body`` was None; it also built the
    ``headers`` object but never attached it to the request. Both are
    corrected: a request is returned on every path, with headers attached.
    """
    headers = httputil.HTTPHeaders()
    headers.add('Accept', 'application/json')
    kwargs = dict(url=url, method=method, connect_timeout=3)
    if method != "GET" and body is not None:
        headers.add('Content-type', 'application/x-www-form-urlencoded')
        if isinstance(body, dict):
            body = urllib.urlencode(body)
        kwargs['body'] = body
    request = httpclient.HTTPRequest(headers=headers, **kwargs)
    return request
def httppost(url, **kwgs):
    """POST to ``url`` with a synchronous client, printing the body or error.

    ``kwgs`` is accepted for interface compatibility but currently unused.

    Fixes: replaced the non-idiomatic ``!= None`` comparison with
    ``is not None`` (identity check is the correct None test).
    """
    httpClient = None
    try:
        httpClient = httpclient.HTTPClient()
        httpReq = httpclient.HTTPRequest(url=url, method="POST")
        httpResp = httpClient.fetch(httpReq)
        print(httpResp.body)
    except httpclient.HTTPError as e:
        print(e)
    finally:
        # Release the client even when construction or fetch failed.
        if httpClient is not None:
            httpClient.close()
def readability_parser(url):
    """Parse an article via the Readability content API.

    API docs: http://www.readability.com/developers/api/parser
    Returns the AsyncHTTPClient fetch future.
    """
    api_request = httpclient.HTTPRequest(
        "https://readability.com/api/content/v1/parser?token=7f579fc61973e200632c9e43ff2639234817fbb3&url=" + url,
        method='GET',
        user_agent='Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.6 Safari/537.36',
    )
    return httpclient.AsyncHTTPClient().fetch(api_request)
def search_for_company(company, skip=0):
    """Search for the given company name, save the result to the database
    through the Django ORM.

    Returns (via ``gen.Return``) a tuple:
    ``(finished?, number of patents fetched, reason when unfinished)``.
    ``skip`` resumes a previous run from that patent index.
    """
    print u'->开始搜索:%s' % company.name
    fetched_patent = skip
    start_url = base_url % company.name.strip()
    # cookie = 'patentids=; domain=.soopat.com; expires=%s GMT; path=/' %\
    #          (timezone.now() + datetime.timedelta(seconds=60)).strftime('%a, %d-%b-%Y %H-%M-%S')
    cookie = 'lynx-randomcodestring=; patentids='
    client = httpclient.AsyncHTTPClient()
    while True:
        # Page through results using the PatentIndex offset parameter.
        if fetched_patent > 0:
            request_url = start_url + '&PatentIndex=%s' % fetched_patent
        else:
            request_url = start_url
        print u'开始发送访问请求:%s' % request_url
        print 'cookie::' + cookie
        request = httpclient.HTTPRequest(
            url=request_url,
            headers={'Cookie': cookie,
                     'User-Agent': random.choice(user_agents)},
            follow_redirects=False,)
        response = yield client.fetch(request, raise_error=False)
        if response.code == 200:
            new_patents = yield parse_data_for_html(response.body, company)
            if 0 <= new_patents < 10:
                # Fewer than a full page of new patents: search complete.
                if new_patents == 0:
                    print u'未能发现新的专利'
                break
            elif new_patents == -1:
                print u'正在退出搜索: %s' % fetched_patent
                # Hit a captcha: return progress so the caller can sleep
                # and resume later from ``fetched_patent``.
                raise gen.Return((False, fetched_patent, 'authenticate code'))
            fetched_patent += new_patents
            # Randomized delay between requests to avoid being throttled.
            sleep_time = random.uniform(2, 10)
            print '正常工作间隔%s' % sleep_time
            time.sleep(sleep_time)
            print response.headers
            # Carry the server-issued cookie into the next request.
            cookie = response.headers.get('Set-Cookie', '')
        elif response.code == 500:
            print '遇到500错误,完成对当前条目的搜索'
            break
        else:
            print '出现其他返回状态代码:%s -> %s' % (response.code, response.headers.get('Location', ''))
            print response.body
            time.sleep(10)
            # Any other error: give up on this company.
            client.close()
            raise gen.Return((False, 0, response.code))
    client.close()
    raise gen.Return((True, fetched_patent, None))
def connect(self, url):
    """Open a websocket to ``url`` using the configured JSON content type
    and register the connect-done callback."""
    json_headers = httputil.HTTPHeaders({
        'Content-Type': self.config['WEBSOCKET_CLIENT']['APPLICATION_JSON']
    })
    ws_request = httpclient.HTTPRequest(
        url=url,
        connect_timeout=self.connect_timeout,
        request_timeout=self.request_timeout,
        headers=json_headers)
    connection = websocket.WebSocketClientConnection(
        ioloop.IOLoop.current(), ws_request)
    connection.connect_future.add_done_callback(self._connect_callback)
async def start_websocket_connection():
    """Establish the proxied websocket to ``client_uri``, recording
    activity before and after so the idle tracker stays accurate."""
    self.log.info(
        'Trying to establish websocket connection to {}'.format(client_uri))
    self._record_activity()
    ws_request = httpclient.HTTPRequest(url=client_uri, headers=headers)
    # pingable_ws_connect keeps the connection alive via ping callbacks.
    self.ws = await pingable_ws_connect(
        request=ws_request,
        on_message_callback=message_cb,
        on_ping_callback=ping_cb)
    self._record_activity()
    self.log.info(
        'Websocket connection established to {}'.format(client_uri))
def _request(self, u, m="HEAD", h=None, b=None):
    """Build an HTTPRequest using the loader limits from the context config.

    ``u``/``m``/``h``/``b`` are the url, method, headers, and body.
    """
    cfg = self.context.config
    return httpclient.HTTPRequest(
        url=u,
        method=m,
        headers=h,
        body=b,
        connect_timeout=cfg.HTTP_LOADER_CONNECT_TIMEOUT,
        request_timeout=cfg.HTTP_LOADER_REQUEST_TIMEOUT,
        follow_redirects=cfg.HTTP_LOADER_FOLLOW_REDIRECTS,
        max_redirects=cfg.HTTP_LOADER_MAX_REDIRECTS)
def _load_remote_data(self, url):
    """Synchronously GET ``url`` and return the raw response body.

    TLS verification and the client certificate/key come from the
    session configuration.

    Fixes: the HTTPClient instance was never closed; it is now released
    in a ``finally`` block.
    """
    client = httpclient.HTTPClient()
    kwargs = {
        'method': 'GET',
        'validate_cert': self.session.verify,
        'client_key': self._get_client_key(),
        'client_cert': self._get_client_cert(),
        'request_timeout': self.load_timeout,
    }
    try:
        response = client.fetch(httpclient.HTTPRequest(url, **kwargs))
        return response.body
    finally:
        client.close()
async def post(self):
    """Relay a 'move' command from the request body to the robot service."""
    action = self.get_body_argument('move')
    logger.info('[DIRECTION]: %s' % action)
    encoded = urllib.parse.urlencode({'move': action})
    robot_request = httpclient.HTTPRequest(
        url=ROBOT_ADDR, method='POST', body=encoded)
    # The robot's reply carries no useful payload; we only wait for it.
    res = await httpclient.AsyncHTTPClient().fetch(robot_request)
    self.finish()
def connect(self, url):
    """Connect to the server.

    :param str url: server URL.
    """
    ws_request = httpclient.HTTPRequest(
        url=url,
        headers=httputil.HTTPHeaders({'Content-Type': APPLICATION_JSON}),
        connect_timeout=self.connect_timeout,
        request_timeout=self.request_timeout)
    connection = websocket.WebSocketClientConnection(ws_request)
    connection.connect_future.add_done_callback(self._connect_callback)
async def handle_request(self, body):
    """Forward a web-client message to the message API and relay the reply."""
    payload = json.loads(body)
    # Tag the message with the web client identity before forwarding.
    payload['client'] = 'web'
    payload['user_id'] = ''
    request = TextMessageRequest(payload)
    api_request = httpclient.HTTPRequest(
        os.environ.get('API_APP_MESSAGE'),
        method='POST',
        headers={'Access-Token': os.environ['API_APP_ACCESS_TOKEN']},
        body=json.dumps(payload).encode())
    raw_response = await httpclient.AsyncHTTPClient().fetch(api_request)
    return self.handle_response(raw_response)