def aiohttp_session_cookies(self):
    '''Returns an aiohttp compatible list of relevant session cookies.'''
    output_cookie = SimpleCookie()
    for cookie in self.session_cookies:
        converted_cookie = self.cookiejar_cookie_to_simplecookie(cookie)
        output_cookie.update(converted_cookie)
    return output_cookie
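# A minimal usage sketch, not part of the original snippet: the SimpleCookie
# returned by aiohttp_session_cookies() can be loaded into an aiohttp cookie
# jar before making requests. `client` (the object exposing the method above)
# and `url` are assumptions.
import aiohttp

async def fetch_with_session_cookies(client, url):
    async with aiohttp.ClientSession() as session:
        # A SimpleCookie is a Mapping of name -> Morsel, which
        # CookieJar.update_cookies() accepts directly.
        session.cookie_jar.update_cookies(client.aiohttp_session_cookies())
        async with session.get(url) as resp:
            return await resp.text()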
async def open(
    self,
    path: str,
    *,
    method: str = "GET",
    headers: Optional[Union[dict, CIMultiDict]] = None,
    data: Any = None,
    form: Optional[dict] = None,
    query_string: Optional[dict] = None,
    json: Any = sentinel,
    scheme: str = "http",
    cookies: Optional[dict] = None,
    stream: bool = False,
    allow_redirects: bool = True,
):
    """Open a request to the app associated with this client.

    Arguments:
        path
            The path to request. If the query_string argument is not defined,
            this argument will be partitioned on a '?' with the following part
            being considered the query_string.
        method
            The method to make the request with, defaults to 'GET'.
        headers
            Headers to include in the request.
        data
            Raw data to send in the request body, or an async generator that
            yields body chunks.
        query_string
            The query string to send as a dictionary; alternatively it can be
            determined from the path.
        form
            Data to send form encoded in the request body.
        json
            Data to send json encoded in the request body.
        scheme
            The scheme to use in the request, default http.
        cookies
            Cookies to send in the request instead of the cookies in
            TestClient.cookie_jar.
        stream
            Return the response as a stream instead of buffering it.
        allow_redirects
            If set to True, redirects are followed.

    Returns:
        The response from the app handling the request.
    """
    input_queue: asyncio.Queue[dict] = asyncio.Queue()
    output_queue: asyncio.Queue[dict] = asyncio.Queue()

    headers, path, query_string_bytes = make_test_headers_path_and_query_string(
        self.application, path, headers, query_string
    )

    if [json is not sentinel, form is not None, data is not None].count(True) > 1:
        raise ValueError(
            "Test args 'json', 'form', and 'data' are mutually exclusive"
        )

    request_data = b""

    if isinstance(data, str):
        request_data = data.encode("utf-8")
    elif isinstance(data, bytes):
        request_data = data

    if json is not sentinel:
        request_data = dumps(json).encode("utf-8")
        headers["Content-Type"] = "application/json"

    if form is not None:
        request_data = urlencode(form).encode("utf-8")
        headers["Content-Type"] = "application/x-www-form-urlencoded"

    if cookies is None:  # use TestClient.cookie_jar
        cookie_jar = self.cookie_jar
    else:
        cookie_jar = SimpleCookie(cookies)

    if cookie_jar and cookie_jar.output(header=""):
        headers.add("Cookie", cookie_jar.output(header=""))

    flat_headers: List[Tuple] = [
        (bytes(k.lower(), "utf8"), bytes(v, "utf8")) for k, v in headers.items()
    ]

    scope = {
        "type": "http",
        "http_version": "1.1",
        "asgi": {"version": "3.0"},
        "method": method,
        "scheme": scheme,
        "path": path,
        "query_string": query_string_bytes,
        "root_path": "",
        "headers": flat_headers,
    }

    create_monitored_task(
        self.application(scope, input_queue.get, output_queue.put),
        output_queue.put_nowait,
    )

    send = input_queue.put_nowait
    receive_or_fail = partial(receive, output_queue, timeout=self.timeout)

    # Send request
    if inspect.isasyncgen(data):
        async for is_last, body in is_last_one(data):
            send({"type": "http.request", "body": body, "more_body": not is_last})
    else:
        send({"type": "http.request", "body": request_data})

    response = Response(stream, receive_or_fail, send)

    # Receive response start
    message = await self.wait_response(receive_or_fail, "http.response.start")
    response.status_code = message["status"]
    response.headers = CIMultiDict(
        [(k.decode("utf8"), v.decode("utf8")) for k, v in message["headers"]]
    )

    # Receive initial response body
    message = await self.wait_response(receive_or_fail, "http.response.body")
    response.raw.write(message["body"])
    response._more_body = message.get("more_body", False)

    # Consume the remaining response if not streaming
    if not stream:
        bytes_io = BytesRW()
        bytes_io.write(response.raw.read())
        async for chunk in response:
            bytes_io.write(chunk)
        response.raw = bytes_io
        response._content = bytes_io.read()
        response._content_consumed = True

    if cookie_jar is not None:
        cookies = SimpleCookie()
        for c in response.headers.getall("Set-Cookie", ""):
            cookies.load(c)
        response.cookies = requests.cookies.RequestsCookieJar()
        response.cookies.update(cookies)
        cookie_jar.update(cookies)

    if allow_redirects and response.is_redirect:
        path = response.headers["location"]
        return await self.get(path)
    else:
        return response
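# A minimal usage sketch, assuming the open() coroutine above belongs to a
# TestClient bound to an ASGI app; `TestClient`, `app`, and the routes are
# assumptions, not part of the original code.
async def test_login_keeps_session_cookie():
    client = TestClient(app)
    response = await client.open(
        "/login",
        method="POST",
        form={"username": "alice", "password": "secret"},
    )
    assert response.status_code == 200
    # Set-Cookie values were merged into client.cookie_jar above, so this
    # follow-up request carries the session cookie automatically.
    profile = await client.open("/profile")
    assert profile.status_code == 200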
class Client(object):
    """
    A class that can act as a client for testing purposes.

    It allows the user to compose GET and POST requests, and obtain the
    response that the server gave to those requests. The server Response
    objects are annotated with the details of the contexts and templates
    that were rendered during the process of serving the request.

    Client objects are stateful - they will retain cookie (and thus session)
    details for the lifetime of the Client instance.

    This is not intended as a replacement for Twill/Selenium or the like -
    it is here to allow testing against the contexts and templates produced
    by a view, rather than the HTML rendered to the end-user.
    """

    def __init__(self, cookies=None, handler_class=LocalHandler, **defaults):
        self.handler = handler_class()
        self.defaults = {'SERVER_NAME': 'localserver'}
        self.defaults.update(defaults)
        self.cookies = SimpleCookie(cookies or {})
        self.exc_info = None
        self.errors = StringIO()

    def _session(self):
        """
        Obtains the current session variables.
        """
        if 'django.contrib.sessions' in settings.INSTALLED_APPS:
            engine = import_module(settings.SESSION_ENGINE)
            cookie = self.cookies.get(settings.SESSION_COOKIE_NAME, None)
            if cookie:
                return engine.SessionStore(cookie.value)
        return {}
    session = property(_session)

    def request(self, **request):
        """
        The master request method. Composes the environment dictionary and
        passes it to the handler, returning the result of the handler.
        Assumes defaults for the query environment, which can be overridden
        using the arguments to the request.
        """
        environ = {
            'HTTP_COOKIE': self.cookies.output(header='', sep='; '),
            'PATH_INFO': '/',
            'QUERY_STRING': '',
            'REMOTE_ADDR': '127.0.0.1',
            'REQUEST_METHOD': 'GET',
            'SCRIPT_NAME': '',
            'SERVER_NAME': 'testserver',
            'SERVER_PORT': '80',
            'SERVER_PROTOCOL': 'HTTP/1.1',
            'wsgi.version': (1, 0),
            'wsgi.url_scheme': 'http',
            'wsgi.errors': self.errors,
            'wsgi.multiprocess': True,
            'wsgi.multithread': False,
            'wsgi.run_once': False,
        }
        environ.update(self.defaults)
        environ.update(request)

        try:
            response = self.handler(environ)
        except TemplateDoesNotExist as e:
            # If the view raises an exception, Django will attempt to show
            # the 500.html template. If that template is not available,
            # we should ignore the error in favor of re-raising the
            # underlying exception that caused the 500 error. Any other
            # template found to be missing during view error handling
            # should be reported as-is.
            if e.args != ('500.html',):
                raise

        # Update persistent cookie data.
        if response.cookies:
            self.cookies.update(response.cookies)

        return response

    def get(self, path, data={}, follow=False, **extra):
        """
        Requests a response from the server using GET.
        """
        parsed = urlparse(path)
        r = {
            'CONTENT_TYPE': 'text/html; charset=utf-8',
            'PATH_INFO': unquote(parsed[2]),
            'QUERY_STRING': urlencode(data, doseq=True) or parsed[4],
            'REQUEST_METHOD': 'GET',
            'wsgi.input': FakePayload(''),
        }
        r.update(extra)

        response = self.request(**r)
        if follow:
            response = self._handle_redirects(response, **extra)
        return response

    def post(self, path, data={}, content_type=MULTIPART_CONTENT, follow=False, **extra):
        """
        Requests a response from the server using POST.
        """
        if content_type is MULTIPART_CONTENT:
            post_data = encode_multipart(BOUNDARY, data)
        else:
            # Encode the content so that the byte representation is correct.
            match = CONTENT_TYPE_RE.match(content_type)
            if match:
                charset = match.group(1)
            else:
                charset = settings.DEFAULT_CHARSET
            post_data = smart_str(data, encoding=charset)

        parsed = urlparse(path)
        r = {
            'CONTENT_LENGTH': len(post_data),
            'CONTENT_TYPE': content_type,
            'PATH_INFO': unquote(parsed[2]),
            'QUERY_STRING': parsed[4],
            'REQUEST_METHOD': 'POST',
            'wsgi.input': FakePayload(post_data),
        }
        r.update(extra)

        response = self.request(**r)
        if follow:
            response = self._handle_redirects(response, **extra)
        return response

    def head(self, path, data={}, follow=False, **extra):
        """
        Request a response from the server using HEAD.
        """
        parsed = urlparse(path)
        r = {
            'CONTENT_TYPE': 'text/html; charset=utf-8',
            'PATH_INFO': unquote(parsed[2]),
            'QUERY_STRING': urlencode(data, doseq=True) or parsed[4],
            'REQUEST_METHOD': 'HEAD',
            'wsgi.input': FakePayload(''),
        }
        r.update(extra)

        response = self.request(**r)
        if follow:
            response = self._handle_redirects(response, **extra)
        return response

    def options(self, path, data={}, follow=False, **extra):
        """
        Request a response from the server using OPTIONS.
        """
        parsed = urlparse(path)
        r = {
            'PATH_INFO': unquote(parsed[2]),
            'QUERY_STRING': urlencode(data, doseq=True) or parsed[4],
            'REQUEST_METHOD': 'OPTIONS',
            'wsgi.input': FakePayload(''),
        }
        r.update(extra)

        response = self.request(**r)
        if follow:
            response = self._handle_redirects(response, **extra)
        return response

    def put(self, path, data={}, content_type=MULTIPART_CONTENT, follow=False, **extra):
        """
        Send a resource to the server using PUT.
        """
        if content_type is MULTIPART_CONTENT:
            post_data = encode_multipart(BOUNDARY, data)
        else:
            post_data = data

        # Make `data` into a querystring only if it's not already a string. If
        # it is a string, we'll assume that the caller has already encoded it.
        query_string = None
        if not isinstance(data, basestring):
            query_string = urlencode(data, doseq=True)

        parsed = urlparse(path)
        r = {
            'CONTENT_LENGTH': len(post_data),
            'CONTENT_TYPE': content_type,
            'PATH_INFO': unquote(parsed[2]),
            'QUERY_STRING': query_string or parsed[4],
            'REQUEST_METHOD': 'PUT',
            'wsgi.input': FakePayload(post_data),
        }
        r.update(extra)

        response = self.request(**r)
        if follow:
            response = self._handle_redirects(response, **extra)
        return response

    def delete(self, path, data={}, follow=False, **extra):
        """
        Send a DELETE request to the server.
        """
        parsed = urlparse(path)
        r = {
            'PATH_INFO': unquote(parsed[2]),
            'QUERY_STRING': urlencode(data, doseq=True) or parsed[4],
            'REQUEST_METHOD': 'DELETE',
            'wsgi.input': FakePayload(''),
        }
        r.update(extra)

        response = self.request(**r)
        if follow:
            response = self._handle_redirects(response, **extra)
        return response

    def _handle_redirects(self, response, **extra):
        "Follows any redirects by requesting responses from the server using GET."
        response.redirect_chain = []
        while response.status_code in (301, 302, 303, 307):
            url = response['Location']
            scheme, netloc, path, query, fragment = urlsplit(url)

            redirect_chain = response.redirect_chain
            redirect_chain.append((url, response.status_code))

            if scheme:
                extra['wsgi.url_scheme'] = scheme

            # The test client doesn't handle external links,
            # but since the situation is simulated in test_client,
            # we fake things here by ignoring the netloc portion of the
            # redirected URL.
            response = self.get(path, QueryDict(query), follow=False, **extra)
            response.redirect_chain = redirect_chain

            # Prevent loops
            if response.redirect_chain[-1] in response.redirect_chain[0:-1]:
                break
        return response
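# Hedged usage sketch for the Client above; the URLs and form fields are
# invented for illustration.
client = Client()

# GET builds the query string from `data`; cookies persist on the client.
response = client.get('/accounts/login/', {'next': '/dashboard/'})
assert response.status_code == 200

# POST sends multipart form data by default and can follow redirects.
response = client.post(
    '/accounts/login/',
    {'username': 'alice', 'password': 'secret'},
    follow=True,
)
assert response.redirect_chain  # (url, status_code) pairs for each hop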
class WebTest(tornado.testing.AsyncHTTPTestCase):
    def __init__(self, *rest):
        self.cookies = SimpleCookie()
        tornado.testing.AsyncHTTPTestCase.__init__(self, *rest)

    def setUp(self):
        super(WebTest, self).setUp()

        # create database
        conn = functions.DB.conn(config.DB["username"], config.DB["password"], "")
        x = conn.cursor()
        x.execute(
            "DROP DATABASE IF EXISTS `{db}`; CREATE DATABASE `{db}`;".format(
                db=config.DB["db"]
            )
        )
        x.close()
        conn.close()

        # create tables
        conn = functions.DB.conn(
            config.DB["username"], config.DB["password"], config.DB["db"]
        )
        functions.DB.execute_sql_in_file(conn, config.ROOT + "/sql/schema.sql")
        conn.close()

    def get_app(self):
        client.server_settings["debug"] = False
        client.server_settings["xsrf_cookies"] = False
        return tornado.web.Application(
            client.web_urls.www_urls, **client.server_settings
        )

    def _update_cookies(self, headers):
        cs = str(headers["Set-Cookie"])
        cs = escape.native_str(cs)
        cookies = cs.split(",")
        for cookie in cookies:
            self.cookies.update(SimpleCookie(cookie))

    def fetch(self, url, *r, **kw):
        if "follow_redirects" not in kw:
            kw["follow_redirects"] = False

        header = {"Cookie": ""}
        for cookie in self.cookies:
            header["Cookie"] += cookie + "=" + self.cookies[cookie].value + "; "

        resp = tornado.testing.AsyncHTTPTestCase.fetch(
            self, url, headers=header, *r, **kw
        )
        self._update_cookies(resp.headers)
        return resp

    def post(self, url, data, *r, **kw):
        body = urllib.parse.urlencode(data)
        return self.fetch(url, body=body, method="POST", *r, **kw)

    def get_cookie(self, name):
        cookie = decode_signed_value(
            config.COOKIE_SECRET, name, self.cookies[name].value
        )
        if cookie:
            return cookie.decode()
        return None
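# Sketch of a test built on the cookie-aware fetch()/post() helpers above;
# the /login route and the "user_id" cookie name are assumptions.
class LoginTest(WebTest):
    def test_login_sets_signed_cookie(self):
        resp = self.post("/login", {"email": "user@example.com", "password": "secret"})
        # fetch() disables redirect following, so a successful login is
        # visible as a 302 whose Set-Cookie header lands in self.cookies.
        self.assertEqual(resp.code, 302)
        self.assertIsNotNone(self.get_cookie("user_id"))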
class Chinaso(SearchEngine):
    name = 'Chinaso'
    fake_url = True
    source_importance = 2

    page_size = 10

    def __init__(self):
        self.cookies = SimpleCookie()
        asyncio.ensure_future(self.update_cookies())

    def search_url(self, query):
        return 'http://www.chinaso.com/search/pagesearch.htm?q={}'.format(quote(query))

    def page_requests(self, query, **kwargs):
        max_records = kwargs.get('data_source_results')
        if max_records is None:
            max_records = self.page_size
        for num in range(0, max_records, self.page_size):
            url = 'http://www.chinaso.com/search/pagesearch.htm?q={}&page={}&wd={}'.format(
                quote(query), num // self.page_size + 1, quote(query))
            yield HttpRequest(url)

    def before_request(self, request):
        self.set_cookie_header(request, self.cookies)

    def after_request(self, response):
        self.cookies.update(self.get_cookies_in_response(response))

    def extract_results(self, response):
        selector = Selector(response.text)
        for item in selector.css('li.reItem'):
            a = item.css('h2>a')
            if len(a) <= 0:
                continue
            title = a[0].text.strip()
            text = None
            div = item.css('div.reNewsWrapper')
            if len(div) > 0:
                text = div[0].text.strip().split('\n')[0]
            url = urljoin('http://www.chinaso.com/search/',
                          a[0].attr('href').strip())
            if text is not None:
                yield {'title': title, 'text': text, 'url': url}

    async def update_cookies(self):
        while True:
            try:
                url = 'http://www.chinaso.com/search/pagesearch.htm?q={}'.format(
                    quote('中国搜索'))
                try:
                    req = HttpRequest(url, allow_redirects=False)
                    await self.extension.handle_request(req)
                    resp = await self.downloader.fetch(req)
                except HttpError as e:
                    resp = e.response
                cookies = self.get_cookies_in_response(resp)
                self.cookies.update(cookies)
            except Exception as e:
                log.warning('Failed to update cookies: %s', e)
            finally:
                await asyncio.sleep(5 * 60)
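# The SearchEngine base class is not shown here; the following is only a
# plausible sketch of the two cookie helpers these engines call, assuming
# request/response objects with multidict-style `headers`.
from http.cookies import SimpleCookie

class CookieHelpersSketch:
    def set_cookie_header(self, request, cookies):
        # Serialize the stored SimpleCookie into a single "Cookie" request header.
        if cookies:
            request.headers['Cookie'] = cookies.output(attrs=[], header='', sep='; ').strip()

    def get_cookies_in_response(self, response):
        # Collect every Set-Cookie header of the response into a SimpleCookie.
        cookies = SimpleCookie()
        for value in response.headers.getall('Set-Cookie', ()):
            cookies.load(value)
        return cookies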
class Baidu(SearchEngine):
    name = 'Baidu'
    fake_url = True
    source_importance = 2

    page_size = 10

    def __init__(self):
        self.cookies = SimpleCookie()
        asyncio.ensure_future(self.update_cookies())

    def search_url(self, query):
        return 'https://www.baidu.com/s?wd={}'.format(quote(query))

    def page_requests(self, query, **kwargs):
        max_records = kwargs.get('data_source_results')
        recent_days = kwargs.get('recent_days')
        site = kwargs.get('site')
        if max_records is None:
            max_records = self.page_size
        if site:
            query = query + " site:" + site
        if recent_days:
            today = datetime.now()
            if recent_days == 1:
                start = today + timedelta(days=-1)
            elif recent_days == 7:
                start = today + timedelta(days=-7)
            elif recent_days == 30:
                start = today + timedelta(days=-30)
            else:
                raise ValueError('recent_days: {}'.format(recent_days))
            start, end = int(time.mktime(start.timetuple())), int(
                time.mktime(today.timetuple()))
            raw_url = 'http://www.baidu.com/s?wd={}&gpc=stf%3D{}%2C{}|stftype%3D1'.format(
                quote(query), start, end)
        else:
            raw_url = 'http://www.baidu.com/s?wd={}'.format(quote(query))
        for num in range(0, max_records, self.page_size):
            url = '{}&pn={}'.format(raw_url, num)
            yield HttpRequest(url)

    def extract_results(self, response):
        selector = Selector(response.text)
        for item in selector.css('div.result'):
            title = item.css('h3>a')[0].text.strip()
            text = None
            abstract = item.css('div.c-abstract')
            if len(abstract) > 0:
                text = abstract[0].text.strip()
            url = item.css('h3>a')[0].attr('href').strip()
            if text is not None:
                yield {'title': title, 'text': text, 'url': url}

    def before_request(self, request):
        self.set_cookie_header(request, self.cookies)

    def after_request(self, response):
        self.cookies.update(self.get_cookies_in_response(response))

    async def update_cookies(self):
        """Refresh cookies via the home page periodically to avoid being banned."""
        while True:
            try:
                req = HttpRequest('http://www.baidu.com/')
                await self.extension.handle_request(req)
                resp = await self.downloader.fetch(req)
                self.cookies.update(self.get_cookies_in_response(resp))
            except Exception as e:
                log.warning('Failed to update cookies: %s', e)
            finally:
                await asyncio.sleep(5 * 60)
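# Quick illustration of the gpc time-range parameter assembled above; the
# date and query are made up, only the arithmetic matters.
import time
from datetime import datetime, timedelta
from urllib.parse import quote

today = datetime(2024, 1, 8, 12, 0, 0)
start = today + timedelta(days=-7)
start_ts = int(time.mktime(start.timetuple()))
end_ts = int(time.mktime(today.timetuple()))
url = 'http://www.baidu.com/s?wd={}&gpc=stf%3D{}%2C{}|stftype%3D1'.format(
    quote('example query'), start_ts, end_ts)
# url now ends with ...&gpc=stf%3D<start_ts>%2C<end_ts>|stftype%3D1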
class So(SearchEngine):
    name = 'So'
    fake_url = False
    source_importance = 1

    page_size = 10

    def __init__(self):
        self.cookies = SimpleCookie()
        asyncio.ensure_future(self.update_cookies())

    def search_url(self, query):
        return 'https://www.so.com/s?q={}'.format(quote(query))

    def page_requests(self, query, **kwargs):
        max_records = kwargs.get('data_source_results')
        recent_days = kwargs.get('recent_days')
        site = kwargs.get('site')
        if site:
            query = query + " site:" + site
        if recent_days:
            if recent_days == 1:
                adv_t = 'd'
            elif recent_days == 7:
                adv_t = 'w'
            elif recent_days == 30:
                adv_t = 'm'
            else:
                raise ValueError('recent_days: {}'.format(recent_days))
            raw_url = 'https://www.so.com/s?q={}&adv_t={}'.format(
                quote(query), adv_t)
        else:
            raw_url = 'https://www.so.com/s?q={}'.format(quote(query))
        if max_records is None:
            max_records = self.page_size
        for num in range(0, max_records, self.page_size):
            url = '{}&pn={}'.format(raw_url, num // self.page_size + 1)
            yield HttpRequest(url)

    def before_request(self, request):
        self.set_cookie_header(request, self.cookies)

    def after_request(self, response):
        self.cookies.update(self.get_cookies_in_response(response))

    def extract_results(self, response):
        selector = Selector(response.text)
        for item in selector.css('li.res-list'):
            title = item.css('h3>a')[0].text.strip()
            text = None
            res_desc = item.css('p.res-desc')
            if len(res_desc) > 0:
                text = res_desc[0].text.strip()
            else:
                res_rich = item.css('div.res-rich')
                if len(res_rich) > 0:
                    text = res_rich[0].text.strip()
            h3_a = item.css('h3>a')[0]
            url = h3_a.attr('data-url')
            if not url:
                url = h3_a.attr('href').strip()
            if text is not None:
                yield {'title': title, 'text': text, 'url': url}

    async def update_cookies(self):
        """Refresh cookies via the home page periodically to avoid being banned."""
        while True:
            try:
                req = HttpRequest('https://www.so.com/')
                await self.extension.handle_request(req)
                resp = await self.downloader.fetch(req)
                self.cookies.update(self.get_cookies_in_response(resp))
            except Exception as e:
                log.warning('Failed to update cookies: %s', e)
            finally:
                await asyncio.sleep(5 * 60)
class Sogou(SearchEngine):
    name = 'Sogou'
    fake_url = True
    source_importance = 2

    page_size = 20

    def __init__(self):
        self.cookies = SimpleCookie()
        self.cookies['com_sohu_websearch_ITEM_PER_PAGE'] = str(self.page_size)
        asyncio.ensure_future(self.update_cookies())

    def search_url(self, query):
        return 'https://www.sogou.com/web?query={}'.format(quote(query))

    def page_requests(self, query, **kwargs):
        """
        Time filters map to query parameters as follows:
            tsn=1&sourceid=inttime_day
            tsn=2&sourceid=inttime_week
            tsn=3&sourceid=inttime_month
        Site-restricted query example: 北京+site%3A*.gov.cn
        """
        max_records = kwargs.get('data_source_results')
        recent_days = kwargs.get('recent_days')
        site = kwargs.get('site')
        if site:
            query = query + " site:" + site
        if recent_days:
            if recent_days == 1:
                tsn, sourceid = 1, "inttime_day"
            elif recent_days == 7:
                tsn, sourceid = 2, "inttime_week"
            elif recent_days == 30:
                tsn, sourceid = 3, "inttime_month"
            else:
                raise ValueError('recent_days: {}'.format(recent_days))
            raw_url = 'https://www.sogou.com/web?query={}&tsn={}&sourceid={}'.format(
                quote(query), tsn, sourceid)
        else:
            raw_url = 'https://www.sogou.com/web?query={}'.format(quote(query))
        if max_records is None:
            max_records = self.page_size
        for num in range(0, max_records, self.page_size):
            url = '{}&page={}&ie=utf8'.format(raw_url, num // self.page_size + 1)
            yield HttpRequest(url)

    def before_request(self, request):
        self.set_cookie_header(request, self.cookies)

    def after_request(self, response):
        self.cookies.update(self.get_cookies_in_response(response))

    def extract_results(self, response):
        selector = Selector(response.text)
        for item in selector.css('div.vrwrap,div.rb'):
            h = item.css('h3>a')
            if len(h) <= 0:
                continue
            title = h[0].text.strip()
            text = None
            div_ft = item.css('div.ft')
            if len(div_ft) > 0:
                text = div_ft[0].text.strip()
            else:
                p_str = item.css('p.str_info')
                if len(p_str) > 0:
                    text = p_str[0].text.strip()
            url = urljoin('https://www.sogou.com/', h[0].attr('href').strip())
            if text is not None:
                yield {'title': title, 'text': text, 'url': url}

    async def update_cookies(self):
        """Refresh cookies via the home page periodically to avoid being banned."""
        while True:
            try:
                req = HttpRequest('https://www.sogou.com/')
                await self.extension.handle_request(req)
                resp = await self.downloader.fetch(req)
                self.cookies.update(self.get_cookies_in_response(resp))
            except Exception as e:
                log.warning('Failed to update cookies: %s', e)
            finally:
                await asyncio.sleep(5 * 60)
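# Quick check of how the per-page cookie set in Sogou.__init__ is serialized
# for the Cookie header; this only exercises SimpleCookie, nothing
# Sogou-specific is assumed.
from http.cookies import SimpleCookie

cookies = SimpleCookie()
cookies['com_sohu_websearch_ITEM_PER_PAGE'] = '20'
print(cookies.output(attrs=[], header='', sep='; ').strip())
# -> com_sohu_websearch_ITEM_PER_PAGE=20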