async def mock_async_get(status, text, lag):
    await asyncio.sleep(lag)
    if status == 'Timed out':
        raise asyncio.TimeoutError
    elif status == 'Invalid URL':
        # aiohttp.InvalidURL requires the offending URL as its first
        # argument; the mock has no real URL, so pass an empty string.
        raise aiohttp.InvalidURL('')
    elif status == "Can't connect: blah":
        raise aiohttp.client_exceptions.ClientConnectionError('blah')
    return MockResponse(status, text)
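MockResponse is used here (and in the session_get mocks further down) but never defined in these snippets. A minimal stand-in might look like the sketch below; the class name comes from the snippets, while the status/text attributes and the coroutine text() method are assumptions about how the code under test reads the response.

# Hypothetical stand-in for the undefined MockResponse helper used by the
# mocks in this listing. Attribute and method names are assumptions based
# on how the mocks construct it (status code plus body text).
class MockResponse:
    def __init__(self, status, text):
        self.status = status   # mirrors aiohttp.ClientResponse.status
        self._text = text

    async def text(self):
        # aiohttp's ClientResponse.text() is a coroutine, so the mock's is too
        return self._text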
async def spooled_data_from_url(
    url: str,
    headers: Dict[str, str] = {},
    timeout: aiohttp.ClientTimeout = None,
    *,
    ssl: Optional[ssl.SSLContext] = None,
):
    """
    Download `url` to a tempfile and yield `(bytesio, headers, charset)`.

    `bytesio` is backed by a temporary file: the file at path
    `bytesio.name` will exist within this context.

    Raise aiohttp.ClientError on generic error. Subclasses of note:

    * aiohttp.InvalidURL on invalid URL
    * aiohttp.ClientResponseError when HTTP status is not 200
    * aiohttp.ClientPayloadError when server closes connection prematurely
    * aiohttp.ClientConnectionError (OSError) when connection fails

    Raise asyncio.TimeoutError when `timeout` seconds have expired.
    """
    # aiohttp internally performs URL canonicalization before sending the
    # request. DISABLE THIS: it breaks OAuth and users' expectations.
    #
    # https://github.com/aio-libs/aiohttp/issues/3424
    url = yarl.URL(url, encoded=True)  # prevent magic
    if url.scheme not in ("http", "https"):
        raise aiohttp.InvalidURL("URL must start with http:// or https://")

    with tempfile_context(prefix="loadurl") as spool_path:
        async with aiohttp.ClientSession() as session:
            # raise aiohttp.ClientError, asyncio.TimeoutError
            async with session.get(
                url, headers=headers, timeout=timeout, ssl=ssl
            ) as response:
                # raise aiohttp.ClientResponseError
                response.raise_for_status()
                headers = response.headers
                charset = response.charset
                with spool_path.open("wb") as spool:
                    # raise aiohttp.ClientPayloadError
                    async for blob in response.content.iter_chunked(_ChunkSize):
                        spool.write(blob)
        yield spool_path.open("rb"), headers, charset
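A possible way to consume the generator above, assuming it is wrapped with contextlib.asynccontextmanager (the decorator is not visible in the snippet, but the yield and the phrase "within this context" suggest it). The calling function, URL, and timeout value below are illustrative only.

# Hedged usage sketch: assumes spooled_data_from_url is exposed as an async
# context manager. The caller name and URL are made up for illustration.
import asyncio

import aiohttp


async def print_first_kilobyte(url: str) -> None:
    try:
        async with spooled_data_from_url(
            url, timeout=aiohttp.ClientTimeout(total=30)
        ) as (bytesio, headers, charset):
            # `bytesio` is an open file handle backed by the spooled tempfile
            print(headers.get("Content-Type"), charset)
            print(bytesio.read(1024))
    except asyncio.TimeoutError:
        print("timed out")
    except aiohttp.InvalidURL:
        print("not an http:// or https:// URL")
    except aiohttp.ClientResponseError as err:
        print(f"server answered with HTTP {err.status}")
    except aiohttp.ClientError as err:
        print(f"download failed: {err!r}")


# asyncio.run(print_first_kilobyte("https://example.com/data.csv"))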
async def _update_model_from_server(model_server: EndpointConfig, agent: "Agent") -> None: """Load a zipped Rasa Core model from a URL and update the passed agent.""" if not is_url(model_server.url): raise aiohttp.InvalidURL(model_server.url) model_directory_and_fingerprint = await _pull_model_and_fingerprint( model_server, agent.fingerprint) if model_directory_and_fingerprint: model_directory, new_model_fingerprint = model_directory_and_fingerprint _load_and_set_updated_model(agent, model_directory, new_model_fingerprint) else: logger.debug(f"No new model found at URL {model_server.url}")
async def _update_model_from_server(
    model_server: EndpointConfig, agent: 'Agent'
) -> None:
    """Load a zipped Rasa Core model from a URL and update the passed agent."""
    if not is_url(model_server.url):
        raise aiohttp.InvalidURL(model_server.url)

    model_directory = tempfile.mkdtemp()
    new_model_fingerprint = await _pull_model_and_fingerprint(
        model_server, model_directory, agent.fingerprint
    )
    if new_model_fingerprint:
        _load_and_set_updated_model(agent, model_directory, new_model_fingerprint)
    else:
        logger.debug("No new model found at URL {}".format(model_server.url))
async def get_next_page(session, csrf_token, insta_gis, query_hash, variables):
    """
    Return edges with media info and a cursor for the next query.
    """
    cookies = {'csrftoken': csrf_token}
    # Add cookies to the given session (aiohttp.ClientSession)
    session.cookie_jar.update_cookies(cookies)
    # ... and build the headers
    headers = prepare_headers(insta_gis)
    # Build the URL
    url = INSTAGRAM_URL + NEXT_PAGE_URL.format(query_hash=query_hash,
                                               variables=variables)
    response = await session.get(url, headers=headers)
    if response.status == 200:
        json_obj = await response.json()
        data = json_obj['data']['user']['edge_owner_to_timeline_media']
        end_cursor = data['page_info']['end_cursor']
        next_edges = data['edges']
        return next_edges, end_cursor
    elif response.status == 404:
        raise aiohttp.InvalidURL(url=url)
async def session_get(url, *, timeout=None):
    # Silly mock HTTP GET computes the test's input based on its
    # expected output. This defeats the purpose of a test.
    row = results[results['url'] == url]
    if row.empty:
        raise ValueError('called with URL we did not expect')
    index = row.index[0]
    delay = response_times[index]
    await asyncio.sleep(delay)

    status = row.at[index, 'status']
    text = row.at[index, 'html']
    if status == 'Timed out':
        raise asyncio.TimeoutError
    elif status == 'Invalid URL':
        raise aiohttp.InvalidURL(url)
    elif status == "Can't connect: blah":
        raise aiohttp.client_exceptions.ClientConnectionError('blah')
    else:
        return MockResponse(int(status), text)
async def _update_model_from_server(
    model_server: EndpointConfig, agent: Agent
) -> None:
    """Load a zipped Rasa Core model from a URL and update the passed agent."""
    if not is_url(model_server.url):
        raise aiohttp.InvalidURL(model_server.url)

    with tempfile.TemporaryDirectory() as temporary_directory:
        try:
            new_fingerprint = await _pull_model_and_fingerprint(
                model_server, agent.fingerprint, temporary_directory
            )
            if new_fingerprint:
                _load_and_set_updated_model(
                    agent, temporary_directory, new_fingerprint
                )
            else:
                logger.debug(f"No new model found at URL {model_server.url}")
        except Exception:  # skipcq: PYL-W0703
            # TODO: Make this exception more specific, possibly print different log
            # for each one.
            logger.exception(
                "Failed to update model. The previous model will stay loaded instead."
            )
async def session_get(url, *, timeout=None):
    url = str(url)  # undo yarl un-magick-ing

    # Silly mock HTTP GET computes the test's input based on its
    # expected output. This defeats the purpose of a test.
    row = results[results["url"] == url]
    if row.empty:
        raise ValueError("called with URL we did not expect")
    index = row.index[0]
    delay = response_times[index]
    await asyncio.sleep(delay)

    status = row.at[index, "status"]
    text = row.at[index, "html"]
    if status == "Timed out":
        raise asyncio.TimeoutError
    elif status == "Invalid URL":
        raise aiohttp.InvalidURL(url)
    elif status == "Can't connect: blah":
        raise aiohttp.client_exceptions.ClientConnectionError("blah")
    else:
        return MockResponse(int(status), text)
async def spooled_data_from_url(
    url: str, headers: Dict[str, str] = {}, timeout: aiohttp.ClientTimeout = None
):
    """
    Download `url` to a tempfile and yield `(bytesio, headers, charset)`.

    Raise aiohttp.ClientError on generic error. Subclasses of note:

    * aiohttp.InvalidURL on invalid URL
    * aiohttp.ClientResponseError when HTTP status is not 200

    Raise asyncio.TimeoutError when `timeout` seconds have expired.
    """
    # aiohttp internally performs URL canonicalization before sending the
    # request. DISABLE THIS: it breaks OAuth and users' expectations.
    #
    # https://github.com/aio-libs/aiohttp/issues/3424
    url = yarl.URL(url, encoded=True)  # prevent magic
    if url.scheme not in ("http", "https"):
        raise aiohttp.InvalidURL("URL must start with http:// or https://")

    with tempfile.TemporaryFile(prefix="loadurl") as spool:
        async with aiohttp.ClientSession() as session:
            async with session.get(
                url, headers=headers, timeout=timeout, raise_for_status=True
            ) as response:
                response.raise_for_status()
                async for blob in response.content.iter_chunked(_ChunkSize):
                    spool.write(blob)
                headers = response.headers
                charset = response.charset
        spool.seek(0)
        yield spool, headers, charset
async def scraper(account, session, ws):
    """
    Connect to an Instagram account, parse it, and return a list of media URLs.
    """
    # Notify that we are trying to connect
    await ws.send_json({'state': 'connection'})
    url = f'{INSTAGRAM_URL}/{account}/'
    async with session.get(url, headers={"user-agent": USER_AGENT}) as response:
        if response.status == 200:
            # Notify that the connection succeeded and parsing begins
            await ws.send_json({'state': 'connection-completed'})
            await ws.send_json({'state': 'parsing'})
            html = await response.text()
            # Find info about the account in the script embedded in the HTML
            json_str = re.search(r'window._sharedData = (.*);</script>',
                                 html).group(1)
            page_info = json.loads(json_str)
            # Extract the user from the JSON
            user_ = page_info['entry_data']['ProfilePage'][0]['graphql']['user']
            user_id = user_['id']
            # ... and the other values needed for further queries
            end_cursor = user_['edge_owner_to_timeline_media']['page_info'][
                'end_cursor']
            rhx_gis = page_info['rhx_gis']
            # Extract the CSRF token from the cookie
            csrf_token = response.cookies.get('csrftoken')
            # Send the user avatar and the amount of media
            avatar = user_['profile_pic_url']
            total_media = user_['edge_owner_to_timeline_media']['count']
            user_info = {'avatar': avatar, 'total_media': total_media}
            await ws.send_json({'info': ('user-info', user_info)})
            # Get the first 12 posts from the home page ...
            edges = user_['edge_owner_to_timeline_media']['edges']
            # ... and notify about that by simply increasing the parsing iteration
            await ws.send_json({'state': 'parsing-increase'})
            # Fetch the remaining posts page by page
            while end_cursor:
                # First build the right QUERY_PARAMETERS ...
                variables = QUERY_PARAMETERS.format(user_id=user_id,
                                                    end_cursor=end_cursor)
                # ... which are hashed together with rhx_gis via md5 ...
                insta_gis = md5(
                    (rhx_gis + ':' + variables).encode('utf-8')).hexdigest()
                # ... and then passed, with the rest, to get_next_page, which
                # returns edges with media info (to extract URLs from) and the
                # end_cursor for the next query
                next_edges, end_cursor = await get_next_page(
                    session, csrf_token, insta_gis, QUERY_HASH, variables)
                edges.extend(next_edges)
                # Another parsing-iteration notification
                await ws.send_json({'state': 'parsing-increase'})
                # Take a short break for decency
                await asyncio.sleep(2)
            # Notify that parsing completed
            await ws.send_json({'state': 'parsing-completed'})
            # Extract media URLs
            urls = [edge['node']['display_url'] for edge in edges]
            return urls
        elif response.status == 404:
            raise aiohttp.InvalidURL(url=url)
async def test_async_open_Exception(self, web_monitor_app, response_mock):
    response_mock.get(url_list['url'], exception=aiohttp.InvalidURL(''))

    result = await web_monitor_app.async_open(url_list)

    assert 'Unknown' == result['err_status']