async def get_paste(link_id: int, session: aiohttp.client.ClientSession, url: str):
    serial = get_serial(link_id)
    async with session.post(
            f"https://publisher.linkvertise.com/api/v1/redirect/link/{url}/paste",
            json={"serial": serial},
            headers=headers()) as resp:
        json_content: dict = await resp.json()
        return json_content

async def fetch(url: str, session: aiohttp.client.ClientSession) -> dict:
    """Asynchronously fetch a URL, using the specified ClientSession."""
    async with session.get(url) as response:
        resp = await response.json()
        return resp

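# Usage sketch (an assumption, not from the sources above): driving the JSON
# fetch(url, session) helper with one shared ClientSession via asyncio.gather.
# The URLs are placeholders for any endpoints that return JSON.
import asyncio
import aiohttp


async def fetch_all(urls):
    async with aiohttp.ClientSession() as session:
        return await asyncio.gather(*(fetch(url, session) for url in urls))


# results = asyncio.run(fetch_all(["https://httpbin.org/json", "https://httpbin.org/uuid"]))
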
async def redirect_gatherer(url: str, session: aiohttp.client.ClientSession):
    async with session.get(url, headers={"User-Agent": get_random_user_agent()}) as resp:
        history = [str(x.url) for x in resp.history]
        history.append(str(resp.url))
        return history

async def get_req_stats(
    pkg_req: requirements.requirement.Requirement,
    aiohttp_session: aiohttp.client.ClientSession,
    url: str = "https://pypi.org/pypi/{}/json",
) -> Dict:
    pkg_url = url.format(pkg_req.name)
    async with aiohttp_session.get(pkg_url) as response:
        try:
            pkg_json = await response.json()
        except aiohttp.client_exceptions.ContentTypeError:
            LOG.error(f"{pkg_req} does not return JSON ...")
            return {}

    version = str(pkg_req.specs[0][1])
    try:
        version_json = pkg_json["releases"][version].pop()
    except KeyError:
        LOG.error(f"{pkg_req} version does not exist in JSON ...")
        return {}

    upload_dt = datetime.strptime(version_json["upload_time"], "%Y-%m-%dT%H:%M:%S")
    dt_now = datetime.now()

    return {
        "name": pkg_req.name,
        "latest": version == pkg_json["info"]["version"],
        "released_days_ago": (dt_now - upload_dt).days,
        "upload_time": version_json["upload_time"],
        "version": version,
    }

async def update_webrisk(url: str, session: aiohttp.client.ClientSession, pool: asyncpg.pool.Pool):
    log.info(f"Fetching webrisk: {url}")
    params = [("uri", url), ("key", config.webrisk_key),
              ("threatTypes", "MALWARE"),
              ("threatTypes", "SOCIAL_ENGINEERING")]
    async with session.get(
            "https://webrisk.googleapis.com/v1beta1/uris:search",
            params=params) as resp:
        json_content = await resp.json()
        if json_content == {}:
            await insert_blank_webrisk(pool, url)
        else:
            parsed_time = datetime.datetime.strptime(
                json_content["threat"]["expireTime"][:-4],
                "%Y-%m-%dT%H:%M:%S.%f")
            if json_content["threat"]["threatTypes"] == ["SOCIAL_ENGINEERING"]:
                await insert_webrisk(pool=pool, url=url,
                                     social_engineering=True,
                                     expire_time=parsed_time)
            elif json_content["threat"]["threatTypes"] == ["MALWARE"]:
                await insert_webrisk(pool=pool, url=url,
                                     malware=True,
                                     expire_time=parsed_time)
            else:
                await insert_webrisk(pool=pool, url=url,
                                     social_engineering=True,
                                     malware=True,
                                     expire_time=parsed_time)

async def __fetch_and_feed(self, client: aiohttp.client.ClientSession, url, parser: htmlParser, client_num: int):
    # logger.debug(f"client - {client_num}, try to get url <{url}>")
    resp: aiohttp.client.ClientResponse
    while True:
        async with client.get(url) as resp:
            if resp.status < 200 or resp.status >= 300:
                logger.warning(f"server response - {resp.status}")
                break
            try:
                html_data = await resp.text()
            except UnicodeDecodeError as err:
                logger.warning(err)
                break
            # print(html_data)
            parser.feed(html_data)
            m_v = html_url_regex_3.match(url)
            if m_v is None or m_v.group(4) == "":
                logger.warning(f"Processing url <{url}> failed: the URL does not correspond with its content.")
                break
            PageId = int(m_v.group(4))
            if parser.HasContent and (parser.BookName != "" and parser.BookName is not None):
                AppendContext(parser.BookName, PageId, parser.Content)
            break
    await self.__client_list.ReleaseClient(client_num)
    return

async def do_rating(session: aiohttp.client.ClientSession, reviewer: str, length: int):
    global index
    async with session.get(reviewer, headers=HEADERS_GET) as response:
        html = await response.text()
    reviews = [
        str(i) for i in BeautifulSoup(html, 'html.parser').find_all(
            attrs={'class': 'review_stats'})
    ]
    for review in reviews:
        grade = int(
            BeautifulSoup(review, 'html.parser').find(attrs={
                'class': 'review_score'
            }).text)
        game_name = BeautifulSoup(review, 'html.parser').find(attrs={
            'class': 'product_title'
        }).text
        if game_name not in games:
            games.insert(0, game_name)
        if grade >= 9:
            if prog_scores.get(game_name):
                prog_scores[game_name] = prog_scores.get(game_name) + 1
            else:
                prog_scores[game_name] = 1
        elif grade <= 2:
            if prog_scores.get(game_name):
                prog_scores[game_name] = prog_scores.get(game_name) - 1
            else:
                prog_scores[game_name] = -1
    print(str(index + 1) + "/" + str(length))
    index += 1

async def update_internal_hsts(session: aiohttp.client.ClientSession, url, pool: asyncpg.pool.Pool):
    log.info("Updating HSTS status for: {}".format(url))
    async with session.get("https://hstspreload.org/api/v2/status",
                           params={"domain": url}) as resp:
        json_data = await resp.json()
        status = json_data.get("status")
        await update_db_hsts(pool, url, status)
        return status

async def fetch(
        url: str,
        session: aiohttp.client.ClientSession) -> Tuple[bool, Optional[bytes]]:
    async with session.get(url) as response:
        if response.status != 200:
            print('Bad response:', url, response.status, response.reason)
            return True, None
        else:
            return False, await response.read()

async def get_link_id(parsed: ParseResult, session: aiohttp.client.ClientSession, url: str):
    async with session.get(
            f"https://publisher.linkvertise.com/api/v1/redirect/link/static/{url}",
            headers=headers(),
            allow_redirects=False) as resp:
        json_content: dict = await resp.json()
        if json_content["success"]:
            return json_content["data"]["link"]["id"], json_content["data"]["link"]["target_type"]
        raise Linkvertise(f"Got errors from {urlunparse(parsed)}: {', '.join(json_content['messages'])}")

async def update_internal_hsts(session: aiohttp.client.ClientSession, url,
                               pool: asyncpg.pool.Pool,
                               transaction: Hub.current.scope.transaction):
    with transaction.start_child(op="task", description="update_internal_hsts"):
        log.info("Updating HSTS status for: {}".format(url))
        async with session.get("https://hstspreload.org/api/v2/status",
                               params={"domain": url}) as resp:
            json_data = await resp.json()
            status = json_data.get("status")
            await update_db_hsts(pool, url, status)
            return status

async def DownloadImg_async(session: aiohttp.client.ClientSession, img_url: str, path: str, img_name: str):
    async with session.get(img_url) as img_response:  # open the URL with aiohttp
        with open('%s/%s' % (path, img_name), 'wb') as f:  # write the image to disk
            while True:
                chunk = await img_response.content.read(128)  # stream the body in 128-byte chunks
                if not chunk:
                    break
                f.write(chunk)

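# Usage sketch (an assumption, not taken from the sources above): downloading a few
# images concurrently with DownloadImg_async; the URLs, directory, and file names
# are placeholders.
import asyncio
import os
import aiohttp


async def download_all(img_urls, path="downloads"):
    os.makedirs(path, exist_ok=True)
    async with aiohttp.ClientSession() as session:
        await asyncio.gather(*(
            DownloadImg_async(session, url, path, f"img_{i}.jpg")
            for i, url in enumerate(img_urls)
        ))


# asyncio.run(download_all(["https://example.com/a.jpg", "https://example.com/b.jpg"]))
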
async def manual_redirect_gatherer(url: str, session: aiohttp.client.ClientSession, db: asyncpg.pool.Pool):
    urls = [url]
    for internalUrl in urls:
        cached_data = await api.cached.cached(internalUrl, db)
        if cached_data:
            log.info(f"Using cached redirect data for {internalUrl}")
            cached_data = json.loads(cached_data)
            status, _, _, _, _, _, headers, _, _, _, _ = cached_data
            r_url = ""
            if status in (301, 302, 303, 307, 308):
                r_url = headers.get("Location") or headers.get(
                    "location") or headers.get("uri")
                if r_url:
                    if r_url.startswith("/"):
                        parsed = urlparse(internalUrl)
                        r_url = f"{parsed.scheme}://{parsed.netloc}/{r_url[1:]}"
                    log.info(
                        f"Found redirect for {internalUrl} to {r_url} due to status of {status}"
                    )
            if r_url not in urls and r_url != "":
                urls.append(r_url)
            continue
        try:
            async with session.get(
                    internalUrl,
                    headers={"User-Agent": get_random_user_agent()},
                    allow_redirects=False,
                    timeout=10) as resp:
                r_url = ""
                if resp.status in (301, 302, 303, 307, 308):
                    hdrs = resp.headers
                    r_url = hdrs.get("Location") or hdrs.get(
                        "location") or hdrs.get("uri")
                    if r_url:
                        if r_url.startswith("/"):
                            parsed = urlparse(internalUrl)
                            r_url = f"{parsed.scheme}://{parsed.netloc}/{r_url[1:]}"
                        log.info(
                            f"Found redirect for {internalUrl} to {r_url} due to status of {resp.status}"
                        )
                if r_url not in urls and r_url != "":
                    urls.append(r_url)
        except asyncio.exceptions.TimeoutError:
            log.warning(f"Timeout error on {internalUrl}")
    return urls

async def __fetch_results(self,
                          query,
                          number,
                          language_code,
                          user_agent=None,
                          user: UserAsync = None,
                          proxy=None,
                          timeout=5.0,
                          session: aiohttp.client.ClientSession = None,
                          engine='google'):
    url = ''

    # prepare the request link for the chosen engine
    if engine == 'bing':
        url = 'https://www.bing.com/search?q={}&count={}'.format(query, number)
    elif engine == 'google':
        url = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
            query, number, language_code)
    elif engine == 'yahoo':
        url = 'https://search.yahoo.com/search?p={}&n={}&ei=UTF-8'.format(query, number)
    elif engine == 'youtube':
        url = 'https://www.youtube.com/results?search_query={}'.format(query)

    # get the page with a timeout (to imitate user activity)
    async with session.get(url, headers=user.agent, timeout=timeout, proxy=proxy) as response:
        # error checking
        if response.status != 200:
            response.raise_for_status()

        # get the HTML code of the page
        data = await response.text()

        # keep the session cookies
        user.cookies = session.cookie_jar
        # user.cookies = session._cookie_jar._cookies

    # delay between requests
    await asyncio.sleep(timeout)  # sleep(timeout)

    # return the HTML code of the page
    return data

async def platform_data(session: aiohttp.client.ClientSession, platform_url: str):
    async with session.get(platform_url, headers=HEADERS_GET) as response:
        req = await response.text()
    soup = BeautifulSoup(req, 'html.parser')
    sum_reviews = soup.find_all(attrs={'class': 'count'})
    try:
        if "metascore_w user large game tbd" not in req:
            sum_reviews = sum_reviews[1]
            if len(str(sum_reviews).split('user-reviews">')) > 1:
                sum_reviews = str(sum_reviews).split(
                    'user-reviews">')[1].split("Ratings")[0]
                reviews.append((platform_url, sum_reviews))
    except Exception as e:
        print(platform_url, e)

async def duckduckgo(parsed: ParseResult, session: aiohttp.client.ClientSession):
    if "q" in urllib.parse.parse_qs(parsed.query):
        ddg_query = urllib.parse.parse_qs(parsed.query).get("q")[0]
        if ddg_query.startswith("! site:"):
            async with session.get("https://api.duckduckgo.com/",
                                   params={
                                       "q": ddg_query,
                                       "format": "json",
                                       "no_redirect": 1
                                   }) as resp:
                json_content: dict = await resp.json(
                    content_type="application/x-javascript")
                return json_content["Redirect"]
        else:
            log.warning(f"DDG: Unknown query string: {ddg_query}")
            return {"error": "Unknown query string"}

async def update_webrisk(url: str, session: aiohttp.client.ClientSession,
                         pool: asyncpg.pool.Pool,
                         transaction: Hub.current.scope.transaction):
    with transaction.start_child(op="task", description="Updating webrisk"):
        log.info(f"Fetching webrisk: {url}")
        params = [("uri", url), ("key", config.webrisk_key),
                  ("threatTypes", "MALWARE"),
                  ("threatTypes", "SOCIAL_ENGINEERING"),
                  ("threatTypes", "UNWANTED_SOFTWARE")]
        async with session.get("https://webrisk.googleapis.com/v1/uris:search",
                               params=params) as resp:
            json_content: dict = await resp.json()
            if json_content.get("error"):
                log.info("Error with Google API")
                log.error(json_content)
            if json_content == {}:
                await insert_blank_webrisk(pool, url)
            else:
                parsed_time = datetime.datetime.strptime(
                    json_content["threat"]["expireTime"][:-4],
                    "%Y-%m-%dT%H:%M:%S.%f")
                if json_content["threat"]["threatTypes"] == ["SOCIAL_ENGINEERING"]:
                    await insert_webrisk(pool=pool,
                                         url=url,
                                         social_engineering=True,
                                         expire_time=parsed_time)
                elif json_content["threat"]["threatTypes"] == ["MALWARE"]:
                    await insert_webrisk(pool=pool,
                                         url=url,
                                         malware=True,
                                         expire_time=parsed_time)
                else:
                    await insert_webrisk(pool=pool,
                                         url=url,
                                         social_engineering=True,
                                         malware=True,
                                         expire_time=parsed_time)

async def fetch(session: aiohttp.client.ClientSession, url: str) -> tuple:
    """Middleware for articles."""
    async def subfetch(url, subdomain_url):
        parsed = url_parsers.urlparse(url)
        replaced = parsed._replace(netloc=subdomain_url)
        url = replaced.geturl()
        # check whether the server has the required url
        res = await session.head(url)
        return res, url

    async with session.get(url) as response:
        if response.status != 200:
            for subdomain in configs.SUBDOMAINS:
                res, new_url = await subfetch(url, subdomain)
                if res.status != 404:
                    return await fetch(session, new_url)
                else:
                    continue
        content_type = response.content_type
        body = await response.read()
        return body, content_type

async def redirect_gatherer(url: str, session: aiohttp.client.ClientSession):
    async with session.get(url) as resp:
        history = [str(x.url) for x in resp.history]
        history.append(str(resp.url))
        return history

async def fetch(session: aiohttp.client.ClientSession, uri: str) -> dict:
    """Helper function to keep session.get() calls clean."""
    async with session.get(uri) as resp:
        return await resp.json()

async def fetch(session: aiohttp.client.ClientSession, url: str, **kwargs) -> dict:
    """From here: https://stackoverflow.com/questions/22190403/how-could-i-use-requests-in-asyncio/50312981#50312981"""
    async with session.get(url, **kwargs) as response:
        return await response.json()

async def fetch(session: aiohttp.client.ClientSession, url: str) -> str:
    async with session.get(url) as response:
        response.raise_for_status()
        return await response.text()

async def dark_sky(args: argparse.Namespace, client: aiohttp.client.ClientSession,
                   location: geopy.location.Location) -> Any:
    url = 'https://api.forecast.io/forecast/{}/{},{}'.format(
        args.key, location.latitude, location.longitude)
    async with client.get(url) as response:
        assert response.status == 200
        # ClientResponse.read() takes no decode argument; the forecast API
        # returns JSON, so decode it with response.json() instead.
        return await response.json()

async def fetch_spell(*, eng_spell_name: str, session: aiohttp.client.ClientSession, debug: bool = False) \
        -> typing.Optional[Spell]:
    searching_url = 'http://dungeon.su/spells/'
    # searching_url = f'http://dungeon.su/spells/{eng_spell_name}'
    async with session.get(
            searching_url,
            headers={
                'Accept': 'text/html,application/xhtml+xml,application/xml',
                'Content-Type': 'text/html'
            },
            params={'search': eng_spell_name}) as response:
        if debug:
            print(f'Fetching spell "{eng_spell_name}"')
            print(response.url)
        response_binary = await response.read()

    html = BeautifulSoup(response_binary.decode('utf-8'), 'html.parser')
    articles = html.find_all(
        name='div',
        attrs={'itemtype': "https://schema.org/Article"
               })  # type: typing.List[BeautifulSoup.element.Tag]
    if len(articles) == 0:
        print(f'No spells with name "{eng_spell_name}" found')
        return
    elif len(articles) > 1:
        names = []
        for article in articles:
            name_tag = article.find('a', attrs={
                'class': 'item-link',
                'itemprop': 'url'
            })
            if '(' in name_tag.text:
                eng_name = name_tag.text.split('(')[1].strip().replace(')', '')
            else:
                eng_name = name_tag.text
            names.append(eng_name)
        print(
            f'{len(articles)} spells with name "{eng_spell_name}" found: {names}\nPlease refine your search'
        )
        return None

    spell_attributes_dict = {
        'level': SpellAttribute(ru_name='уровень', ru_value=-1, en_name='level', en_value=-1),
        'school': SpellAttribute(ru_name='школа', ru_value='нет', en_name='school', en_value='na'),
        'cast_time': SpellAttribute(ru_name='время накладывания', ru_value='нет', en_name='cast_time', en_value='na'),
        'duration': SpellAttribute(ru_name='длительность', ru_value='na', en_name='duration', en_value='na'),
        'range': SpellAttribute(ru_name='дистанция', ru_value='na', en_name='range', en_value='na'),
        'components': SpellAttribute(ru_name='компоненты', ru_value=[], en_name='components', en_value=[]),
        'classes': SpellAttribute(ru_name='классы', ru_value=[], en_name='classes', en_value=[]),
        'source': SpellAttribute(ru_name='источник', ru_value='na', en_name='source', en_value='na'),
        'higher_levels': SpellAttribute(ru_name='на больших уровнях', ru_value='', en_name='higher levels', en_value='na'),
        'name': SpellAttribute(ru_name='имя', ru_value=eng_spell_name, en_name='name', en_value=''),
        'description': SpellAttribute(ru_name='описание', ru_value='Нет описания', en_name='description', en_value='No description')
    }

    article = articles[0]  # type: BeautifulSoup.element.Tag
    name_tag = article.find('a', attrs={
        'class': 'item-link',
        'itemprop': 'url'
    })
    if not name_tag:
        print(f'Name tag not found for {eng_spell_name}')
        return
    spell_attributes_dict['name'] = SpellAttribute(
        ru_name='имя',
        ru_value=name_tag.text.split('(')[0].strip(),
        en_name='name',
        en_value=name_tag.text.split('(')[1].strip().replace(')', ''))

    article_body = article.find(name='div', attrs={
        "class": "card-body",
        "itemprop": "articleBody"
    })  # type: BeautifulSoup.element.Tag
    if not article_body:
        print(f'Cannot find any spell on html page')
        return None

    for attribute_tag in article_body('ul')[0]('li'):  # iterate over each <li> tag
        if attribute_tag.find(name='div', attrs={'itemprop': 'description'}):
            description_tag = attribute_tag.find(
                name='div', attrs={'itemprop': 'description'})
            if "На больших уровнях:" in description_tag.text:
                ru_desc = description_tag.text.split(
                    'На больших уровнях:')[0].strip()
                ru_higher_levels = description_tag.text.split(
                    'На больших уровнях:')[1].strip()
                spell_attributes_dict['description'] = SpellAttribute(
                    ru_name='описание',
                    ru_value=ru_desc,
                    en_name='description',
                    en_value='')
                spell_attributes_dict['higher_levels'] = SpellAttribute(
                    ru_name='на больших уровнях',
                    ru_value=ru_higher_levels,
                    en_name='higher levels',
                    en_value='')
            else:
                spell_attributes_dict['description'] = SpellAttribute(
                    ru_name='описание',
                    ru_value=description_tag.text,
                    en_name='description',
                    en_value='')
        else:
            ru_name = attribute_tag('strong')[0].text.replace(':', '')
            if ru_name.lower() not in attributes_translations_dict:
                continue
            ru_value = attribute_tag.text.replace(f'{ru_name}:', '').strip().replace(
                '«', '').replace('»', '')
            en_name = attributes_translations_dict[ru_name.lower()]
            spell_attributes_dict[attributes_translations_dict[
                ru_name.lower()]] = SpellAttribute(ru_name=ru_name,
                                                   ru_value=ru_value,
                                                   en_name=en_name,
                                                   en_value='')

    return Spell(
        name=spell_attributes_dict['name'],
        level=spell_attributes_dict['level'],
        cast_time=spell_attributes_dict['cast_time'],
        classes=spell_attributes_dict['classes'],
        components=spell_attributes_dict['components'],
        duration=spell_attributes_dict['duration'],
        higher_levels=spell_attributes_dict['higher_levels'],
        range=spell_attributes_dict['range'],
        school=spell_attributes_dict['school'],
        source=spell_attributes_dict['source'],
        description=spell_attributes_dict['description'],
    )

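# Usage sketch (an assumption, not part of the source above): invoking fetch_spell
# with its own session; 'fireball' is only an example spell name.
import asyncio
import aiohttp


async def _demo_fetch_spell():
    async with aiohttp.ClientSession() as session:
        return await fetch_spell(eng_spell_name='fireball', session=session, debug=True)


# spell = asyncio.run(_demo_fetch_spell())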