def extend_webapp(session: requests.sessions.Session) -> None:
    """Extend the PythonAnywhere web app's expiry.

    Fetches the webapps tab to obtain a fresh CSRF token, then posts the
    extension request to WEB_APP_URL.

    Raises:
        requests.HTTPError: on a failed GET or POST (previously both
            failed silently, unlike extend_task which raises).
    """
    webapps_url = "https://www.pythonanywhere.com/user/{}/webapps/".format(LOGIN)
    r = session.get(webapps_url)
    r.raise_for_status()  # fail loudly, consistent with extend_task
    csrfmiddlewaretoken = csrfgetter(r.text).csrfToken
    r = session.post(
        WEB_APP_URL,
        headers={"Referer": webapps_url},
        data={"csrfmiddlewaretoken": csrfmiddlewaretoken})
    r.raise_for_status()
def get_episodes(self, url: str, sess: requests.sessions.Session) -> Optional[list[str]]:
    """Get season's episodes.

    Scrapes the season page (and any pagination pages found on it) for
    anchors whose text matches ``Episode <self.episodes_regex>``.

    Args:
        url: season page URL.
        sess: requests session used for all fetches.

    Returns:
        List of episode hrefs, or None when nothing matched.
    """
    logger.debug("Getting season's episodes")
    # Compile once; the original duplicated the whole scrape block per page.
    episode_pattern = re.compile(fr'^Episode 0?({self.episodes_regex})')

    def _scrape(page_url):
        # Fetch one page; return (soup, hrefs of matching episode anchors).
        with sess.get(page_url) as resp:
            logger.debug("Getting url content")
            data = resp.content
        logger.debug("Creating beautiful soup parser")
        soup = BeautifulSoup(data, "html.parser")
        logger.debug("Using beautiful soup object to find elements matching regex")
        # NOTE: `text=` is the legacy alias of `string=` in bs4; kept for parity.
        anchors = soup.find_all("a", text=episode_pattern)
        return soup, [a.get('href') for a in anchors]

    first_soup, episode_links = _scrape(url)

    logger.debug("Using beautiful soup object to find pages elements")
    pagination = first_soup.find("div", {"class": "pagination"})
    pages = []
    if pagination:
        pages = [a.get('href') for a in pagination.findChildren("a")]
    else:
        logger.warning("Could not find pages element")

    if pages:
        logger.debug("Getting links from the other pages")
        for page in pages:
            _, links = _scrape(page)
            episode_links.extend(links)

    if episode_links:
        return episode_links
    logger.warning("No episode links found")
def email_and_password_auth(self, session: requests.sessions.Session, email: str,
                            password: str) -> requests.models.Response:
    """Log in with email and password, returning the raw login response.

    Primes the session with a CSRF cookie first, then posts the
    credentials with the matching ``x-xsrf-token`` header.
    """
    # Prime the XSRF-TOKEN cookie before attempting the login.
    session.get(f"{base}/id/api/csrf")
    payload = {
        "email": email,
        "password": password,
        "rememberMe": False,
        "captcha": ""
    }
    xsrf_headers = {"x-xsrf-token": session.cookies.get("XSRF-TOKEN")}
    return session.post(
        f"{base}/id/api/login",
        headers=xsrf_headers,
        data=payload,
        cookies=session.cookies)
def check_Login(session: requests.sessions.Session) -> bool:
    """Return True when the passport home page shows the logged-in greeting."""
    html = session.get("http://passport.shangxueba.com").text
    # Raw string: "\s"/"\S" inside a plain string literal are invalid escape
    # sequences (SyntaxWarning on modern Python, future SyntaxError).
    pattern = re.compile(r"<p class=\"persPcConRiOneP2\">\s+\S+,欢迎您!</p>", re.S)
    return re.search(pattern, html) is not None
def _download_image(self, session: requests.sessions.Session, comic_url: str,
                    filename: str) -> None:
    """Download an image into the ``xkcd`` directory.

    Args:
        session: the Session object used for the request.
        comic_url: URL of the image to fetch.
        filename: name of the file to create inside ``xkcd``.

    Returns:
        None. Non-200 responses are skipped silently (e.g. comics 1608
        and 1668 have no plain image and return 404).
    """
    response = session.get(comic_url)
    if response.status_code != 200:
        # No downloadable image for this comic — nothing to save.
        return None
    target = os.path.join('xkcd', filename)
    # 'xb' refuses to overwrite an already-downloaded file.
    with open(target, 'xb') as image_file:
        if not self.run_mode:
            print(f'Downloading image {comic_url}...')
        for chunk in response.iter_content(100000):
            image_file.write(chunk)
def get_answer(session: requests.sessions.Session, queston_id: int):
    """Fetch the best answer for a question id and return it as plain text.

    Returns:
        The answer converted by html2text, or None when the answer block
        is not present (previously this raised AttributeError on
        ``res.group``).
    """
    answer_url = "http://www.shangxueba.com/ask/ajax/zuijiainfo.aspx?id={queston_id}".format(
        queston_id=queston_id)
    html = session.get(answer_url).text
    # Literal "\n" between tags is intentional: the page renders each tag
    # on its own line.
    pattern = re.compile("<div class=\"xj_contextinfo\">\n<h6>\n(.*?)\n</h6>\n</div>", re.S)
    res = re.search(pattern, html)
    if res is None:
        return None
    return html2text.html2text(res.group(1))
def get_review_texts_by_url(relative_reviews_urls: list, session: requests.sessions.Session,
                            root_url: str) -> List[Tuple[str, str]]:
    """Download review pages and extract their text.

    :param relative_reviews_urls: list of URLs relative to root_url.
    :param session: requests session used for all downloads.
    :param root_url: URL prefix of every book review URL.
    :return: list of (absolute URL, review text) pairs.
    """
    texts = []
    # Plain raw string — the original used an f-string prefix with no
    # placeholders; compile once outside the loop.
    whitespace_re = re.compile(r"[\t ]+")
    for relative_url in tqdm(relative_reviews_urls):
        review_url = f"{root_url}/{relative_url}"
        response = session.get(review_url, headers={"User-Agent": "Mozilla/5.0"})
        response.encoding = "utf-8"
        soup = BeautifulSoup(response.text, "html.parser")
        text = soup.find("div", {"class": "universal-blocks-content"}).text
        texts.append((review_url, whitespace_re.sub(" ", text)))
    return texts
def parse_stories(session: requests.sessions.Session, num_of_stories: int):
    """Count tags over the newest ``num_of_stories`` unique stories.

    Pages through HOME, deduplicating stories by ``data-story-id``, and
    tallies every tag seen.

    Returns:
        collections.Counter mapping tag -> occurrence count.
    """
    tags_counter = Counter()
    id_stories = set()
    # BUG FIX: the page counter was named ``i`` and shadowed by the inner
    # ``for i, story in enumerate(...)`` loop, so after the first page the
    # "next page" number was the last enumerate index, not page + 1.
    page = 1
    paramload = {'page': str(page)}
    complete = False
    while not complete:
        request = session.get(HOME, params=paramload)
        print('request for new page', request.status_code)
        soup = BeautifulSoup(request.text, 'lxml')
        stories = soup.find_all('article', class_='story')
        for story in stories:
            if story['data-story-id'] not in id_stories:
                id_stories.add(story['data-story-id'])
                print('story id', story['data-story-id'])
                try:
                    # Strip the surrounding brackets/whitespace from the tag text.
                    tags_str = story.find('div', class_='story__tags tags').text[1:-1]
                    print(tags_str)
                    for tag in tags_str.split(' '):
                        tags_counter[tag] += 1
                except AttributeError:
                    print('story without tags')
                if len(id_stories) == num_of_stories:
                    complete = True
                    break
        print(len(id_stories))
        page += 1
        paramload['page'] = str(page)
    return tags_counter
def get_episode_quality_link(self, url: str, sess: requests.sessions.Session) -> Optional[str]:
    """Return the episode's download href, preferring the HD Mp4 link."""
    logger.debug("Getting episode's quality download link")
    with sess.get(url) as resp:
        logger.debug("Getting url content")
        data = resp.content
        logger.debug("Creating beautiful soup parser")
        soup = BeautifulSoup(data, "html.parser")
        # Try the HD anchor first, then fall back to the plain Mp4 anchor.
        link_patterns = (
            re.compile(
                r'^Click to Download Episode \d{1,6}(.+)? in HD Mp4 Format$',
                re.IGNORECASE
            ),
            re.compile(
                r'^Click to Download Episode \d{1,6}(.+)? in Mp4 Format$',
                re.IGNORECASE
            ),
        )
        logger.debug("Using beautiful soup object to find elements matching dl_pattern regex")
        for pattern in link_patterns:
            anchor = soup.find("a", text=pattern)
            if anchor:
                logger.debug("Getting href from beautiful soup element")
                return anchor.get('href')
        logger.warning("No episode quality links found")
def get_special_offer(self, session: requests.sessions.Session, email: str,
                      password: str, user_agent: str,
                      language: str) -> Optional[str]:
    """Return the localized "Special Featured" shop string, or None.

    Downloads Fortnite's cloudstorage hotfix file and extracts the
    LocalizedStrings entry matching ``language``.

    Fixes:
        * ``(str, None)`` was not a valid return annotation — now Optional[str].
        * ``text`` could be unbound (NameError) when no line matched.
    """
    launcher_access_token = AuthUtil.authenticate(self, session, email, password, user_agent)
    data = io.StringIO(
        session.get(
            "https://fortnite-public-service-prod11.ol.epicgames.com/fortnite/api/cloudstorage/system/a22d837b6a2b46349421259c0a5411bf",
            headers={
                "Authorization": f"Bearer {launcher_access_token}"
            }).text).readlines()
    # Find the first line carrying the "Special Featured" string, if any.
    line = next(
        (t for t in data
         if ('Key="AC1E7A1349AB80D63BFF31A642006C54"' in t)
         or ('NativeString="Special Featured"' in t)),
        None)
    if line is not None:
        match = re.search(r'LocalizedStrings=.+', line)
        if match is not None:
            # SECURITY: eval() on server-provided text — kept for
            # compatibility, but ast.literal_eval would be safer here.
            localized = eval(
                match.group(0).replace("LocalizedStrings=", "", 1).replace(")", "", 1),
                globals())
            for lang_code, localized_text in localized:
                if lang_code == language:
                    log.info(f"Special Offer: {localized_text}")
                    return localized_text
    log.info(f"Special Offer: None")
    return None
def get_question(session: requests.sessions.Session, queston_id: int):
    """Fetch the question body for a question id and return it as plain text.

    Returns:
        The question converted by html2text, or None when the question
        block is not present (previously this raised AttributeError on
        ``res.group``).
    """
    question_url = "https://www.shangxueba.com/ask/{queston_id}.html".format(queston_id=queston_id)
    html = session.get(question_url).text
    pattern = re.compile("<div class=\"s_mess2_m\">(.*?)</div>", re.S)
    res = re.search(pattern, html)
    if res is None:
        return None
    return html2text.html2text(res.group(1))
def GetImgUrlsFromSetUrl(session: requests.sessions.Session, imgset_url: str):
    """Return the list of image URLs found on an image-set page.

    Args:
        session: existing requests session.
        imgset_url: URL of the image-set page.

    Returns:
        List of the ``src`` attribute of every ``<img>`` tag on the page.
    """
    page_html = session.get(imgset_url).text
    page_soup = BeautifulSoup(page_html, 'lxml')
    # One image per <img> tag; its URL lives in the src attribute.
    return [tag['src'] for tag in page_soup.find_all('img')]
def GetSetUrlsFromTypeUrl(session: requests.sessions.Session, type_url: str, source_url: str):
    """Return a {image-set name: absolute URL} mapping for a category page.

    BUG FIX: the dict initialisation was commented out, so the function
    raised NameError on its first assignment to ``imgset_urls_dic``.

    Args:
        session: existing requests session.
        type_url: category page URL to scrape.
        source_url: site prefix joined with each relative href.
    """
    imgset_urls_dic = {}
    type_html = session.get(type_url).text
    type_soup = BeautifulSoup(type_html, 'lxml')
    # The <ul class="textList"> holds one <a> per image set.
    imgset_url_tag_list = type_soup.find('ul', {'class': 'textList'}).find_all('a')
    for a_tag in imgset_url_tag_list:
        # Drop the leading date (first 5 characters) from the link text.
        imgset_name = a_tag.get_text()[5:]
        imgset_urls_dic[imgset_name] = source_url + a_tag['href']
    return imgset_urls_dic
def put_piece(
    session: requests.sessions.Session,
    server_address: str,
    piece_number: int,
    dice: int,
) -> Dict:
    """Ask the server to play ``piece_number`` out with ``dice``.

    Returns the server's JSON reply as a dict.
    """
    # NOTE(review): the server models this move as a GET, not an HTTP PUT.
    response = session.get(f"{server_address}/play/out/{piece_number}/{dice}")
    return response.json()
def getPage(
        url: str,
        timeout: float = 5,
        session: requests.sessions.Session = makeSession()) -> Optional[str]:
    """Fetch ``url`` and return the response body text.

    Returns:
        The page text, or None on a connection error — the original
        annotation claimed ``str`` unconditionally, which was wrong.

    NOTE(review): the default session is created once at import time and
    shared by every call that omits ``session``; kept for interface
    compatibility, but be aware of the shared-state implications.
    """
    try:
        return session.get(url, timeout=timeout).text
    except requests.exceptions.ConnectionError:
        return None
def get_film_data(s: requests.sessions.Session, film_id: str, delay: float = 0,
                  proxy: Dict = None) -> Dict:
    """Scrape title, release info and cast for a Kinopoisk film.

    Args:
        s: requests session (may carry auth cookies).
        film_id: numeric film identifier.
        delay: seconds to sleep between the two page fetches.
        proxy: optional requests ``proxies`` mapping.

    Returns:
        dict with 'title', 'release_date', 'country', 'box_office' and
        'actors' (list of [name, role-type] pairs).

    NOTE(review): parsing relies on fixed child positions (``info[13]``
    for box office) and hashed CSS class names that the site can change
    at any time — verify against the live markup.
    """
    film_data = {}
    film_link = f'https://www.kinopoisk.ru/film/{film_id}/'
    page = s.get(film_link, proxies=proxy)
    tree = html.fromstring(page.text)
    title = tree.xpath("//span[@class='styles_title__2l0HH']")[0].text
    # Locate the "About the film" table via its heading text.
    info = tree.xpath('.//h3[text()="О фильме"]')[0].find('..')[1]
    release_date = info[0][1][0].text
    country = info[1][1][0].text
    box_office = info[13][1][0].text
    # Keep only the digits after the '=' sign, without separators.
    buf = box_office.find('=')
    box_office = ''.join(box_office[buf + 3:].split())
    film_data['title'] = title
    film_data['release_date'] = release_date
    film_data['country'] = country
    film_data['box_office'] = box_office
    time.sleep(delay)
    logging.info(f'Информация о фильме {title} загружена')

    actors_link = film_link + 'cast/'
    page = s.get(actors_link, proxies=proxy)
    tree = html.fromstring(page.text)
    actors = []
    cast_block = tree.xpath('//*[@id="block_left"]/div')[0]
    current_type = ''
    # Iterate the children directly instead of ``for i in range(len(z))``.
    for node in cast_block:
        name = node.attrib.get('name', None)
        if name is not None:
            # Section anchors carry the role type (actor, director, ...).
            current_type = name
        cls = node.attrib.get('class', None)
        if cls is not None and 'dub' in cls:
            fio = node.find_class('name')[0][0].text
            actors.append([fio, current_type])
    film_data['actors'] = actors
    logging.info(f'Информация об актерах фильма {title} загружена')
    return film_data
def login(session: requests.sessions.Session):
    """Log in to shangxueba, solving the captcha with up to 15 attempts.

    Returns:
        The session on success (after saving cookies), False on failure.

    NOTE(review): credentials are hard-coded placeholders
    ("username"/"******") — presumably meant to be filled in by the user.
    """
    url = "https://passport.shangxueba.com/user/userlogin.aspx?url=https%3A//www.shangxueba.com/"
    code_url = "https://passport.shangxueba.com/VerifyCode.aspx"
    page = session.get(url)
    soup = BeautifulSoup(page.text, 'lxml')
    # Request headers are invariant across attempts — build them once.
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
        "Referer": "https://passport.shangxueba.com/user/userlogin.aspx?url=https%3A//www.shangxueba.com/"
    }
    for _ in range(15):
        # Fetch and OCR a fresh captcha for every attempt.
        with open("code.jpeg", "wb") as f:
            f.write(session.get(code_url).content)
        code = get_code("./code.jpeg")
        data = {
            "__EVENTTARGET": "",
            "__EVENTARGUMENT": "",
            "__VIEWSTATE": soup.find('input', id='__VIEWSTATE')["value"],
            "__EVENTVALIDATION": soup.find('input', id='__EVENTVALIDATION')["value"],
            "txtName": "username",
            "txtPassword": "******",
            "txtVerifycode": code,
            "hidcode": "",
            "hidflag": "1",
            "Button1": ""
        }
        req = session.post(url, headers=headers, data=data)
        if "欢迎您回来" in req.text:
            save_cookie(session)
            return session
        # A wrong captcha and any other failure are handled identically
        # (the original's two branches both decremented and continued).
    return False
def extend_task(session: requests.sessions.Session) -> None:
    """Extend the PythonAnywhere scheduled task's expiry.

    Raises:
        Exception: when the server replies with non-JSON content or with
            a JSON status other than "success".
    """
    tasks_url = "https://www.pythonanywhere.com/user/{}/tasks_tab/".format(LOGIN)
    token = csrfgetter(session.get(tasks_url).text).csrfToken
    reply = session.post(
        TASK_URL,
        headers={
            "Referer": tasks_url,
            "X-CSRFToken": token
        })
    # Guard clause: anything but a JSON reply is an error.
    if reply.headers.get("Content-Type") != "application/json":
        raise Exception("[Update task] Server returns non json")
    payload = reply.json()
    if payload.get("status") != "success":
        raise Exception(
            "[Update task] status != success. Response json: {}".format(payload))
def generate_device_auth(self, session: requests.sessions.Session, client_id: str,
                         launcher_access_token: str,
                         user_agent: str) -> requests.models.Response:
    """Create a new device-auth credential for ``client_id``.

    Returns the raw response from the account service.
    """
    endpoint = (
        "https://account-public-service-prod.ol.epicgames.com"
        f"/account/api/public/account/{client_id}/deviceAuth"
    )
    auth_headers = {
        "Authorization": f"Bearer {launcher_access_token}",
        "User-Agent": user_agent
    }
    return session.post(endpoint, headers=auth_headers)
def _get_comic_json(self, session: requests.sessions.Session,
                    comic_number: int) -> Optional[Dict]:
    """Fetch the metadata JSON for one xkcd comic.

    Returns the parsed JSON dict, or None on a non-200 response.
    """
    url = 'https://xkcd.com/' + str(comic_number) + '/info.0.json'
    response = session.get(url)
    if response.status_code == 200:
        return response.json()
    # Missing comic or failed request — caller treats None as "skip".
    return None
def DownloadImg(session: requests.sessions.Session, img_url: str, path: str, img_name: str):
    """Stream the image at ``img_url`` into ``path``/``img_name``.

    Always returns True; request/IO errors propagate as exceptions.
    """
    # Stream so the whole image is never held in memory at once.
    response = session.get(img_url, stream=True)
    with open('%s/%s' % (path, img_name), 'wb') as out_file:
        for chunk in response.iter_content(chunk_size=128):
            out_file.write(chunk)
    return True
def exchange_code_auth(self, session: requests.sessions.Session,
                       email: str) -> requests.models.Response:
    """Redeem an interactively-entered exchange code for an OAuth token.

    Prompts on stdin for the code, then posts it to the token endpoint.
    """
    code = input(f"Enter exchange code for {email}: ")
    payload = {
        "grant_type": "exchange_code",
        "exchange_code": code,
        "token_type": "eg1"
    }
    return session.post(
        "https://account-public-service-prod.ol.epicgames.com/account/api/oauth/token",
        headers={"Authorization": f"basic {launcher_token}"},
        data=payload)
def device_auth(self, session: requests.sessions.Session, deviceId: str, accountId: str,
                secret: str) -> requests.models.Response:
    """Obtain an OAuth token using stored device-auth credentials."""
    token_endpoint = ("https://account-public-service-prod.ol.epicgames.com"
                      "/account/api/oauth/token")
    form = {
        "grant_type": "device_auth",
        "device_id": deviceId,
        "account_id": accountId,
        "secret": secret,
        "token_type": "eg1"
    }
    return session.post(token_endpoint,
                        headers={"Authorization": f"basic {ios_token}"},
                        data=form)
def session_post(
    session: requests.sessions.Session, endpoint: str, data: Dict[str, Any]
) -> dict:
    """
    Posts JSON `data` to `endpoint` using the `session`.

    Handles errors and returns a json response dict.
    """
    full_url = get_full_endpoint_url(endpoint)
    raw_response = session.post(
        full_url, json=data, cert=REQUESTS_CERT, headers=REQUESTS_HEADERS
    )
    payload = raw_response.json()
    # Raises on application-level error codes embedded in the payload.
    handle_code_msg(payload)
    return payload
def get_wiki_response(
    url: str, session: requests.sessions.Session = requests.Session()
) -> requests.Response:
    """
    Get a wiki response for a URL in a (relatively) safe manner (for bots).

    Honors Retry-After headers on 503/200 replies, sleeps a second
    between requests, and retries after a 10s pause on connection errors.

    NOTE(review): the default session is created once at import time and
    shared across calls — kept for interface compatibility.
    """
    try:
        response = session.get(url)
        while (response.status_code == 503 or response.status_code == 200) \
                and "Retry-After" in response.headers:
            timeout = response.headers["Retry-After"]
            logging.info(f"Sleeping for {timeout}")
            # BUG FIX: header values are strings; time.sleep needs a number.
            time.sleep(float(timeout))
            response = session.get(url)
        time.sleep(1)
        response.raise_for_status()
        return response
    except requests.ConnectionError as e:
        logging.exception(e)
        if e.response:
            # BUG FIX: Response.text is a property, not a callable.
            logging.error(e.response.text)
        time.sleep(10)
        return get_wiki_response(url, session=session)
def download_last_payslip(session: requests.sessions.Session, eurecia_host: str,
                          payslip_name: str):
    """Download the most recent 2020 payslip PDF via the Eurecia API.

    Fixes:
        * ``eurecia_host`` was unconditionally overwritten by
          ``config["eurecia_host"]`` — the config value is now only a
          fallback when no host is passed.
        * the computed ``filename`` was built but never used for the
          output file — TODO confirm this was the intended target name.

    Raises:
        ValueError: with the status code when the payslip list request fails.
    """
    print("Download last payslip using API")
    if not eurecia_host:
        eurecia_host = config["eurecia_host"]
    baseurl = f"https://{eurecia_host}/eurecia/api/v1/payslip"
    response = session.get(baseurl)
    if response.ok:
        payslip_list = response.json()
    else:
        print(response.content)
        raise ValueError(response.status_code)
    last_payslip_url = (
        f"https://{eurecia_host}/"
        + payslip_list["2020"][0]["files"][0]["urlContent"]
    )
    filename = payslip_name + payslip_list["2020"][0]["description"]
    filename = filename.replace(" ", "-")
    response = session.get(last_payslip_url)
    if response.status_code == 200:
        with open(f"{filename}.pdf", "wb") as f:
            f.write(response.content)
        print("OK")
def update_environment_build_status(
    status: str,
    session: requests.sessions.Session,
    environment_build_uuid,
) -> Any:
    """Update environment build status."""
    payload = {"status": status}
    # Stamp the transition time for started/terminal states.
    if status == "STARTED":
        payload["started_time"] = datetime.utcnow().isoformat()
    elif status in ["SUCCESS", "FAILURE"]:
        payload["finished_time"] = datetime.utcnow().isoformat()

    url = f"{CONFIG_CLASS.ORCHEST_API_ADDRESS}/environment-builds/{environment_build_uuid}"
    with session.put(url, json=payload) as response:
        return response.json()
def update_jupyter_image_build_status(
    status: str,
    session: requests.sessions.Session,
    jupyter_image_build_uuid,
) -> Any:
    """Update Jupyter build status."""
    payload = {"status": status}
    # Stamp the transition time for started/terminal states.
    if status == "STARTED":
        payload["started_time"] = datetime.utcnow().isoformat()
    elif status in ["SUCCESS", "FAILURE"]:
        payload["finished_time"] = datetime.utcnow().isoformat()

    url = (f"{CONFIG_CLASS.ORCHEST_API_ADDRESS}/jupyter-builds/"
           f"{jupyter_image_build_uuid}")
    with session.put(url, json=payload) as response:
        return response.json()
def submit_snapshot_request(session: requests.sessions.Session, url: str,
                            headers: dict) -> requests.models.Response:
    """POST a save-page request for ``url``.

    Returns the response on HTTP 200; raises ServerStatusError otherwise.
    """
    save_url = url_for_savepage(url)
    # Copy the caller's headers and add the save endpoint as Referer.
    sub_headers = {**headers, "Referer": SAVE_ENDPOINT}
    payload = {
        "url": url,
        "capture_all": "on"
    }
    resp = session.post(save_url, headers=sub_headers, data=payload)
    if resp.status_code != 200:
        raise ServerStatusError(
            f"""Server status was NOT OK; returned {resp.status_code} for: {save_url}"""
        )
    return resp
def validate_access(s: requests.sessions.Session) -> bool:
    """Return True when the dashboard page title matches HOME_TITLE."""
    base_page = s.get(
        'http://desarrollo.lda/CheckingPRO/dashboard/view.run?category=requests'
    )
    try:
        soup = BeautifulSoup(base_page.content, 'html.parser')
        if soup.title.get_text() == HOME_TITLE:
            print("Acceso verificado.")
            return True
        print("No se ha logrado acceder.")
    except Exception as e:
        # Parse failures (e.g. a missing <title>) count as no access.
        print("error de acceso", e)
    return False