Code example #1
def extend_webapp(session: requests.sessions.Session) -> None:
    r = session.get(
        "https://www.pythonanywhere.com/user/{}/webapps/".format(LOGIN))
    csrfmiddlewaretoken = csrfgetter(r.text).csrfToken
    session.post(
        WEB_APP_URL,
        headers={
            "Referer":
            "https://www.pythonanywhere.com/user/{}/webapps/".format(LOGIN)
        },
        data={"csrfmiddlewaretoken": csrfmiddlewaretoken})
Code example #2
File: scraper.py Project: kelseykm/ketter_links
    def get_episodes(self, url: str, sess: requests.sessions.Session) -> Optional[list[str]]:
        """ Get season's episodes """
        logger.debug("Getting season's episodes")

        with sess.get(url) as resp:
            logger.debug("Getting url content")
            data = resp.content

        logger.debug("Creating beautiful soup parser")
        soup = BeautifulSoup(data, "html.parser")

        logger.debug("Using beautiful soup object to find pages elements")
        pages_element = soup.find("div", {"class": "pagination"})
        pages = []
        if pages_element:
            for child in pages_element.findChildren("a"):
                child = child.get('href')
                pages.append(child)
        else:
            logger.warning("Could not find pages element")

        episode_links = []

        logger.debug("Using beautiful soup object to find elements matching regex")
        text_pattern = re.compile(fr'^Episode 0?({self.episodes_regex})')
        elements = soup.find_all("a", text=text_pattern)
        if elements:
            for element in elements:
                logger.debug("Getting href from beautiful soup element")
                episode_links.append(element.get('href'))

        if pages:
            logger.debug("Getting links from the other pages")
            for page in pages:
                with sess.get(page) as resp:
                    logger.debug("Getting url content")
                    data = resp.content

                logger.debug("Creating beautiful soup parser")
                soup = BeautifulSoup(data, "html.parser")
                logger.debug("Using beautiful soup object to find elements matching regex")
                text_pattern = re.compile(fr'^Episode 0?({self.episodes_regex})')
                elements = soup.find_all("a", text=text_pattern)

                if elements:
                    for element in elements:
                        logger.debug("Getting href from beautiful soup element")
                        episode_links.append(element.get('href'))

        if episode_links:
            return episode_links

        logger.warning("No episode links found")
Code example #3
File: util.py Project: pirica/Fortnite-ShopBot
    def email_and_password_auth(self, session: requests.sessions.Session,
                                email: str,
                                password: str) -> requests.models.Response:
        session.get(f"{base}/id/api/csrf")
        res = session.post(
            f"{base}/id/api/login",
            headers={"x-xsrf-token": session.cookies.get("XSRF-TOKEN")},
            data={
                "email": email,
                "password": password,
                "rememberMe": False,
                "captcha": ""
            },
            cookies=session.cookies)
        return res
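The base name is not defined in the snippet; for Epic's web login flow it is plausibly the epicgames.com origin. A hedged usage sketch, where base, the AuthUtil instance, and the credentials are all assumptions:

import requests

base = "https://www.epicgames.com"  # assumption: web origin used by the login flow
session = requests.Session()
auth = AuthUtil()  # hypothetical class holding email_and_password_auth
# The initial GET in the method seeds the XSRF-TOKEN cookie that the POST echoes back.
res = auth.email_and_password_auth(session, "user@example.com", "hunter2")
print(res.status_code)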
Code example #4
def check_Login(session: requests.sessions.Session):
    html = session.get("http://passport.shangxueba.com").text
    # Match the welcome banner that only appears for logged-in users
    pattern = re.compile(r'<p class="persPcConRiOneP2">\s+\S+,欢迎您!</p>', re.S)
    return bool(re.search(pattern, html))
Code example #5
    def _download_image(self, session: requests.sessions.Session,
                        comic_url: str, filename: str) -> None:
        """
        Download the image file.

        Args:
            session (class 'requests.sessions.Session'): the Session object.
            comic_url (str): String containing the image url.
            filename (str): String of the filename to save the image to.

        Returns: None
        """
        # print(f'Downloading page http://xkcd.com/{url_number}...')

        response = session.get(comic_url)
        if response.status_code != 200:
            # At present two comics (1608 and 1668) have no static image and
            # return 404. Is there a better way to handle this, in case of
            # redirects etc.?
            return None

        with open(os.path.join('xkcd', filename), 'xb') as image_file:
            if not self.run_mode:
                print(f'Downloading image {comic_url}...')

            for chunk in response.iter_content(100000):
                image_file.write(chunk)
Code example #6
def get_answer(session: requests.sessions.Session, queston_id: int):
    answer_url = "http://www.shangxueba.com/ask/ajax/zuijiainfo.aspx?id={queston_id}".format(queston_id=queston_id)
    html = session.get(answer_url).text
    pattern = re.compile("<div class=\"xj_contextinfo\">\n<h6>\n(.*?)\n</h6>\n</div>", re.S)
    res = re.search(pattern, html)
    answer = html2text.html2text(res.group(1))
    return answer
Code example #7
def get_review_texts_by_url(relative_reviews_urls: list,
                            session: requests.sessions.Session,
                            root_url: str) -> List[Tuple[str, str]]:
    """
    :param relative_reviews_urls: Список относительных URL относительно
    root_url.
    :param session: Сессия
    :param root_url: префикс любого URL отзыва на некоторую книгу
    :return: Список, состоящий из пар (абсолютный URL, текст книги)
    """
    texts = []
    for relative_url in tqdm(relative_reviews_urls):
        review_url = f"{root_url}/{relative_url}"
        response = session.get(review_url,
                               headers={"User-Agent": "Mozilla/5.0"})
        response.encoding = "utf-8"
        response = response.text
        response = BeautifulSoup(response, "html.parser")
        text = response.find("div", {"class": "universal-blocks-content"}).text

        text = re.sub(r"[\t ]+", " ", text)

        texts.append((review_url, text))

    return texts
Code example #8
def parse_stories(session: requests.sessions.Session, num_of_stories: int):
    tags_counter = Counter()
    id_stories = set()
    i = 1
    paramload = {'page': str(i)}
    complete = False

    while not complete:

        request = session.get(HOME, params=paramload)
        print('request for new page', request.status_code)
        soup = BeautifulSoup(request.text, 'lxml')
        stories = soup.find_all('article', class_='story')

        for story in stories:  # enumerate's index here would shadow the page counter i
            if story['data-story-id'] not in id_stories:
                id_stories.add(story['data-story-id'])
                print('story id', story['data-story-id'])
                try:
                    tags_str = story.find('div',
                                          class_='story__tags tags').text[1:-1]
                    print(tags_str)
                    for tag in tags_str.split(' '):
                        tags_counter[tag] += 1
                except AttributeError:
                    print('story without tags')

                if len(id_stories) == num_of_stories:
                    complete = True
                    break
        print(len(id_stories))

        i += 1
        paramload['page'] = str(i)
    return tags_counter
Code example #9
File: scraper.py Project: kelseykm/ketter_links
    def get_episode_quality_link(self, url: str, sess: requests.sessions.Session) -> Optional[str]:
        """ Get episode's quality download link """
        logger.debug("Getting episode's quality download link")

        with sess.get(url) as resp:
            logger.debug("Getting url content")
            data = resp.content

        logger.debug("Creating beautiful soup parser")
        soup = BeautifulSoup(data, "html.parser")

        dl_pattern = re.compile(
                r'^Click to Download Episode \d{1,6}(.+)? in HD Mp4 Format$',
                re.IGNORECASE
        )
        dl_pattern2 = re.compile(
                r'^Click to Download Episode \d{1,6}(.+)? in Mp4 Format$',
                re.IGNORECASE
        )

        logger.debug("Using beautiful soup object to find elements matching dl_pattern regex")
        element = soup.find("a", text=dl_pattern)
        if not element:
            element = soup.find("a", text=dl_pattern2)

        if element:
            logger.debug("Getting href from beautiful soup element")
            return element.get('href')

        logger.warning("No episode quality links found")
Code example #10
File: util.py Project: pirica/Fortnite-ShopBot
    def get_special_offer(self, session: requests.sessions.Session, email: str,
                          password: str, user_agent: str,
                          language: str) -> Optional[str]:
        launcher_access_token = AuthUtil.authenticate(self, session, email,
                                                      password, user_agent)
        data = io.StringIO(
            session.get(
                "https://fortnite-public-service-prod11.ol.epicgames.com/fortnite/api/cloudstorage/system/a22d837b6a2b46349421259c0a5411bf",
                headers={
                    "Authorization": f"Bearer {launcher_access_token}"
                }).text).readlines()

        for text in data:
            if ('Key="AC1E7A1349AB80D63BFF31A642006C54"'
                    in text) or ('NativeString="Special Featured"' in text):
                break

        match = re.search(r'LocalizedStrings=.+', text)
        if match is not None:
            match = eval(
                match.group(0).replace("LocalizedStrings=", "",
                                       1).replace(")", "", 1), globals())
            for i in match:
                if i[0] == language:
                    match = i[1]
            log.info(f"Special Offer: {match}")
            return match
        else:
            log.info(f"Special Offer: None")
            return None
Code example #11
def get_question(session: requests.sessions.Session, queston_id: int):
    question_url = "https://www.shangxueba.com/ask/{queston_id}.html".format(queston_id=queston_id)
    html = session.get(question_url).text
    pattern = re.compile("<div class=\"s_mess2_m\">(.*?)</div>", re.S)
    res = re.search(pattern, html)
    question = html2text.html2text(res.group(1))
    return question
Code example #12
def GetImgUrlsFromSetUrl(session: requests.sessions.Session, imgset_url: str):
    # Args: an existing session and the URL of an image set (imgset_url)
    # Returns: a list of image URLs (img_urls)
    img_urls = []
    # Initialize the list of image URLs
    imgset_response = session.get(imgset_url)
    # Fetch the image-set page
    imgset_html = imgset_response.text
    # Get the image-set HTML
    imgset_soup = BeautifulSoup(imgset_html, 'lxml')
    # Parse the image-set HTML with BeautifulSoup
    img_tags = imgset_soup.find_all('img')
    # Collect all img tags
    # Each image URL lives in an img tag's src attribute

    # Build the list of image URLs
    for img_tag in img_tags:
        img_url = img_tag['src']
        # Get a single image URL
        img_urls.append(img_url)
        # Append it to the list

    # imgset_name = imgset_url.split('/')[-1]
    # # provisional image-set name

    return img_urls
Code example #13
def GetSetUrlsFromTypeUrl(session: requests.sessions.Session, type_url: str, source_url: str):
    imgset_urls_dic = {}
    # Initialize the dict of image-set info

    type_response = session.get(type_url)
    # Open the category page
    type_html = type_response.text
    # Get the category page HTML

    type_soup = BeautifulSoup(type_html, 'lxml')
    # Parse with BeautifulSoup
    imgset_url_tag_list = type_soup.find('ul', {'class': 'textList'}).find_all('a')
    # Get the list of image-set links:
    # the textList element holds the URL list, and each a tag carries an href

    for a_tag in imgset_url_tag_list:
        imgset_name = a_tag.get_text()[5:]
        # Parse the name out of the a tag, stripping the leading date
        imgset_url = source_url + a_tag['href']
        # Build the absolute URL
        imgset_urls_dic[imgset_name] = imgset_url
        # Store it in the dict: key = image-set name, value = URL

    return imgset_urls_dic
Code example #14
def put_piece(
    session: requests.sessions.Session,
    server_address: str,
    piece_number: int,
    dice: int,
) -> Dict:
    req = session.get(f"{server_address}/play/out/{piece_number}/{dice}")
    return req.json()
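A hedged usage sketch for the snippet above: the function simply wraps a GET to the game server's /play/out/<piece>/<dice> route and decodes the JSON reply. The server address and arguments below are placeholders:

import requests

session = requests.Session()
# Hypothetical local game server; move piece 2 with a die roll of 6.
state = put_piece(session, "http://localhost:5000", 2, 6)
print(state)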
Code example #15
File: RequestsConnection.py Project: Capital2/Egyvid
def getPage(
    url: str,
    timeout: float = 5,
    session: requests.sessions.Session = makeSession()) -> Optional[str]:
    # Note: the default session is created once, at definition time, and is
    # shared by every call that omits the argument.
    try:
        return session.get(url, timeout=timeout).text
    except requests.exceptions.ConnectionError:
        return None
Code example #16
def get_film_data(s: requests.sessions.Session,
                  film_id: str,
                  delay: float = 0,
                  proxy: Dict = None) -> Dict:
    film_data = {}
    film_link = f'https://www.kinopoisk.ru/film/{film_id}/'
    page = s.get(film_link, proxies=proxy)

    tree = html.fromstring(page.text)
    title = tree.xpath("//span[@class='styles_title__2l0HH']")[0].text

    # info = tree.xpath('//*[@id="__next"]/div/div[2]/div[1]/div[2]/div/div[3]/div/div/div[2]/div[1]/div')[0]
    info = tree.xpath('.//h3[text()="О фильме"]')[0].find('..')[1]

    release_date = info[0][1][0].text
    country = info[1][1][0].text
    box_office = info[13][1][0].text
    buf = box_office.find('=')
    box_office = ''.join(box_office[buf + 3:].split())

    film_data['title'] = title
    film_data['release_date'] = release_date
    film_data['country'] = country
    film_data['box_office'] = box_office

    time.sleep(delay)
    logging.info(f'Film {title} info downloaded')
    actors_link = film_link + 'cast/'
    page = s.get(actors_link, proxies=proxy)
    tree = html.fromstring(page.text)

    actors = []
    z = tree.xpath('//*[@id="block_left"]/div')[0]
    current_type = ''
    for i in range(len(z)):
        name = z[i].attrib.get('name', None)
        if name is not None:
            current_type = name
        cls = z[i].attrib.get('class', None)
        if cls is not None and 'dub' in cls:
            fio = z[i].find_class('name')[0][0].text
            actors.append([fio, current_type])
    film_data['actors'] = actors
    logging.info(f'Cast info for film {title} downloaded')
    return film_data
Code example #17
def login(session: requests.sessions.Session):
    url="https://passport.shangxueba.com/user/userlogin.aspx?url=https%3A//www.shangxueba.com/"
    code_url="https://passport.shangxueba.com/VerifyCode.aspx"
    page=session.get(url)
    soup=BeautifulSoup(page.text,'lxml')
    # Log in to the account
    flag=False
    max_login_time=15
    while max_login_time > 0 and not flag:
        code_image=session.get(code_url).content
        with open("code.jpeg","wb") as f:
            f.write(code_image)
        code = get_code("./code.jpeg")
        headers={
            "User-Agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
            "Referer":"https://passport.shangxueba.com/user/userlogin.aspx?url=https%3A//www.shangxueba.com/"
        }
        data={
            "__EVENTTARGET":"",
            "__EVENTARGUMENT":"",
            "__VIEWSTATE":soup.find('input', id='__VIEWSTATE')["value"],
            "__EVENTVALIDATION":soup.find('input', id='__EVENTVALIDATION')["value"],
            "txtName": "username",
            "txtPassword": "******",
            "txtVerifycode":code,
            "hidcode":"",
            "hidflag":"1",
            "Button1":""
        }
        req=session.post(url,headers=headers,data=data)
        if "欢迎您回来" in req.text:
            flag=True
            break
        if "验证码错误" in req.text:
            max_login_time -= 1
            continue
        else:
            max_login_time -= 1
            continue
    if not flag:
        return False
    save_cookie(session)
    return session
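The snippet leans on two helpers that are not shown, get_code (captcha OCR) and save_cookie. A minimal sketch of what they might look like, assuming pytesseract for the OCR and pickle for cookie persistence; both choices are assumptions, not the original project's code:

import pickle
import pytesseract
from PIL import Image

def get_code(image_path: str) -> str:
    # Assumption: plain OCR is enough to read the captcha image.
    return pytesseract.image_to_string(Image.open(image_path)).strip()

def save_cookie(session, path: str = "cookies.pkl") -> None:
    # Persist the session cookies for reuse in later runs.
    with open(path, "wb") as f:
        pickle.dump(session.cookies, f)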
Code example #18
def extend_task(session: requests.sessions.Session) -> None:
    r = session.get(
        "https://www.pythonanywhere.com/user/{}/tasks_tab/".format(LOGIN))
    CSRFToken = csrfgetter(r.text).csrfToken
    r = session.post(
        TASK_URL,
        headers={
            "Referer":
            "https://www.pythonanywhere.com/user/{}/tasks_tab/".format(LOGIN),
            "X-CSRFToken":
            CSRFToken
        })
    if r.headers.get("Content-Type") == "application/json":
        r = r.json()
        if r.get("status") != "success":
            raise Exception(
                "[Update task] status != success. Response json: {}".format(r))
    else:
        raise Exception("[Update task] Server returns non json")
Code example #19
File: util.py Project: pirica/Fortnite-ShopBot
    def generate_device_auth(self, session: requests.sessions.Session,
                             client_id: str, launcher_access_token: str,
                             user_agent: str) -> requests.models.Response:
        res = session.post(
            f"https://account-public-service-prod.ol.epicgames.com/account/api/public/account/{client_id}/deviceAuth",
            headers={
                "Authorization": f"Bearer {launcher_access_token}",
                "User-Agent": user_agent
            })
        return res
Code example #20
    def _get_comic_json(self, session: requests.sessions.Session,
                        comic_number: int) -> Optional[Dict]:
        """
        Fetch the JSON metadata for the given xkcd comic number.
        """
        response = session.get('https://xkcd.com/' + str(comic_number) +
                               '/info.0.json')
        if response.status_code != 200:
            # Metadata not available for this comic number
            return None
        return response.json()
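For context, the xkcd metadata endpoint used above returns fields such as "safe_title" and "img" (the direct image URL). A quick standalone check, with the comic number chosen arbitrarily:

import requests

session = requests.Session()
meta = session.get("https://xkcd.com/353/info.0.json").json()
print(meta["safe_title"], meta["img"])  # title and direct image URL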
Code example #21
def DownloadImg(session: requests.sessions.Session, img_url: str, path: str,
                img_name: str):
    # Handle the response here.
    img_response = session.get(img_url, stream=True)
    # Fetch the image with the existing session, streaming the body
    with open('%s/%s' % (path, img_name), 'wb') as f:
        for chunk in img_response.iter_content(chunk_size=128):
            f.write(chunk)
            # Write the image chunk by chunk
    return True
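The three helpers from examples #12, #13, and #21 chain naturally into a small scraper. A hedged driver sketch, where the site root and output directory are placeholders:

import os
import requests

session = requests.Session()
source_url = "https://example.com"  # placeholder site root
sets = GetSetUrlsFromTypeUrl(session, source_url + "/type/1", source_url)
os.makedirs("downloads", exist_ok=True)
for name, set_url in sets.items():
    for i, img_url in enumerate(GetImgUrlsFromSetUrl(session, set_url)):
        DownloadImg(session, img_url, "downloads", "{}_{}.jpg".format(name, i))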
Code example #22
File: util.py Project: pirica/Fortnite-ShopBot
    def exchange_code_auth(self, session: requests.sessions.Session,
                           email: str) -> requests.models.Response:
        res = session.post(
            "https://account-public-service-prod.ol.epicgames.com/account/api/oauth/token",
            headers={"Authorization": f"basic {launcher_token}"},
            data={
                "grant_type": "exchange_code",
                "exchange_code": input(f"Enter exchange code for {email}: "),
                "token_type": "eg1"
            })
        return res
Code example #23
File: util.py Project: pirica/Fortnite-ShopBot
    def device_auth(self, session: requests.sessions.Session, deviceId: str,
                    accountId: str, secret: str) -> requests.models.Response:
        res = session.post(
            "https://account-public-service-prod.ol.epicgames.com/account/api/oauth/token",
            headers={"Authorization": f"basic {ios_token}"},
            data={
                "grant_type": "device_auth",
                "device_id": deviceId,
                "account_id": accountId,
                "secret": secret,
                "token_type": "eg1"
            })
        return res
Code example #24
File: api.py Project: AndreMiras/mysodexo
def session_post(
    session: requests.sessions.Session, endpoint: str, data: Dict[str, Any]
) -> dict:
    """
    Posts JSON `data` to `endpoint` using the `session`.
    Handles errors and returns a json response dict.
    """
    endpoint = get_full_endpoint_url(endpoint)
    response = session.post(
        endpoint, json=data, cert=REQUESTS_CERT, headers=REQUESTS_HEADERS
    )
    json_response = response.json()
    handle_code_msg(json_response)
    return json_response
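handle_code_msg and the REQUESTS_* constants come from elsewhere in the project. As a hedged sketch, the error handler presumably inspects a code/msg envelope in the JSON and raises on failure; the field names and success code below are assumptions:

def handle_code_msg(json_response: dict) -> None:
    # Hypothetical: raise if the API reports an error in its code/msg envelope.
    code = json_response.get("code")
    msg = json_response.get("msg")
    if code not in (None, 100):  # assumed success code
        raise RuntimeError("API error {}: {}".format(code, msg))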
Code example #25
def get_wiki_response(
    url: str, session: requests.sessions.Session = requests.Session()
) -> requests.Response:
    """
    Get a wiki response for a URL in a (relatively) safe manner (for bots)
    """
    try:
        response = session.get(url)
        while (response.status_code == 503 or response.status_code
               == 200) and "Retry-After" in response.headers:
            # Retry-After is a string header; assume the numeric-seconds form
            timeout = int(response.headers["Retry-After"])
            logging.info(f"Sleeping for {timeout}")
            time.sleep(timeout)
            response = session.get(url)
        time.sleep(1)
        response.raise_for_status()
        return response
    except requests.ConnectionError as e:
        logging.exception(e)
        if e.response:
            logging.error(e.response.text)
        time.sleep(10)
        return get_wiki_response(url, session=session)
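A quick usage sketch for the helper above; the URL is just an example MediaWiki API call, and the response fields follow the standard siteinfo schema:

resp = get_wiki_response(
    "https://en.wikipedia.org/w/api.php?action=query&meta=siteinfo&format=json")
print(resp.json()["query"]["general"]["sitename"])  # "Wikipedia"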
Code example #26
def download_last_payslip(session: requests.sessions.Session, eurecia_host: str, payslip_name: str):
    print("Download last payslip using API")

    baseurl = f"https://{eurecia_host}/eurecia/api/v1/payslip"

    response = session.get(baseurl)
    if response.ok:
        payslip_list = response.json()
    else:
        print(response.content)
        raise ValueError(response.status_code)

    last_payslip_url = (
        f"https://{eurecia_host}/" + payslip_list["2020"][0]["files"][0]["urlContent"]
    )

    filename = payslip_name + payslip_list["2020"][0]["description"]
    filename = filename.replace(" ", "-")
    response = session.get(last_payslip_url)
    if response.status_code == 200:
        with open(f"{filename}.pdf", "wb") as f:
            f.write(response.content)
        print("OK")
Code example #27
def update_environment_build_status(
    status: str,
    session: requests.sessions.Session,
    environment_build_uuid,
) -> Any:
    """Update environment build status."""
    data = {"status": status}
    if data["status"] == "STARTED":
        data["started_time"] = datetime.utcnow().isoformat()
    elif data["status"] in ["SUCCESS", "FAILURE"]:
        data["finished_time"] = datetime.utcnow().isoformat()

    url = f"{CONFIG_CLASS.ORCHEST_API_ADDRESS}/environment-builds/{environment_build_uuid}"

    with session.put(url, json=data) as response:
        return response.json()
Code example #28
def update_jupyter_image_build_status(
    status: str,
    session: requests.sessions.Session,
    jupyter_image_build_uuid,
) -> Any:
    """Update Jupyter build status."""
    data = {"status": status}
    if data["status"] == "STARTED":
        data["started_time"] = datetime.utcnow().isoformat()
    elif data["status"] in ["SUCCESS", "FAILURE"]:
        data["finished_time"] = datetime.utcnow().isoformat()

    url = (f"{CONFIG_CLASS.ORCHEST_API_ADDRESS}/jupyter-builds/"
           f"{jupyter_image_build_uuid}")

    with session.put(url, json=data) as response:
        return response.json()
Code example #29
File: wayback.py Project: dannguyen/pgark
def submit_snapshot_request(session: requests.sessions.Session, url: str,
                            headers: dict) -> requests.models.Response:
    save_url = url_for_savepage(url)
    sub_headers = headers.copy()
    sub_headers.update({"Referer": SAVE_ENDPOINT})
    resp = session.post(save_url,
                        headers=sub_headers,
                        data={
                            "url": url,
                            "capture_all": "on"
                        })
    # Error out on any non-200 status code
    if resp.status_code != 200:
        raise ServerStatusError(
            f"""Server status was NOT OK; returned {resp.status_code} for: {save_url}"""
        )
    else:
        return resp
Code example #30
def validate_access(s: requests.sessions.Session) -> bool:
    base_page = s.get(
        'http://desarrollo.lda/CheckingPRO/dashboard/view.run?category=requests'
    )
    try:
        soup = BeautifulSoup(base_page.content, 'html.parser')

        if HOME_TITLE == soup.title.get_text():
            print("Access verified.")
            return True

        else:
            print("Could not gain access.")

    except Exception as e:
        print("access error", e)

    return False