Python get_htmls 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: rnc.corpora_requests

메소드/함수: get_htmls

hotexamples.com에서의 예제들: 5

Python get_htmls - 5개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python rnc.corpora_requests.get_htmls의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

    def _get_additional_info(self,
                             first_page: str = None) -> None:
        """ Get additional info (amount of found
        docs and contexts, link to the graphic).

        The result of ``Corpus._get_where_query_found`` is stored in
        ``self._add_info``; a non-empty graphic URL is added to it
        under the ``'graphic_link'`` key. If extracting the info fails,
        the error is logged and ``self._add_info`` is not assigned.

        :param first_page: HTML code of the first page. If it is None or
         empty, the page is requested with a copy of ``self.params``
         where ``lang`` is set to 'ru' and ``expand`` is removed.
        :return: None.

        :exception Exception: nothing raised while requesting the first
         page is caught here, it propagates to the caller unchanged.
        """
        if not first_page:
            # The modified params are needed only for the request,
            # so the copy is made only when the page must be fetched.
            params = self.params.copy()
            params['lang'] = 'ru'
            params.pop('expand', None)
            # No try/except around the request: a handler that only
            # re-raises adds nothing, errors reach the caller anyway.
            first_page = creq.get_htmls(RNC_URL, **params)[0]

        soup = bs4.BeautifulSoup(first_page, 'lxml')
        content = soup.find('div', {'class': 'content'})

        try:
            additional_info = Corpus._get_where_query_found(content)
            graphic_url = Corpus._get_graphic_url(content)
        except Exception as e:
            # Best effort: a failure here is logged and not propagated.
            logger.error("Sth went wrong while "
                         f"getting additional info:\n{e}")
        else:
            if graphic_url:
                additional_info['graphic_link'] = graphic_url

            self._add_info = additional_info

예제 #2

파일 보기

    def request_examples(self) -> None:
        """ Download the result pages, parse them and store the examples.

        The first and the last pages are taken from
        ``creq.is_request_correct``; when there are more than two pages
        the remaining ones are requested with ``creq.get_htmls``.

        If there are no results found, last page does not exist,
        params or query is wrong then exception.

        :return: None.

        :exception RuntimeError: if the data still exist.
        """
        if self.data:
            logger.error("Tried to request new examples, however data exist")
            raise RuntimeError("Data still exist")

        started_at = time.time()
        try:
            first_page, last_page = creq.is_request_correct(
                RNC_URL, self.p_count, **self.params)
        except creq.BaseRequestError as err:
            logger.error(
                f"Query = {self.forms_in_query}, "
                f"{self.p_count}, {self.params}\ne = {err}"
            )
            raise

        # The already downloaded first page is reused only
        # in the 'normal' output mode.
        logger.debug("Getting additional info from the first RNC page")
        info_args = (first_page,) if self.out == 'normal' else ()
        self._get_additional_info(*info_args)
        logger.debug("Additional info received")

        pages = [first_page]
        if self.p_count > 2:
            logger.debug("Main request")
            middle_pages = creq.get_htmls(
                RNC_URL, 1, self.p_count - 1, **self.params)
            pages = [first_page] + middle_pages + [last_page]
            logger.debug("Main request completed")
        elif self.p_count == 2:
            pages += [last_page]

        logger.debug("Parsing html started")
        parsing_started_at = time.time()
        try:
            examples = self._parse_all_pages(pages)
            parsing_finished_at = time.time()
        except Exception as err:
            logger.error(f"Error while parsing, query = {self.params}\n{err}")
            raise

        # Reached only if parsing succeeded, the handler above re-raises.
        parsing_time = parsing_finished_at - parsing_started_at
        overall_time = parsing_finished_at - started_at
        logger.debug("Parsing completed")
        logger.info(f"Parsing time: {parsing_time:.2f}")
        logger.info(f"Overall time: {overall_time:.2f}")
        self._data = examples[:]

예제 #3

파일 보기

def test_wrong_params():
    """ Call ``req.get_htmls`` with ``wrong_params``.

    NOTE(review): nothing is asserted here; presumably the call is
    expected to raise and that expectation (e.g. an xfail/raises
    marker) is declared outside of this snippet — confirm.
    """
    req.get_htmls(RNC_URL, **wrong_params)

예제 #4

파일 보기

def test_wait_some_time():
    """ Call ``req.get_htmls`` with the range arguments 0 and 15
    and expect exactly 15 html codes in the result.
    """
    # NOTE(review): this writes into the shared ``correct_params`` dict,
    # so 'lex1' stays changed for whatever uses the dict afterwards.
    correct_params['lex1'] = 'я'
    expected_amount = 15
    pages = req.get_htmls(RNC_URL, 0, expected_amount, **correct_params)
    assert len(pages) == expected_amount

예제 #5

파일 보기

def test_wrong_range():
    """ With the range arguments 10 and 0 the result must be empty. """
    pages = req.get_htmls(RNC_URL, 10, 0)
    assert len(pages) == 0