def _get_additional_info(self, first_page: str = None) -> None:
    """ Get additional info: the number of found docs and contexts
    and the link to the graphic.
    """
    params = self.params.copy()
    params['lang'] = 'ru'
    params.pop('expand', None)
    try:
        first_page = first_page or creq.get_htmls(RNC_URL, **params)[0]
    except creq.BaseRequestError:
        raise

    soup = bs4.BeautifulSoup(first_page, 'lxml')
    content = soup.find('div', {'class': 'content'})

    try:
        additional_info = Corpus._get_where_query_found(content)
        graphic_url = Corpus._get_graphic_url(content)
    except Exception as e:
        logger.error("Something went wrong while "
                     f"getting additional info:\n{e}")
    else:
        if graphic_url:
            additional_info['graphic_link'] = graphic_url
        self._add_info = additional_info
def request_examples(self) -> None:
    """ Request the examples, parse them and update the data.

    An exception is raised if no results are found, the last page
    does not exist, or the params or the query are wrong.

    :return: None.
    :exception RuntimeError: if data already exist.
    """
    if self.data:
        logger.error("Tried to request new examples, but data already exist")
        raise RuntimeError("Data already exist")

    start = time.time()
    try:
        first, last = creq.is_request_correct(
            RNC_URL, self.p_count, **self.params)
    except creq.BaseRequestError as e:
        msg = f"Query = {self.forms_in_query}, " \
              f"{self.p_count}, {self.params}\ne = {e}"
        logger.error(msg)
        raise

    # get additional info from the first RNC page
    logger.debug("Getting additional info from the first RNC page")
    if self.out == 'normal':
        self._get_additional_info(first)
    else:
        self._get_additional_info()
    logger.debug("Additional info received")

    if self.p_count > 2:
        logger.debug("Main request")
        htmls = creq.get_htmls(RNC_URL, 1, self.p_count - 1, **self.params)
        htmls = [first] + htmls + [last]
        logger.debug("Main request completed")
    else:
        htmls = [first]
        if self.p_count == 2:
            htmls += [last]

    logger.debug("Parsing html started")
    try:
        parsing_start = time.time()
        parsed = self._parse_all_pages(htmls)
        parsing_stop = time.time()
    except Exception as e:
        logger.error(f"Error while parsing, query = {self.params}\n{e}")
        raise
    else:
        logger.debug("Parsing completed")
        logger.info(f"Parsing time: {parsing_stop - parsing_start:.2f}")
        logger.info(f"Overall time: {parsing_stop - start:.2f}")
        self._data = parsed[:]
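# Hedged usage sketch (illustrative, not from the source): assuming a concrete
# Corpus subclass named MainCorpus whose constructor takes the query and the
# page count, requesting and reading the examples might look like this. The
# subclass name, constructor signature and example attributes are assumptions.
def _request_examples_usage_sketch() -> None:
    corp = MainCorpus('кот', 2)    # hypothetical subclass and signature
    corp.request_examples()        # fetch the pages, parse them, fill the data
    for example in corp.data:      # data is set inside request_examples()
        print(example)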
def test_wrong_params():
    # wrong search params are expected to make the request fail
    with pytest.raises(Exception):
        req.get_htmls(RNC_URL, **wrong_params)
def test_wait_some_time():
    correct_params['lex1'] = 'я'
    html_codes = req.get_htmls(RNC_URL, 0, 15, **correct_params)
    assert len(html_codes) == 15
def test_wrong_range():
    assert len(req.get_htmls(RNC_URL, 10, 0)) == 0
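# Hedged call sketch (an assumption, not from the source): judging by the calls
# above, req.get_htmls(url, start, stop, **params) downloads the RNC result
# pages in the half-open range [start, stop) and returns their HTML codes as a
# list of strings.
def _get_htmls_usage_sketch() -> list:
    params = {'lex1': 'я'}                           # the key used in test_wait_some_time
    return req.get_htmls(RNC_URL, 0, 5, **params)    # first five result pages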