def wait_until_title_contains_keyword(self):
    """Wait up to five seconds for ``self.query`` to appear in the page title.

    A timeout is not fatal here: it is only recorded at debug level (as a
    ``SeleniumSearchError`` instance carrying the scraper name, the query and
    the current title) and the method then returns normally.
    """
    title_wait = WebDriverWait(self.webdriver, 5)
    try:
        title_wait.until(EC.title_contains(self.query))
    except TimeoutException:
        template = '{}: Keyword "{}" not found in title: {}'
        details = template.format(self.name, self.query, self.webdriver.title)
        # The error object is only logged, never raised.
        logger.debug(SeleniumSearchError(details))
def wait_until_serp_loaded(self):
    """
    Wait until the requested results page has loaded.

    For 'normal' searches the page counts as loaded once
    ``self.page_number`` appears inside the engine's "current page"
    pagination element. Engines without a known pagination selector
    (DuckDuckGo has no pagination) and every other search type fall back to
    waiting for the query to appear in the page title.

    Raises:
        SeleniumSearchError: the page number did not show up in time and the
            pagination element is missing altogether, which is treated as a
            sign of being blocked. The driver is quit before raising.
    """
    # CSS selector of the "current page" marker for each supported engine.
    pagination_selectors = {
        'google': '#navcnt td.cur',
        'yandex': '.pager__item_current_yes font font',
        'bing': 'nav li a.sb_pagS',
        'yahoo': '.compPagination strong',
        'baidu': '#page .fk_cur + .pc',
        'ask': '#paging .pgcsel .pg',
    }

    selector = None
    if self.search_type == 'normal':
        selector = pagination_selectors.get(self.search_engine_name)

    if selector is None:
        # Either a non-'normal' search or an engine with nothing to look for
        # in the pagination. Previously the latter left `selector` unbound
        # and the method crashed with UnboundLocalError.
        self.wait_until_title_contains_keyword()
        return

    try:
        time.sleep(1)
        WebDriverWait(self.webdriver, 5).until(
            EC.text_to_be_present_in_element(
                (By.CSS_SELECTOR, selector),
                str(self.page_number)
            )
        )
    except TimeoutException:
        self._save_debug_screenshot()
        try:
            # Only the element's existence matters here. find_element(By, ...)
            # replaces the legacy find_element_by_css_selector helper, which
            # newer Selenium releases no longer provide.
            self.webdriver.find_element(By.CSS_SELECTOR, selector)
        except NoSuchElementException:
            logger.error('Skip it, no such element - SeleniumSearchError')
            self.quit()
            raise SeleniumSearchError('Stop Scraping, seems we are blocked')
    except Exception:
        # Best effort: any other failure while waiting is logged and ignored.
        logger.error('Scrape Exception pass. Selector: ' + str(selector))
        self._save_debug_screenshot()
def wait_until_serp_loaded(self):
    """
    Wait until the requested results page has loaded.

    For 'normal' searches the page counts as loaded once the engine-specific
    marker element becomes visible. Engines without a known selector
    (DuckDuckGo has no pagination) and every other search type fall back to
    waiting for the query to appear in the page title.

    Raises:
        SeleniumSearchError: the marker element never became visible and the
            follow-up title wait raised ``TimeoutException``. The driver is
            quit before raising.
    """
    # CSS selector of the element that signals a loaded page, per engine.
    loaded_selectors = {
        'google': '#resultStats',
        'yandex': '.pager__item_current_yes font font',
        'bing': 'nav li a.sb_pagS',
        'yahoo': '.compPagination strong',
        'baidu': '#page .fk_cur + .pc',
        'ask': '#paging .pgcsel .pg',
    }

    selector = None
    if self.search_type == 'normal':
        selector = loaded_selectors.get(self.search_engine_name)

    if selector is None:
        # Either a non-'normal' search or an engine with no marker element.
        # Previously the latter left `selector` unbound and the method
        # crashed with UnboundLocalError.
        self.wait_until_title_contains_keyword()
        return

    try:
        WebDriverWait(self.webdriver, 5).until(
            EC.visibility_of_element_located(
                (By.CSS_SELECTOR, selector)))
    except (TimeoutException, NoSuchElementException):
        # WebDriverWait ignores NoSuchElementException while polling and
        # reports a missing element as TimeoutException, so both must be
        # caught for this fallback to run at all.
        logger.error(
            'No such element. Seeing if title matches before raising SeleniumSearchError'
        )
        self._save_debug_screenshot()
        try:
            self.wait_until_title_contains_keyword()
        except TimeoutException:
            # NOTE(review): only reachable if wait_until_title_contains_keyword
            # lets TimeoutException propagate - confirm against that helper.
            self.quit()
            raise SeleniumSearchError(
                'Stop Scraping, seems we are blocked')
    except Exception as e:
        # Best effort: any other failure while waiting is logged and ignored.
        logger.error('Scrape Exception pass. Selector: ' + str(selector))
        logger.error('Error: ' + str(e))
        self._save_debug_screenshot()