def get_next_page_url(self):
    """Return the ``href`` of the pagination "next" link.

    Looks up ``div.ui_pagination a.next`` through ``self.driver`` and reads
    its ``href`` attribute. The value is logged and returned unchanged, so
    the result is ``None`` when the attribute lookup yields ``None``.

    Any exception raised by the element lookup propagates to the caller.
    """
    Logger.log_it("Retrieving next page url...")
    next_link = self.driver.find_element_by_css_selector(
        'div.ui_pagination a.next')
    next_url = next_link.get_attribute("href")
    # Log the literal text "None" for a missing href; concatenating None
    # onto a string would raise TypeError.
    printable_url = "None" if next_url is None else next_url
    Logger.log_it("Next url: " + printable_url)
    return next_url
def save_to_file(reviews, location_name, current_page, last_page):
    """Write a batch of reviews to a CSV file under ``scraped_data/data_reviews``.

    The file name is built from ``location_name``, ``current_page`` and
    ``last_page``. The file is opened in ``'w'`` mode, so an existing file
    with the same name is overwritten. The header from
    ``Review.get_csv_header()`` is written first, followed by each review's
    ``get_csv_line()`` output exactly as returned (no separator is added
    here).
    """
    name_parts = (location_name, current_page, last_page)
    filename = (
        'scraped_data/data_reviews/selenium_reviews-%s-%s-%s.csv' % name_parts
    )
    with open(filename, 'w') as out:
        out.write(Review.get_csv_header())
        out.writelines(review.get_csv_line() for review in reviews)
    Logger.log_it('Saved file %s' % filename)
def next_page(self):
    """Click the pagination "next" link, selecting all languages first if needed.

    If ``is_all_languages_selected()`` is false, ``select_all_languages()``
    is called before the click. Any ``WebDriverException`` raised while
    doing so is caught and logged as "no more pages"; nothing is re-raised.
    The method returns ``None`` in every case.
    """
    next_link_selector = 'div.ui_pagination a.next'
    try:
        languages_ready = self.is_all_languages_selected()
        if not languages_ready:
            Logger.log_it(
                "All language not selected... selecting all language")
            self.select_all_languages()
        next_link = self.driver.find_element_by_css_selector(
            next_link_selector)
        next_link.click()
    except WebDriverException:
        Logger.log_it("There is no more pages!")
    # NOTE(review): implicitly_wait appears to configure the driver's
    # element-lookup timeout rather than pause execution -- confirm that
    # this is the intended effect here.
    self.driver.implicitly_wait(2)
def __init__(self, url):
    """Start the timer, launch Firefox and open ``url``.

    Sets ``self.timer`` (already started) and ``self.driver`` (a Firefox
    WebDriver with a 2-second implicit wait, positioned on ``url``).

    Raises:
        Whatever the driver raises while loading ``url`` or configuring
        the wait. In that case the browser is shut down before the
        exception propagates, because a failed constructor leaves the
        caller with no object through which to stop the driver.
    """
    Logger.log_it("##########################################")
    self.timer = Timer()
    self.timer.start_timer()
    # Alternative driver factory kept for reference:
    # self.driver = gecko_utils.get_gecko_driver()
    self.driver = webdriver.Firefox()
    try:
        # Language cookie kept for reference:
        # driver.add_cookie({'name': 'TALanguage', 'value': 'ALL'})
        self.driver.get(url)
        self.driver.implicitly_wait(2)
    except Exception:
        # Do not leak the freshly launched browser when setup fails.
        self.driver.quit()
        raise
def wrapper(*args, **kwargs):
    """Call ``f`` with the given arguments, retrying on Selenium errors.

    ``f`` is taken from the enclosing scope (not visible in this block).
    Up to three attempts are made. The first successful result is returned
    immediately. A ``StaleElementReferenceException`` or any other
    ``WebDriverException`` is logged and counts as a failed attempt; other
    exception types propagate. Returns ``None`` when all three attempts
    fail.
    """
    for _attempt in range(3):
        try:
            return f(*args, **kwargs)
        except StaleElementReferenceException:
            Logger.log_it("Stale element... retrying")
        except WebDriverException:
            Logger.log_it("Web driver exception... retrying")
    return None
def has_next_review_page(self):
    """Return ``True`` when ``get_next_page_url()`` yields a value other than ``None``."""
    Logger.log_it("Checking if next page exists...")
    next_url = self.get_next_page_url()
    return next_url is not None
def stop_spider(self):
    """Shut down the browser, stop the timer and log the elapsed time.

    Uses ``quit()`` rather than ``close()``: ``close()`` only closes the
    current window, whereas ``quit()`` ends the whole WebDriver session.
    The timer is stopped and its report logged even if shutting down the
    driver raises; the exception then propagates to the caller.
    """
    Logger.log_it("-------------------------------------------")
    try:
        self.driver.quit()
    finally:
        self.timer.stop_timer()
        Logger.log_it(self.timer.print_time())
def refresh_page(self):
    """Log the refresh, then reload the page currently open in ``self.driver``."""
    Logger.log_it("Refreshing")
    browser = self.driver
    browser.refresh()