def scrape(self, pc=None, change_url=None):
    """Scrape a site (or one section of it) page by page until stopped.

    :param pc: integer page counter indicating where to start with a
        paginated URL; when ``None`` the current ``self.pc`` is kept.
    :param change_url: the changing part of the wider site URL, if there
        are multiple sections to hit; ``None`` means the whole site.
    """
    self.run = True  # (re)arm the loop; presumably cleared by get_list() when done -- TODO confirm
    if pc is not None:
        self.pc = pc
    while self.run:
        url = self.next_page_url(build_search_url(self.site_url, change_url))
        try:
            page = self.get_page(url)
        except Exception:
            # Best-effort: log and skip this page.
            # NOTE(review): assumes next_page_url() advances pagination on the
            # next iteration; otherwise a persistent fetch failure would loop
            # forever on the same URL -- confirm against next_page_url().
            self.logger.error("Error with %s and skipped", url)
            continue
        self.get_list(page)
    if change_url is None:
        self.logger.info("Site %s finished", self.site_url)
    else:
        self.logger.info("Section %s finished", change_url)
def scrape(self, pc=None, change_url=None):
    """Scrape a site (or one section of it) page by page until stopped.

    :param pc: integer page counter indicating where to start with a
        paginated URL; when ``None`` the current ``self.pc`` is kept.
        It also acts as a recursion limit, i.e. when to move on from a
        section.
    :param change_url: the changing part of the wider site URL, if there
        are multiple sections to hit; ``None`` means the whole site.
    """
    self.run = True  # (re)arm the loop; presumably cleared by get_list() when done -- TODO confirm
    if pc is not None:
        self.pc = pc
    while self.run:
        url = self.next_page_url(build_search_url(self.site_url, change_url))
        try:
            page = self.get_page(url)
        except Exception:
            # Best-effort: log and skip this page (was a bare print();
            # switched to self.logger for consistency with the sibling
            # scrape() definition above).
            # NOTE(review): assumes next_page_url() advances pagination on the
            # next iteration; otherwise a persistent fetch failure would loop
            # forever on the same URL -- confirm against next_page_url().
            self.logger.error("Error with %s and skipped", url)
            continue
        self.get_list(page)
    if change_url is None:
        self.logger.info("Site %s finished", self.site_url)
    else:
        self.logger.info("Section %s finished", change_url)
    self.pc = 0  # re-init pc so the next section starts from the beginning