Beispiel #1
0
    def scrape(self, pc=None, change_url=None):
        """Scrape a site, or one section of it, page after page.

        Every iteration builds the search URL from ``self.site_url`` and
        ``change_url``, turns it into the next page URL via
        ``self.next_page_url``, fetches it with ``self.get_page`` and hands
        the result to ``self.get_list``.  The loop keeps going for as long as
        ``self.run`` is ``True``; nothing in this method clears the flag, so
        one of the helpers is presumably responsible for that (TODO confirm).

        :param pc: integer indicating where to start with a paginated url.
            When given, it is stored in ``self.pc`` before the loop starts.
        :param change_url: the changing part of the wider site url, if there
            are multiple sections to hit.  ``None`` means the whole site.
        """
        self.run = True  # initialization of a site/section.
        if pc is not None:
            self.pc = pc
        while self.run is True:
            url = self.next_page_url(build_search_url(self.site_url, change_url))
            try:
                page = self.get_page(url)
            except Exception as exc:
                # Best-effort: a failing page must not abort the whole run, but
                # the cause is logged so the skip can be diagnosed afterwards.
                # NOTE(review): this assumes next_page_url advances the page
                # state; otherwise a permanently failing URL would be retried
                # forever -- confirm.
                self.logger.error("Error with %s and skipped: %r" % (url, exc))
                continue
            self.get_list(page)
        if change_url is None:
            self.logger.info("Site %s finished" % self.site_url)
        else:
            self.logger.info("Section %s finished" % change_url)
Beispiel #2
0
    def scrape(self, pc=None, change_url=None):
        """Scrape a site, or one section of it, page after page.

        Every iteration builds the search URL from ``self.site_url`` and
        ``change_url``, turns it into the next page URL via
        ``self.next_page_url``, fetches it with ``self.get_page`` and hands
        the result to ``self.get_list``.  The loop keeps going for as long as
        ``self.run`` is ``True``; nothing in this method clears the flag, so
        one of the helpers is presumably responsible for that (TODO confirm).
        Once the loop ends, ``self.pc`` is reset to 0 so the next section
        starts from the beginning.

        :param pc: integer indicating where to start with a paginated url.
            When given, it is stored in ``self.pc`` before the loop starts.
            Per the original note it also acts as a limit for when to move on
            from a section; that check is not implemented in this method.
        :param change_url: the changing part of the wider site url, if there
            are multiple sections to hit.  ``None`` means the whole site.
        """
        self.run = True  # initialization of a site/section.
        if pc is not None:
            self.pc = pc
        while self.run is True:
            url = self.next_page_url(build_search_url(self.site_url, change_url))
            try:
                page = self.get_page(url)
            except Exception as exc:
                # Best-effort: a failing page must not abort the whole run, but
                # the cause is reported so the skip can be diagnosed afterwards.
                # NOTE(review): this assumes next_page_url advances the page
                # state; otherwise a permanently failing URL would be retried
                # forever -- confirm.
                print("Error with %s and skipped: %r" % (url, exc))
                continue
            self.get_list(page)
        if change_url is None:
            print("Site %s finished" % self.site_url)
        else:
            print("Section %s finished" % change_url)
        self.pc = 0  # re-init pc to run next section