Example #1
0
    def _scrape_item(self):
        """Fetch the input URL and yield its title, first h1, and declared charset."""
        soup = self._get_soup(self.inp)

        # Each lookup takes the first matching element; an absent element raises IndexError.
        page_title = soup.select("title")[0].get_text()
        main_header = soup.select("h1")[0].get_text()
        declared_charset = soup.select("meta[charset]")[0].get("charset")

        yield ScrapeResults(self.inp, {
            "title": page_title,
            "header": main_header,
            "charset": declared_charset,
        })
Example #2
0
    def _scrape_paged(self):
        """Yield one ScrapeResults per overview page, following next-page links.

        Starts at self.inp and keeps fetching until no next URL is reported.
        """
        current = self.inp

        while True:
            overview, following = self._get_overview(current)
            yield ScrapeResults(current, overview)

            # A falsy next URL means pagination is exhausted.
            if not following:
                logging.debug("No more pages, finishing up")
                break
            current = following
Example #3
0
    def _scrape_paged(self):
        """Yield one ScrapeResults per page, following next-page links until exhausted.

        Starts at self.inp; stops when _parse_page reports no next URL.
        """
        # NOTE(review): the original kept an `index` counter that was incremented
        # but never read — removed as dead code.
        url = self.inp

        while True:
            page, next_url = self._parse_page(url)
            yield ScrapeResults(url, page)

            # A falsy next URL means pagination is exhausted.
            if not next_url:
                logging.debug("No more pages, finishing up")
                break
            url = next_url
Example #4
0
    def _scrape_paged(self):
        """Yield the articles of each listing page, following next_page_path links."""
        url = self.inp
        logging.debug(f"Starting with < {url} >")

        while True:
            # The page embeds its payload in the #z-nvg-cognac-props element.
            payload = self._get_soup(url, "#z-nvg-cognac-props")
            yield ScrapeResults(url, payload["articles"])

            # Absence of the key signals the last page.
            if "next_page_path" not in payload:
                logging.debug("No more pages, finishing up")
                break

            url = f"https://www.{self.domain}" + payload["next_page_path"]
            logging.debug(f"Setting next url: {url}")
Example #5
0
    def _scrape_item(self):
        """Scrape a single product page and yield its structured fields."""
        soup = self._get_soup(self.inp)

        # Product details live as HTML-escaped JSON in a data-options attribute.
        raw_options = soup.select(".gallery [data-options]")[0].get("data-options")
        options = json.loads(html.unescape(raw_options))
        id_text = soup.select(".product-panel__id p")[0].get_text()

        product = options["product"]
        yield ScrapeResults(self.inp, {
            "set": options["set"],
            "name": product["name"],
            "description": product["description"],
            "price": to_number(product["price"]),
            "product_id": to_number(id_text),
        })
Example #6
0
    def _scrape_paged(self):
        """Yield search results page by page, advancing by PAGE_SIZE offsets.

        Stops when the backend raises ZeroItems for an offset.
        """
        offset = 0

        while True:
            url = f"{self.inp}?offset={offset}&page-size={self.PAGE_SIZE}"
            # Log the URL we are actually about to fetch — the original logged
            # the *previous* url after advancing the offset, which was misleading.
            logging.debug(f"Setting next url: {url}")

            try:
                data = self._get_search(url)
            except ZeroItems:
                logging.debug("No more pages, finishing up")
                break

            yield ScrapeResults(url, data)
            offset += self.PAGE_SIZE
Example #7
0
    def _scrape_paged(self):
        """Yield search results page by page via an incrementing page parameter.

        Stops when the backend raises ZeroItems for a page.
        """
        page = 0

        while True:
            # Append to an existing query string; otherwise request everything
            # sorted by relevance.
            if "?q=" in self.inp:
                url = f"{self.inp}&page={page}"
            else:
                url = f"{self.inp}?q=%3Arelevance&page={page}"
            # Log the URL we are actually about to fetch — the original logged
            # the *previous* url after incrementing the page, which was misleading.
            logging.debug(f"Setting next url: {url}")

            try:
                data = self._get_search(url)
            except ZeroItems:
                logging.debug("No more pages, finishing up")
                break

            yield ScrapeResults(url, data)
            page += 1
Example #8
0
 def _scrape_item(self):
     """Parse the input URL as a single page and yield its results."""
     yield ScrapeResults(self.inp, self._parse_page(self.inp))
Example #9
0
 def _scrape_item(self):
     """Extract the #pdpMain element's JSON payload and yield it."""
     # First #pdpMain match; raises IndexError if the element is missing.
     main_el = self._get_soup(self.inp).select("#pdpMain")[0]
     yield ScrapeResults(self.inp, parse_json(main_el))
Example #10
0
    def _scrape_item(self):
        """Yield the payload embedded in the page's #z-vegas-pdp-props element."""
        props = self._get_soup(self.inp, "#z-vegas-pdp-props")
        yield ScrapeResults(self.inp, props)
Example #11
0
    def _scrape_item(self):
        """Extract the page's JSON-LD structured data and yield it."""
        soup = self._get_soup(self.inp)
        # First application/ld+json script holds the structured data.
        ld_scripts = soup.select('script[type="application/ld+json"]')
        yield ScrapeResults(self.inp, json.loads(ld_scripts[0].get_text()))