Ejemplo n.º 1
0
    def parse(self, response):
        """Yield a ProductItem for every listing row, then follow the
        forward-arrow pagination link while under the page limit."""
        logging.debug("Parse started")
        itemcount = 0
        for item in response.xpath("//tr[@data-gtm-name]"):
            itemcount += 1
            logging.debug(f"Parsing item {itemcount}")

            yield ProductItem(
                location=None,
                title=item.xpath("@data-gtm-name").get(),
                price=Helpers.getNumber(item.xpath("@data-gtm-price").get()),
                seller=None,
                image_urls=Helpers.imageUrl(
                    None,
                    item.xpath(
                        ".//td[@class='listing-item-picture']/span/img/@src"
                    ).get()),
                url=item.xpath(".//a[@class='itemlink']/@href").get(),
                extraid=item.xpath("@data-gtm-id").get(),
                currency="HUF",
                crawlerid=self.crawlerid,
                spiderbotid=self.spiderbotid,
                pageitemcount=itemcount,
                pagenumber=self.scrapedpages,
                pageurl=response.url)

        # The forward-arrow icon image marks the "next page" anchor.
        next_page = response.xpath(
            "//a[contains(img/@src, '/arw_frw.gif')]/@href").get()
        if next_page and self.scrapedpages < self.maxpages:
            self.scrapedpages += 1
            logging.debug(
                f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}")
            yield response.follow(next_page, self.parse)
Ejemplo n.º 2
0
    def parse(self, response):
        """Yield a ProductItem for each <figure> card, then follow the
        chevron-icon pagination link while under the page limit."""
        logging.debug("Parse started")
        itemcount = 0
        for item in response.xpath("//figure"):
            itemcount += 1
            logging.debug(f"Parsing item {itemcount}")

            # The anchor inside the 'relative' wrapper carries both the title
            # and the detail href; the href also serves as the external id.
            ad_href = item.xpath("./div[@class='relative']/div/a/@href").get()
            # Price and currency are both parsed out of the same text node.
            price_text = item.xpath(".//span[@class='d_block']/text()").get()

            yield ProductItem(
                title=Helpers.getString(
                    item.xpath("./div[@class='relative']/div/a/@title").get()),
                url=response.urljoin(ad_href),
                image_urls=Helpers.imageUrl(
                    None, item.xpath("./div/div/a/img/@src").get()),
                extraid=ad_href,
                seller=Helpers.getString(
                    item.xpath(
                        ".//span[@class='fw_bold color_light']/text()").get()),
                price=Helpers.getNumber(price_text),
                currency=Helpers.getCurrency(price_text),
                location=Helpers.getString(
                    item.xpath(
                        ".//span[@class='d_block fw_bold color_light']/text()"
                    ).get()),
                crawlerid=self.crawlerid,
                spiderbotid=self.spiderbotid,
                pageitemcount=itemcount,
                pagenumber=self.scrapedpages,
                pageurl=response.url)

        # The right-angle chevron icon marks the "next page" link.
        next_page = response.xpath(
            "//a[i/@class='fa fa-angle-right d_inline_m']/@href").get()
        if next_page and self.scrapedpages < self.maxpages:
            self.scrapedpages += 1
            logging.debug(
                f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}")
            yield response.follow(next_page, self.parse)
Ejemplo n.º 3
0
    def parse_sorted(self, response):
        """Parse the sorted listing view: yield a ProductItem for each
        'h0_elem' card, then follow the next-page link.

        Args:
            response: the response for one sorted results page.

        Yields:
            One ProductItem per listing, then a follow-up request for the
            next page while ``scrapedpages`` is below ``maxpages``.
        """
        # Fixed log typo ("Parse storted") for consistency with the other
        # parse callbacks in this project.
        logging.debug("Parse started")
        itemcount = 0

        for item in response.xpath("//div[@class='h0_elem']"):
            itemcount += 1
            logging.debug(f"Parsing item {itemcount}")

            # The detail link doubles as the stable external id.
            # (Also removed a stray space in the original "@ class" xpath for
            # the title — equivalent expression, cleaner form.)
            detail_href = item.xpath(
                ".//div[@class='h_info']/a[@style='color:#427CB3']/@href"
            ).get()
            # Price and currency are both derived from the same text node.
            price_text = item.xpath(".//div[@class='h_ar']/text()").get()

            yield ProductItem(
                title=Helpers.getString(
                    item.xpath(".//div[@class='h_nev']//a/text()").get()),
                url=response.urljoin(detail_href),
                image_urls=Helpers.imageUrl(
                    None,
                    item.xpath(
                        ".//div[@class='h_left_inner']//img/@src").get()),
                extraid=detail_href,
                seller=None,  # seller is not exposed on this listing page
                price=Helpers.getNumber(price_text),
                currency=Helpers.getCurrency(price_text),
                location=None,  # location is not exposed on this listing page
                crawlerid=self.crawlerid,
                spiderbotid=self.spiderbotid,
                pageitemcount=itemcount,
                pagenumber=self.scrapedpages,
                pageurl=response.url)

        # 'h_oldal_kovetkezo' ~ Hungarian for "next page".
        next_page = response.xpath(
            "//a[@class = 'h_oldal_kovetkezo']/@href").get()
        if next_page and self.scrapedpages < self.maxpages:
            self.scrapedpages += 1
            logging.debug(
                f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}")
            yield response.follow(next_page, self.parse_sorted)
Ejemplo n.º 4
0
    def parse(self, response):
        """Yield a ProductItem for each 'srBlock' search result, then follow
        the "Következő" (Next) pagination link while under the page limit."""
        logging.debug("Parse started")
        itemcount = 0
        for item in response.xpath("//li[@class='srBlock']"):
            itemcount += 1
            logging.debug(f"Parsing item {itemcount}")

            # The result heading's href is used both as the item url and as
            # the external id.
            result_href = item.xpath(
                ".//div[@class='srData floatL']/div/h3/a/@href").get()
            # Price and currency come from the same text node.
            price_text = item.xpath(".//div[@class='srPrice']/text()").get()

            yield ProductItem(
                title=Helpers.getString(
                    item.xpath(
                        ".//div[@class='srData floatL']/div/h3/a/text()"
                    ).get()),
                url=result_href,
                seller=None,
                image_urls=Helpers.imageUrl(
                    None,
                    item.xpath(
                        ".//div[@class='srImg floatL']//img/@data-original"
                    ).get()),
                extraid=result_href,
                price=Helpers.getNumber(price_text),
                currency=Helpers.getCurrency(price_text),
                location=Helpers.getString(
                    item.xpath(".//div[@class='location']/i/text()").get()),
                crawlerid=self.crawlerid,
                spiderbotid=self.spiderbotid,
                pageitemcount=itemcount,
                pagenumber=self.scrapedpages,
                pageurl=response.url)

        # 'Következő' = Hungarian for "Next".
        next_page = response.xpath(
            "//div[@id='searchResultPagination']/a[contains(text(), 'Következő')]/@href"
        ).get()
        if next_page and self.scrapedpages < self.maxpages:
            self.scrapedpages += 1
            logging.debug(
                f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}")
            yield response.follow(next_page, self.parse)
Ejemplo n.º 5
0
    def parse(self, response):
        """Yield a ProductItem for each product card, then follow the
        aria-labelled next-page link while under the page limit."""
        logging.debug("Parse started")
        itemcount = 0
        for item in response.xpath("//div[@class='gtm-impression prod']"):
            itemcount += 1
            logging.debug(f"Parsing item {itemcount}")

            # 'Termék helye:' = "Product location:" — strip the label prefix
            # when the node is present; otherwise keep the raw (falsy) value.
            raw_location = item.xpath(
                ".//div[contains(text(),'Termék helye:')]/text()").get()
            location = (
                Helpers.getString(raw_location.replace("Termék helye:", ""))
                if raw_location else raw_location)

            yield ProductItem(
                title=item.xpath("@data-gtm-name").get(),
                price=Helpers.getNumber(item.xpath("@data-gtm-price").get()),
                seller=Helpers.getString(
                    item.xpath(".//span[@class='userrating']/a/text()").get()),
                image_urls=Helpers.imageUrl(
                    None,
                    item.xpath(
                        ".//div[@class='picbox']/img/@data-original").get()),
                url=item.xpath(".//a[@class='product_link']/@href").get(),
                location=location,
                extraid=item.xpath("@data-product-id").get(),
                currency="HUF",
                crawlerid=self.crawlerid,
                spiderbotid=self.spiderbotid,
                pageitemcount=itemcount,
                pagenumber=self.scrapedpages,
                pageurl=response.url)

        # 'Következő oldal' = Hungarian for "Next page".
        next_page = response.xpath(
            "//a[@aria-label='Következő oldal']/@href").get()
        if next_page and self.scrapedpages < self.maxpages:
            self.scrapedpages += 1
            logging.debug(
                f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}")
            yield response.follow(next_page, self.parse)
Ejemplo n.º 6
0
    def parse(self, response):
        """Parse one results page: yield a ProductItem for each 'media'
        list entry, then follow the rel=next pagination link.

        Args:
            response: the results-page response.

        Yields:
            One ProductItem per ad, then a follow-up request for the next
            page while ``scrapedpages`` is below ``maxpages``.
        """
        logging.debug("Parse started")
        itemcount = 0
        for item in response.xpath("//li[@class='media']"):
            itemcount += 1
            # FIX: include the item counter in the log line — it was missing
            # here, inconsistent with every other parse callback.
            logging.debug(f"Parsing item {itemcount}")

            # The heading link's href is used both (joined) as the item url
            # and (raw) as the external id.
            ad_href = item.xpath(".//h1/a/@href").get()

            yield ProductItem(
                title=Helpers.getString(item.xpath(".//h1/a/text()").get()),
                seller=Helpers.getString(
                    item.xpath(
                        ".//div[@class='uad-misc']/div/a/text()").get()),
                image_urls=Helpers.imageUrl(response,
                                            item.xpath("./a/img/@src").get()),
                url=response.urljoin(ad_href),
                extraid=ad_href,
                price=Helpers.getNumber(
                    item.xpath(
                        ".//div[@class='uad-info']/div[@class='uad-price']/text()"
                    ).get()),
                currency='HUF',  # this site lists prices in forint only
                location=Helpers.getString(
                    item.xpath(
                        ".//div[@class='uad-info']/div[@class='uad-light']/text()"
                    ).get()),
                crawlerid=self.crawlerid,
                spiderbotid=self.spiderbotid,
                pageitemcount=itemcount,
                pagenumber=self.scrapedpages,
                pageurl=response.url)

        next_page = response.xpath(
            "//li[@class='nav-arrow']/a[@rel='next']/@href").get()
        if next_page and self.scrapedpages < self.maxpages:
            self.scrapedpages += 1
            logging.debug(
                f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}")
            yield response.follow(next_page, self.parse)
Ejemplo n.º 7
0
    def parse(self, response):
        """Yield a ProductItem for each ad tile (marked by the
        data-hirdetes-id attribute), then follow pagination."""
        logging.debug("Parse started")
        itemcount = 0
        for item in response.xpath("//div[@data-hirdetes-id]"):
            itemcount += 1
            logging.debug(f"Parsing item {itemcount}")

            yield ProductItem(
                # 'hirdetes' = Hungarian for "advertisement"; the attribute
                # is the site's own ad id.
                extraid=item.xpath("./@data-hirdetes-id").get(),
                image_urls=Helpers.imageUrl(
                    None, item.xpath(".//img/@src").get()),
                url=response.urljoin(
                    item.xpath(".//a[@class='tile-link']/@href").get()),
                title=item.xpath(".//img/@alt").get(),
                seller=None,
                price=Helpers.getNumber(
                    item.xpath(
                        ".//span[@class='h4 mr-1 text-primary']/text()"
                    ).get()),
                currency="HUF",
                location=Helpers.getString(
                    item.xpath(
                        ".//p[@class='mb-0 text-muted text-sm']/text()"
                    ).get()),
                crawlerid=self.crawlerid,
                spiderbotid=self.spiderbotid,
                pageitemcount=itemcount,
                pagenumber=self.scrapedpages,
                pageurl=response.url)

        # NOTE(review): this selects the chevron link inside the *active*
        # page item — presumably how this site renders its "next" control;
        # verify against the live markup.
        next_page = response.xpath(
            "//li[@class='page-item active']/a[@class='page-link' and i/@class='fa fa-angle-right']/@href"
        ).get()
        if next_page and self.scrapedpages < self.maxpages:
            self.scrapedpages += 1
            logging.debug(
                f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}")
            yield response.follow(next_page, self.parse)
Ejemplo n.º 8
0
    def parse(self, response):
        """Parse one results page: yield a ProductItem for each 'indent'
        block, then follow the 'next' pagination link.

        Args:
            response: the results-page response.

        Yields:
            One ProductItem per listing, then a follow-up request for the
            next page while ``scrapedpages`` is below ``maxpages``.
        """
        logging.debug("Parse started")
        itemcount = 0
        for item in response.xpath("//div[@class='indent']"):
            itemcount += 1
            logging.debug(f"Parsing item {itemcount}")

            # The product-name link's href is used both (joined) as the item
            # url and (raw) as the external id.
            product_href = item.xpath(
                ".//a[@class='product_name']/@href").get()
            # Price and currency are both parsed out of the same text node.
            price_text = item.xpath(".//div[@class='price']/b/text()").get()

            yield ProductItem(
                title=Helpers.getString(
                    item.xpath(".//a[@class='product_name']/text()").get()),
                url=response.urljoin(product_href),
                extraid=product_href,
                seller=None,
                price=Helpers.getNumber(price_text),
                # BUG FIX: currency was computed with Helpers.getNumber,
                # which yields the numeric price, not a currency code. Every
                # sibling callback derives it with Helpers.getCurrency.
                currency=Helpers.getCurrency(price_text),
                location=Helpers.getString(
                    item.xpath(".//div[@class='price']/i/b[1]/text()").get()),
                image_urls=Helpers.imageUrl(
                    response,
                    item.xpath(".//a[@class='product_name']/img/@src").get()),
                crawlerid=self.crawlerid,
                spiderbotid=self.spiderbotid,
                pageitemcount=itemcount,
                pagenumber=self.scrapedpages,
                pageurl=response.url)

        next_page = response.xpath("//a[@class='next']/@href").get()
        if next_page and self.scrapedpages < self.maxpages:
            self.scrapedpages += 1
            logging.debug(
                f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}")
            yield response.follow(next_page, self.parse)