def parse(self, response): logging.debug(f"Parse started") itemcount = 0 for item in response.xpath("//tr[@data-gtm-name]"): itemcount += 1 logging.debug(f"Parsing item {itemcount}") yield ProductItem( location = None, title = item.xpath("@data-gtm-name").get(), price = Helpers.getNumber(item.xpath("@data-gtm-price").get()), seller = None, image_urls = Helpers.imageUrl(None, item.xpath(".//td[@class='listing-item-picture']/span/img/@src").get()), url = item.xpath(".//a[@class='itemlink']/@href").get(), extraid = item.xpath("@data-gtm-id").get(), currency = "HUF", crawlerid = self.crawlerid, spiderbotid = self.spiderbotid, pageitemcount = itemcount, pagenumber = self.scrapedpages, pageurl = response.url ) next_page = response.xpath("//a[contains(img/@src, '/arw_frw.gif')]/@href").get() if next_page and self.scrapedpages<self.maxpages: self.scrapedpages += 1 logging.debug(f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}") yield response.follow(next_page, self.parse)
def parse(self, response): logging.debug(f"Parse started") itemcount = 0 for item in response.xpath("//figure"): itemcount += 1 logging.debug(f"Parsing item {itemcount}") yield ProductItem( title=Helpers.getString( item.xpath("./div[@class='relative']/div/a/@title").get()), url=response.urljoin( item.xpath("./div[@class='relative']/div/a/@href").get()), image_urls=Helpers.imageUrl( None, item.xpath("./div/div/a/img/@src").get()), extraid=item.xpath( "./div[@class='relative']/div/a/@href").get(), seller=Helpers.getString( item.xpath( ".//span[@class='fw_bold color_light']/text()").get()), price=Helpers.getNumber( item.xpath(".//span[@class='d_block']/text()").get()), currency=Helpers.getCurrency( item.xpath(".//span[@class='d_block']/text()").get()), location=Helpers.getString( item.xpath( ".//span[@class='d_block fw_bold color_light']/text()" ).get()), crawlerid=self.crawlerid, spiderbotid=self.spiderbotid, pageitemcount=itemcount, pagenumber=self.scrapedpages, pageurl=response.url) next_page = response.xpath( "//a[i/@class='fa fa-angle-right d_inline_m']/@href").get() if next_page and self.scrapedpages < self.maxpages: self.scrapedpages += 1 logging.debug( f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}" ) yield response.follow(next_page, self.parse)
def parse_sorted(self, response): logging.debug(f"Parse storted") itemcount = 0 for item in response.xpath("//div[@class='h0_elem']"): itemcount += 1 logging.debug(f"Parsing item {itemcount}") yield ProductItem( title=Helpers.getString( item.xpath(".//div[@ class='h_nev']//a/text()").get()), url=response.urljoin( item.xpath( ".//div[@class='h_info']/a[@style='color:#427CB3']/@href" ).get()), image_urls=Helpers.imageUrl( None, item.xpath( ".//div[@class='h_left_inner']//img/@src").get()), extraid=item.xpath( ".//div[@class='h_info']/a[@style='color:#427CB3']/@href"). get(), seller=None, price=Helpers.getNumber( item.xpath(".//div[@class='h_ar']/text()").get()), currency=Helpers.getCurrency( item.xpath(".//div[@class='h_ar']/text()").get()), location=None, crawlerid=self.crawlerid, spiderbotid=self.spiderbotid, pageitemcount=itemcount, pagenumber=self.scrapedpages, pageurl=response.url) next_page = response.xpath( "//a[@class = 'h_oldal_kovetkezo']/@href").get() if next_page and self.scrapedpages < self.maxpages: self.scrapedpages += 1 logging.debug( f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}" ) yield response.follow(next_page, self.parse_sorted)
def parse(self, response): logging.debug(f"Parse started") itemcount = 0 for item in response.xpath("//li[@class='srBlock']"): itemcount += 1 logging.debug(f"Parsing item {itemcount}") yield ProductItem( title=Helpers.getString( item.xpath(".//div[@class='srData floatL']/div/h3/a/text()" ).get()), url=item.xpath( ".//div[@class='srData floatL']/div/h3/a/@href").get(), seller=None, image_urls=Helpers.imageUrl( None, item.xpath( ".//div[@class='srImg floatL']//img/@data-original"). get()), extraid=item.xpath( ".//div[@class='srData floatL']/div/h3/a/@href").get(), price=Helpers.getNumber( item.xpath(".//div[@class='srPrice']/text()").get()), currency=Helpers.getCurrency( item.xpath(".//div[@class='srPrice']/text()").get()), location=Helpers.getString( item.xpath(".//div[@class='location']/i/text()").get()), crawlerid=self.crawlerid, spiderbotid=self.spiderbotid, pageitemcount=itemcount, pagenumber=self.scrapedpages, pageurl=response.url) next_page = response.xpath( "//div[@id='searchResultPagination']/a[contains(text(), 'Következő')]/@href" ).get() if next_page and self.scrapedpages < self.maxpages: self.scrapedpages += 1 logging.debug( f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}" ) yield response.follow(next_page, self.parse)
def parse(self, response): logging.debug(f"Parse started") itemcount = 0 for item in response.xpath("//div[@class='gtm-impression prod']"): itemcount += 1 logging.debug(f"Parsing item {itemcount}") location = item.xpath( ".//div[contains(text(),'Termék helye:')]/text()").get() if location: location = Helpers.getString( location.replace("Termék helye:", "")) yield ProductItem( title=item.xpath("@data-gtm-name").get(), price=Helpers.getNumber(item.xpath("@data-gtm-price").get()), seller=Helpers.getString( item.xpath(".//span[@class='userrating']/a/text()").get()), image_urls=Helpers.imageUrl( None, item.xpath( ".//div[@class='picbox']/img/@data-original").get()), url=item.xpath(".//a[@class='product_link']/@href").get(), location=location, extraid=item.xpath("@data-product-id").get(), currency="HUF", crawlerid=self.crawlerid, spiderbotid=self.spiderbotid, pageitemcount=itemcount, pagenumber=self.scrapedpages, pageurl=response.url) next_page = response.xpath( "//a[@aria-label='Következő oldal']/@href").get() if next_page and self.scrapedpages < self.maxpages: self.scrapedpages += 1 logging.debug( f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}" ) yield response.follow(next_page, self.parse)
def parse(self, response): logging.debug(f"Parse started") itemcount = 0 for item in response.xpath("//li[@class='media']"): itemcount += 1 logging.debug(f"Parsing item") yield ProductItem( title=Helpers.getString(item.xpath(".//h1/a/text()").get()), seller=Helpers.getString( item.xpath( ".//div[@class='uad-misc']/div/a/text()").get()), image_urls=Helpers.imageUrl(response, item.xpath("./a/img/@src").get()), url=response.urljoin(item.xpath(".//h1/a/@href").get()), extraid=item.xpath(".//h1/a/@href").get(), price=Helpers.getNumber( item.xpath( ".//div[@class='uad-info']/div[@class='uad-price']/text()" ).get()), currency='HUF', location=Helpers.getString( item.xpath( ".//div[@class='uad-info']/div[@class='uad-light']/text()" ).get()), crawlerid=self.crawlerid, spiderbotid=self.spiderbotid, pageitemcount=itemcount, pagenumber=self.scrapedpages, pageurl=response.url) next_page = response.xpath( "//li[@class='nav-arrow']/a[@rel='next']/@href").get() if next_page and self.scrapedpages < self.maxpages: self.scrapedpages += 1 logging.debug( f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}" ) yield response.follow(next_page, self.parse)
def parse(self, response): logging.debug(f"Parse started") itemcount = 0 for item in response.xpath("//div[@data-hirdetes-id]"): itemcount += 1 logging.debug(f"Parsing item {itemcount}") yield ProductItem( extraid=item.xpath("./@data-hirdetes-id").get(), image_urls=Helpers.imageUrl(None, item.xpath(".//img/@src").get()), url=response.urljoin( item.xpath(".//a[@class='tile-link']/@href").get()), title=item.xpath(".//img/@alt").get(), seller=None, price=Helpers.getNumber( item.xpath(".//span[@class='h4 mr-1 text-primary']/text()" ).get()), currency="HUF", location=Helpers.getString( item.xpath(".//p[@class='mb-0 text-muted text-sm']/text()" ).get()), crawlerid=self.crawlerid, spiderbotid=self.spiderbotid, pageitemcount=itemcount, pagenumber=self.scrapedpages, pageurl=response.url) next_page = response.xpath( "//li[@class='page-item active']/a[@class='page-link' and i/@class='fa fa-angle-right']/@href" ).get() if next_page and self.scrapedpages < self.maxpages: self.scrapedpages += 1 logging.debug( f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}" ) yield response.follow(next_page, self.parse)
def parse(self, response): logging.debug(f"Parse started") itemcount = 0 for item in response.xpath("//div[@class='indent']"): itemcount += 1 logging.debug(f"Parsing item {itemcount}") yield ProductItem( title=Helpers.getString( item.xpath(".//a[@class='product_name']/text()").get()), url=response.urljoin( item.xpath(".//a[@class='product_name']/@href").get()), extraid=item.xpath(".//a[@class='product_name']/@href").get(), seller=None, price=Helpers.getNumber( item.xpath(".//div[@class='price']/b/text()").get()), currency=Helpers.getNumber( item.xpath(".//div[@class='price']/b/text()").get()), location=Helpers.getString( item.xpath(".//div[@class='price']/i/b[1]/text()").get()), image_urls=Helpers.imageUrl( response, item.xpath(".//a[@class='product_name']/img/@src").get()), crawlerid=self.crawlerid, spiderbotid=self.spiderbotid, pageitemcount=itemcount, pagenumber=self.scrapedpages, pageurl=response.url) next_page = response.xpath("//a[@class='next']/@href").get() if next_page and self.scrapedpages < self.maxpages: self.scrapedpages += 1 logging.debug( f"Next page (#{str(self.scrapedpages)} of {self.maxpages}): {next_page}" ) yield response.follow(next_page, self.parse)