def parse_src(self, response):
    """Build an item from an article page and yield it when it has body text.

    The text nodes of ``p.Normal`` paragraphs are concatenated into the
    content, skipping fragments that contain "video" or "Video" and fragments
    that are exactly a newline.  The item is stored on ``self.item``; it is
    yielded only if the collected content is non-empty.
    """
    self.item = Item()

    fragments = response.xpath('//p[@class="Normal"]/text()').extract()
    body = "".join(
        text
        for text in fragments
        if not ("video" in text or "Video" in text or text == "\n")
    )

    self.item["content"] = body
    self.item["description"] = response.xpath(
        '//h2[@class="description"]/text()'
    ).extract()
    self.item["title"] = response.xpath(
        '//h1[@class="title_news_detail mb10"]/text()'
    ).extract()

    # Pages without any usable paragraph text produce no item.
    if not body:
        return
    yield self.item
def parse_src(self, response):
    """Build an item from an article page and yield it when it has body text.

    ``time``, ``title`` and ``description`` are stored as the lists returned
    by ``extract()``; ``content`` is the concatenation of the paragraph text
    nodes under ``div#divNewsContent``.  The item is kept on ``self.item``
    and yielded only if the content is non-empty.
    """
    # (field name, XPath) pairs, applied in this order.
    list_fields = (
        ("time", '//span[@class="fr fon7 mr2 tt-capitalize"]/text()'),
        ("title", '//h1[@class="fon31 mgb15"]/text()'),
        ("description", '//h2[@class="fon33 mt1 sapo"]/text()'),
    )

    item = self.item = Item()
    for field, query in list_fields:
        item[field] = response.xpath(query).extract()

    body = "".join(
        response.xpath('//div[@id="divNewsContent"]/p/text()').extract()
    )
    item["content"] = body

    if body:
        yield item
def parse_src(self, response):
    """Build an item from an article page and yield it when it has body text.

    ``time``, ``title`` and ``description`` hold the extracted text-node
    lists; ``content`` is the joined paragraph text of the article body.
    The item is kept on ``self.item`` and yielded only if the content is
    non-empty.
    """

    def texts(query):
        # Every field on this page is read the same way: XPath -> list of str.
        return response.xpath(query).extract()

    item = self.item = Item()
    item["time"] = texts(
        '//li[@class="the-article-publish cms-date"]/text()'
    )
    item["title"] = texts(
        '//h1[@class="the-article-title cms-title"]/text()'
    )
    item["description"] = texts(
        '//p[@class="the-article-summary cms-desc"]/text()'
    )

    body = "".join(
        texts('//div[@class="the-article-body cms-body"]/p/text()')
    )
    item["content"] = body

    if body:
        yield item
def parse_src(self, response):
    """Build an item from an article page and yield it when it has body text.

    ``time``, ``title`` and ``description`` hold the extracted text-node
    lists (the description is taken from ``<strong>`` text inside the article
    paragraphs); ``content`` is the joined direct paragraph text of
    ``div#ArticleContent``.  The item is kept on ``self.item`` and yielded
    only if the content is non-empty.
    """
    self.item = Item()

    published = response.xpath(
        '//div[@class="ArticleDateTime"]/span[@class="ArticleDate"]/text()'
    ).extract()
    self.item["time"] = published

    headline = response.xpath(
        '//div[@class="ArticleDetail"]/h1[@class="title"]/text()'
    ).extract()
    self.item["title"] = headline

    summary = response.xpath(
        '//div[@id="ArticleContent"]/p/strong/text()'
    ).extract()
    self.item["description"] = summary

    paragraphs = response.xpath(
        '//div[@id="ArticleContent"]/p/text()'
    ).extract()
    body = "".join(paragraphs)
    self.item["content"] = body

    # Nothing to emit for pages without paragraph text.
    if not body:
        return
    yield self.item
def parse_ad(self, ad_html):
    """Parse one ad's HTML fragment into an item.

    The ad id is read from the ``ad-id`` attribute of ``div.ad-options``
    (left as returned, possibly ``None``).  Title, price, link and image
    source are stripped of surrounding whitespace; the link is prefixed with
    ``self.base_url`` and the image source with ``https:``.

    Raises:
        AttributeError: if the title, price, link or image node is missing,
            because ``get()`` then returns ``None``.
    """
    ad = Selector(text=ad_html)

    ad_id = ad.css("div.ad-options::attr(ad-id)").get()
    link = ad.css("a.adName")
    title = link.css("::text").get().strip()
    price = ad.css("span.adPrice::text").get().strip()
    href = link.css("::attr(href)").get().strip()
    image_src = ad.css("div.adImgWrapper img::attr(src)").get().strip()

    item = Item()
    values = {
        "site": self.site,
        "source_id": ad_id,
        "url": f"{self.base_url}{href}",
        "title": title,
        "price": price,
        "image": f"https:{image_src}",
    }
    for field, value in values.items():
        item[field] = value
    return item
def parse_ad(self, ad_html):
    """Parse one ad's HTML fragment into an item.

    Title and link come from the anchor inside ``.offer-title``; the link is
    prefixed with ``self.base_url``.  The source id is the second-to-last
    piece of the full URL when it is split from the right on ``/`` at most
    twice.  The image is read from the ``data-src`` attribute of the advert
    picture.

    Raises:
        AttributeError: if the title, link, image or price node is missing,
            because ``get()`` then returns ``None``.
    """
    ad = Selector(text=ad_html)

    anchor = ad.css(".offer-title a")
    title = anchor.css("::text").get().strip()
    href = anchor.css("::attr(href)").get().strip()
    full_url = f"{self.base_url}{href}"

    # "<...>/<id>/<tail>" -> [<...>, <id>, <tail>]; the id is the middle part.
    _, ad_id, _ = full_url.rsplit("/", 2)[0], full_url.rsplit("/", 2)[1], None

    picture = ad.css(
        "picture.advert-picture img::attr(data-src)"
    ).get().strip()
    price = ad.css("p.offer-price span::text").get().strip()

    item = Item()
    item["site"] = self.site
    item["source_id"] = ad_id
    item["url"] = full_url
    item["title"] = title
    item["price"] = price
    item["image"] = picture
    return item
def parse_item(response):
    """Extract a product item from a product detail page.

    All scraped fields are stored as the lists returned by ``extract()``.
    When no original-price text is found, ``original_price`` falls back to
    the ``price`` value.  ``image_urls`` is the main photo source(s) followed
    by the thumbnail sources.  ``source`` is always ``'berrybenka'``.
    """

    def grab(query):
        return response.xpath(query).extract()

    title_box = '//div[@class="prod-spec-title"]'
    assert title_box + '/h1/text()' == '//div[@class="prod-spec-title"]/h1/text()'

    item = Item()
    item['name'] = grab('//div[@class="prod-spec-title"]/h1/text()')
    item['brand'] = grab('//div[@class="prod-spec-title"]/h2/a/text()')
    item['description'] = grab('//p[@id="product_description"]/text()')
    item['price'] = grab('//div[@class="prod-spec-title"]/p/text()')
    item['url'] = response.url

    # No separate original price on the page means the item is not discounted.
    list_price = grab('//div[@class="prod-spec-title"]/p/span/text()')
    item['original_price'] = list_price if list_price else item['price']

    main_photos = grab(
        '//div[@class="detail-photo left"]/div[@class="big-photo left"]/a/img/@src'
    )
    thumbnails = grab(
        '//div[@class="detail-photo left"]/div[@class="small-photo left"]/ul/li/a/img/@src'
    )
    item['image_urls'] = main_photos + thumbnails

    item['source'] = 'berrybenka'

    # Sizes are stored as the raw label texts, without further parsing.
    item['sizes'] = grab(
        '//div[@class="filter-size filter-content"]/ul/li/div/label/text()'
    )
    return item
def parse_item(response):
    """Extract a product item from a product detail page.

    Scraped text fields are stored as the lists returned by ``extract()``.
    When no lowest-price text is found (no discount), ``price`` falls back to
    ``original_price``.  Image sources are passed through
    ``parse_images_urls`` before being stored.  ``sizes`` holds the text of
    size options that are not disabled.  ``source`` is always ``'zalora'``.

    NOTE(review): ``description`` is read from the same node as ``name`` --
    confirm this is intended.
    """

    def grab(query):
        return response.xpath(query).extract()

    item = Item()
    item['name'] = grab('//div[@class="product__title fsm"]/text()')
    item['brand'] = grab(
        '//div[@class="js-prd-brand product__brand"]/a/text()'
    )
    item['description'] = grab('//div[@class="product__title fsm"]/text()')
    item['url'] = response.url
    item['original_price'] = grab('//span[@id="js-price"]/text()')

    # An empty result here means the product has no discounted price.
    discounted = grab(
        '//span[@class="js-detail_updateSku_lowestPrice"]/text()'
    )
    item['price'] = discounted or item['original_price']

    raw_image_urls = grab(
        '//ul[@class="prd-moreImagesList ui-listItemBorder ui-listLight swiper-wrapper"]/li/a/img/@src'
    )
    item['image_urls'] = parse_images_urls(raw_image_urls)

    item['source'] = 'zalora'

    available_sizes_query = (
        '//option[(contains(@data-attribute,"size")) and not(contains(@disabled, "disabled"))]'
        '/text()'
    )
    # Sizes are stored as the raw option texts, without further parsing.
    item['sizes'] = grab(available_sizes_query)
    return item
def parse_ad(self, response):
    """Parse an ad detail page into an item.

    Title, first gallery image source and price are read from inside
    ``div.uk-container.body`` and stripped of surrounding whitespace.  The
    source id comes from ``response.meta["ad_id"]`` and the URL from the
    response itself.

    Raises:
        AttributeError: if the title, image or price node is missing,
            because ``get()`` then returns ``None``.
    """
    body = response.css("div.uk-container.body")

    heading = body.css("div.table-cell-left > h1::text").get().strip()
    picture = body.css("ul#image-gallery li img::attr(src)").get().strip()
    # The price used to be read from div.price-item-discount, falling back to
    # div.price-item; it is now read from span.priceClassified only.
    amount = body.css("span.priceClassified::text").get().strip()

    item = Item()
    values = {
        "site": self.site,
        "source_id": response.meta["ad_id"],
        "url": response.url,
        "title": heading,
        "price": amount,
        "image": picture,
    }
    for field, value in values.items():
        item[field] = value
    return item