def parse_artist_page(self, response):
    """Add the artist's profile links to the item carried in the request meta.

    The partially-filled item is read from ``response.meta['item']``; every
    link inside the profile-info block is collected as ``artist_urls`` and
    the loaded item is returned.
    """
    partial_item = response.meta['item']
    links_xpath = '//div[contains(@class, "category-userprofile-info")]//a/@href'

    loader = ProductItemLoader(partial_item, response=response)
    loader.add_xpath('artist_urls', links_xpath)
    return loader.load_item()
def parse(self, response):
    """Yield one artist-profile request per tee slide found on the page.

    Each slide's data attributes are loaded into a product item, which is
    attached to the request as ``meta['item']`` so that
    ``parse_artist_page`` can complete it.
    """
    expiry = response.xpath(
        '//div[@class="index-countdown"]/@data-time').extract_first()
    currency_codes = ['usd', 'gbp', 'eur']
    slides = response.css('.big-slide.tee > div')

    for position, slide in enumerate(slides):
        product_id = slide.xpath('@data-id').extract_first()
        artist_id = slide.xpath('@data-user-id').extract_first()

        price_parts = []
        for code in currency_codes:
            price_parts.append(self.build_currency_string(slide, code))

        loader = ProductItemLoader(response=response, selector=slide)
        loader.add_xpath('name', '@data-name')
        loader.add_value('url', response.url)
        loader.add_value('image_url', self.build_image_url(product_id))
        loader.add_xpath('artist_name', '@data-user')
        loader.add_value('prices', ' / '.join(price_parts))
        # Slides at index 3 and beyond are flagged as last-chance.
        loader.add_value('last_chance', position > 2)
        loader.add_value('expires_at', expiry)

        profile_url = urlparse.urljoin(response.url, '/profile/' + artist_id)
        request = scrapy.Request(profile_url,
                                 callback=self.parse_artist_page,
                                 dont_filter=True)
        request.meta['item'] = loader.load_item()
        yield request
def parse(self, response):
    """Yield a product-page request for every product link on the page.

    Each request carries an item pre-loaded with ``expires_at``, read from
    the ``segundos_restantes`` input on this page.
    """
    links_xpath = ('//a[contains(@href, "productos") and '
                   '@class="btn btn-ficha"]/@href')
    expiry_xpath = '//input[@id="segundos_restantes"]/@value'

    product_urls = response.xpath(links_xpath).extract()
    for product_url in product_urls:
        loader = ProductItemLoader(response=response)
        loader.add_xpath('expires_at', expiry_xpath)

        request = scrapy.Request(product_url,
                                 callback=self.__parse_product_page)
        request.meta['item'] = loader.load_item()
        yield request
def __parse_product_page(self, response):
    """Build a product item and return a request for the artist's page.

    Only responses whose URL contains ``/product/`` are handled; for any
    other URL the method returns ``None``.  The loaded item travels to
    ``parse_artist_page`` as ``meta['item']``.
    """
    if '/product/' not in response.url:
        return None

    product_id = response.xpath(
        '//a[contains(@class, "buy-button")]/@data-id').extract_first()
    artist_link = response.xpath('//span[@class="author"]/a')
    price_node = response.xpath('//p[@class="product-price"]')

    price_parts = []
    for code in ['usd', 'gbp', 'eur']:
        price_parts.append(self.build_currency_string(price_node, code))

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//span[@class="name"]/text()')
    loader.add_value('url', response.url)
    loader.add_value('image_url', self.build_image_url(product_id))
    loader.add_value('artist_name', artist_link.xpath('text()').extract())
    loader.add_value('prices', ' / '.join(price_parts))
    # NOTE(review): a URL that passed the '/product/' guard above cannot
    # equal the last-chance URL, so this looks like it is always False.
    is_last_chance = response.url == 'https://www.qwertee.com/last-chance'
    loader.add_value('last_chance', is_last_chance)

    artist_href = artist_link.xpath('@href').extract_first()
    request = scrapy.Request(urlparse.urljoin(response.url, artist_href),
                             callback=self.parse_artist_page,
                             dont_filter=True)
    request.meta['item'] = loader.load_item()
    return request
def __parse_artist_page(self, response):
    """Complete the item from ``response.meta['item']`` with artist details.

    Adds the page's ``<h3>`` text as ``artist_name`` and the social-icon
    links as ``artist_urls``, then returns the loaded item.
    """
    partial_item = response.meta['item']
    loader = ProductItemLoader(partial_item, response=response)

    artist_fields = (
        ('artist_name', '//h3/text()'),
        ('artist_urls',
         '//div[@class="col-sm-4"]//a[contains(@class, "socialIcon")]/@href'),
    )
    for field, query in artist_fields:
        loader.add_xpath(field, query)

    return loader.load_item()
def __parse_product_page(self, response):
    """Build and return a product item from a product detail page.

    Artist name and URL are both taken from the first link following the
    "Artist:" label; the image comes from the ``og:image`` meta tag.
    """
    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//h1/text()')
    loader.add_value('url', response.url)

    remaining_fields = (
        ('artist_name', '//span[text()="Artist:"]/following::a[1]/text()'),
        ('artist_urls', '//span[text()="Artist:"]/following::a[1]/@href'),
        ('image_url', '//meta[@property="og:image"]/@content'),
        ('prices', '//span[@class="price-new"]/text()'),
    )
    for field, query in remaining_fields:
        loader.add_xpath(field, query)

    return loader.load_item()
def __parse_product_page(self, response):
    """Build and return a product item for a teeminus24 product page.

    The artist fields are fixed values; the image URL is pulled out of the
    raw response body with a regular expression.
    """
    fullsize_pattern = r'"fullsize":{"url":"(.*?)"'
    fixed_values = (
        ('url', response.url),
        ('artist_name', 'teeminus24'),
        ('artist_urls', ['http://teeminus24.com']),
    )

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//h1/text()')
    for field, value in fixed_values:
        loader.add_value(field, value)
    loader.add_xpath('prices', '//span[@id="listPrice"]/text()')
    loader.add_value('image_url', response.body, re=fullsize_pattern)
    return loader.load_item()
def parse_product_page(self, response):
    """Build and return a product item from a product detail page.

    ``image_url`` is fed from two queries, in this order: images whose
    ``src`` contains "detail", then those containing "1024x1024".
    """
    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//h1/text()')
    loader.add_value('url', response.url)

    # Order matters for image_url: both queries feed the same field.
    xpath_fields = (
        ('image_url', '//img[contains(@src, "detail")]/@src'),
        ('image_url', '//img[contains(@src, "1024x1024")]/@src'),
        ('artist_name', '//a[contains(@href, "/pages/member/")]/text()'),
        ('prices', '(//span[@class="price"])[1]/text()'),
    )
    for field, query in xpath_fields:
        loader.add_xpath(field, query)

    return loader.load_item()
def parse_product_page(self, response):
    """Build and return a product item for a 6 Dollar Shirts product page.

    The ``og:image`` URL is rewritten from the 400x400 variant to the
    750x750 one.  Artist fields and the price are fixed values.

    When the page has no ``og:image`` meta tag the item is returned without
    an ``image_url`` instead of raising ``AttributeError`` on ``None``.
    """
    image_url = response.xpath(
        '//meta[@property="og:image"]/@content').extract_first()

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//h1/text()')
    loader.add_value('url', response.url)
    # extract_first() yields None when the meta tag is absent.
    if image_url is not None:
        loader.add_value('image_url',
                         image_url.replace('400x400', '750x750'))
    loader.add_xpath('fabric_colors',
                     '//img[contains(@src, "swatches")]/@alt')
    loader.add_value('artist_name', '6 Dollar Shirts')
    loader.add_value('artist_urls', ['http://6dollarshirts.com/'])
    loader.add_value('prices', '$6')
    return loader.load_item()
def __parse_product_page(self, response):
    """Build and return a product item from a t-shirt design page.

    The artist name is the part of the designer label that follows
    ``"Artist : "``; fabric colours come from the ``data-hex`` attributes
    of the men's colour boxes.
    """
    name_xpath = '//div[@class="t-shirt-design-name"]/text()'
    image_xpath = '//img[contains(@data-opaque, "user_design")]/@data-opaque'
    artist_xpath = '//div[@class="t-shirt-designer-name"]/text()'
    price_xpath = '//div[@class="addtocart-tab"]/span/text()'
    colors_xpath = '//div[@class="mens-color-box normalTip"]/@data-hex'

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', name_xpath)
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', image_xpath)
    loader.add_xpath('artist_name', artist_xpath, re='Artist : (.+)')
    loader.add_xpath('prices', price_xpath)
    loader.add_xpath('fabric_colors', colors_xpath)
    return loader.load_item()
def parse(self, response):
    """Yield a product-detail request for every ``DesignID`` input found.

    Each request carries an item pre-loaded with ``prices`` and
    ``expires_at`` read from this listing page.
    """
    detail_base = 'http://www.teetournament.com/product-detail.cfm?designid='
    design_ids = response.xpath('//input[@name="DesignID"]/@value')

    for design_id in design_ids:
        product_url = detail_base + design_id.extract()

        loader = ProductItemLoader(response=response)
        loader.add_xpath('prices', '//div[@class="price priceLeft"]/text()')
        # The expiry is the number following "until: " in the script text.
        loader.add_xpath('expires_at', 'string(//script/text())',
                         re=r'until: (\d+)')

        request = scrapy.Request(product_url,
                                 callback=self.__parse_product_page)
        request.meta['item'] = loader.load_item()
        yield request
def __parse_product_page(self, response):
    """Complete the item from ``response.meta['item']`` with product details.

    Name and artist come from the ``datos_autor`` block; the price text
    nodes are combined with a ``Join()`` processor.
    """
    partial_item = response.meta['item']
    name_xpath = '//div[@id="datos_autor"]/h2/text()'
    artist_xpath = '//div[@id="datos_autor"]/a/text()'
    image_xpath = '//div[@id="contenedor_fotos"]/div[2]/img/@data-src'
    price_xpath = '//div[@class="precio-xt-euros"]//text()'

    loader = ProductItemLoader(partial_item, response=response)
    loader.add_xpath('name', name_xpath)
    loader.add_value('url', response.url)
    loader.add_xpath('artist_name', artist_xpath)
    loader.add_xpath('image_url', image_xpath)
    loader.add_xpath('prices', price_xpath, Join())
    return loader.load_item()
def parse(self, response):
    """Build and return a product item straight from the response page.

    The price is a fixed value; ``last_chance`` is true only when the
    response URL is the teebusters last-chance page.
    """
    is_last_chance = (
        response.url == 'https://teebusters.com/index/lastchance.html')

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//div[@id="app_info"]/strong/text()')
    loader.add_value('url', response.url)

    page_fields = (
        ('image_url', '//img[contains(@src, "zoom")]/@src'),
        ('artist_name', '//div[@id="profile-box"]//strong/text()'),
        ('artist_urls', '//a[@class="followBox"]/@href'),
    )
    for field, query in page_fields:
        loader.add_xpath(field, query)

    loader.add_value('prices', '8.99 EUR')
    loader.add_value('last_chance', is_last_chance)
    # The expiry is the number following "until: " in the script text.
    loader.add_xpath('expires_at', 'string(//script/text())',
                     re=r'until: (\d+)')
    return loader.load_item()
def parse_product_page(self, response):
    """Build a product item and return a request for its "Art Print" page.

    The loaded item is attached to the returned request as
    ``meta['item']`` and handed to ``parse_image``.

    When the page has no user-avatar link, ``artist_urls`` is left unset
    instead of raising ``TypeError`` while concatenating ``None``.
    """
    print_url = response.xpath(
        '//a[text()="Art Print"]/@href').extract_first()
    username = response.xpath(
        '//div[@class="user-avatar"]/a/@href').extract_first()
    availability = response.xpath(
        '//meta[@property="og:availability"]/@content').extract_first()

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//h1[@data-dmc="prod-name"]/text()')
    loader.add_xpath('description',
                     '//p[@id="about-the-art-description"]/text()')
    loader.add_value('url', response.url)
    loader.add_xpath('prices', '//meta[@itemprop="price"]/@content')
    loader.add_xpath('artist_name',
                     '//h1[@data-dmc="prod-name"]/following::a/text()')
    # extract_first() yields None when the avatar link is absent.
    if username is not None:
        loader.add_value('artist_urls', 'https://society6.com' + username)
    loader.add_xpath('tags', '//p[@class="tag-container"]/a/text()')
    loader.add_value('active', availability == 'instock')

    request = Request(urlparse.urljoin(response.url, print_url),
                      callback=self.parse_image)
    request.meta['item'] = loader.load_item()
    return request
def parse_product_page(self, response):
    """Build a product item and yield a request for the artist's page.

    The loaded item is attached to the request as ``meta['item']`` for
    ``parse_artist_page`` to complete.
    """
    loader = ProductItemLoader(response=response)
    loader.add_xpath('name',
                     '//section[@class="product-detail-info"]/h2/text()')
    loader.add_value('url', response.url)

    meta_fields = (
        ('artist_name', '//div[@class="product-artist"]/a/text()'),
        ('image_url', '//meta[@property="og:image"]/@content'),
        ('prices', '//meta[@property="og:price:amount"]/@content'),
    )
    for field, query in meta_fields:
        loader.add_xpath(field, query)

    artist_href = response.xpath(
        '//div[@class="product-artist"]/a/@href').extract_first()
    artist_url = urlparse.urljoin(response.url, artist_href)

    request = scrapy.Request(artist_url,
                             callback=self.parse_artist_page,
                             dont_filter=True)
    request.meta['item'] = loader.load_item()
    yield request
def parse_product_page(self, response):
    """Build and return a product item from a product detail page.

    The product title has the form "<name> by <artist>": the part before
    " by" becomes ``name``.  ``artist_name`` is fed first from the artist
    info sidebar and then from the part of the title after " by ".

    The visited URL is reported through ``logging`` at debug level rather
    than printed to stdout.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    logging.getLogger(__name__).debug('Parsing product page: %s',
                                      response.url)

    title_xpath = '//span[@itemprop="name"]/text()'

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', title_xpath, re=r'(.+) by')
    loader.add_value('url', response.url)
    # Order matters: both queries feed the same artist_name field.
    loader.add_xpath('artist_name',
                     '//aside[@class="artist-info"]//h2/text()')
    loader.add_xpath('artist_name', title_xpath, re=r'\sby\s(.+)')
    loader.add_xpath('image_url',
                     '(//div[@data-zoom-url]/@data-zoom-url)[last()]')
    loader.add_xpath(
        'prices',
        '//div[@itemprop="priceSpecification"]/span[@itemprop="minPrice" or @itemprop="price"]/text()'
    )
    return loader.load_item()
def __parse_product_page(self, response):
    """Build a product item and yield a request for the artist's page.

    The loaded item is attached to the request as ``meta['item']`` for
    ``__parse_artist_page`` to complete.
    """
    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//div[@class="name"]/text()')
    loader.add_value('url', response.url)

    product_fields = (
        ('image_url', '//section[@class="product-info"]//img[1]/@ref'),
        ('artist_name', '//a[contains(@href, "/artists/")]/text()'),
        ('prices', '//span[@id="price"]/text()'),
    )
    for field, query in product_fields:
        loader.add_xpath(field, query)

    artist_href = response.xpath(
        '//a[contains(@href, "/artists/")]//@href').extract_first()
    artist_url = urlparse.urljoin(response.url, artist_href)

    request = scrapy.Request(artist_url,
                             callback=self.__parse_artist_page,
                             dont_filter=True)
    request.meta['item'] = loader.load_item()
    yield request
def parse_product_page(self, response):
    """Build a product item and yield a request for the artist's page.

    ``prices`` is fed from two queries, in this order: the "Mens Tee"
    price, then every ``data-glprice`` attribute on any ``div``.
    """
    loader = ProductItemLoader(response=response)
    loader.add_xpath('name',
                     '//div[@class="product_identity clearfix"]/h1/text()')
    loader.add_value('url', response.url)

    # Order matters for prices: both queries feed the same field.
    product_fields = (
        ('image_url',
         '//a[@class="design" and contains(@href, "design")]/@href'),
        ('artist_name', '//p[@class="tip"]/@data-title'),
        ('prices', '//div[@data-glname="Mens Tee"]/@data-glprice'),
        ('prices', '//div/@data-glprice'),
    )
    for field, query in product_fields:
        loader.add_xpath(field, query)

    artist_href = response.xpath('//p[@class="tip"]/a/@href').extract_first()
    request = scrapy.Request(urlparse.urljoin(response.url, artist_href),
                             callback=self.parse_artist_page,
                             dont_filter=True)
    request.meta['item'] = loader.load_item()
    yield request
def parse_product_page(self, response, deal=False):
    """Build and return a product item from a product page.

    When ``deal`` is true, the ``lbldatetime`` span text is additionally
    loaded as ``expires_at``.
    """
    design_href = response.xpath(
        '//div[@class="tee-des"]/a/@href').extract_first()
    image_url = urlparse.urljoin(response.url, design_href)

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//input[@name="txtSaleDesignTitle"]/@value')
    loader.add_value('url', response.url)
    loader.add_value('image_url', image_url)
    loader.add_xpath('artist_name', '//span[@id="lblArtistname"]/text()')
    loader.add_xpath('artist_urls', '//div[@class="social-icon"]/a/@href')
    # The price text nodes are concatenated with an empty separator.
    loader.add_xpath('prices', '//span[@class="tagfullprice"]//text()',
                     Join(separator=u''))
    loader.add_value('fabric_colors', self.__parse_fabric_colors(response))

    if deal:
        loader.add_xpath('expires_at', '//span[@id="lbldatetime"]/text()')

    return loader.load_item()
def __parse_product_page(self, response):
    """Extend the item from ``response.meta['item']`` and follow the artist.

    Adds name, URL and image URL to the item, then yields a request for
    the artist page carrying the loaded item as ``meta['item']``.
    """
    partial_item = response.meta['item']
    loader = ProductItemLoader(partial_item, response=response)
    loader.add_xpath('name', '//meta[@property="og:title"]/@content')
    loader.add_value('url', response.url)

    image_href = response.xpath('//a[@data-title]/@href').extract_first()
    loader.add_value('image_url', urlparse.urljoin(response.url, image_href))

    artist_href = response.xpath(
        '//a[contains(@href, "artist.cfm")]/@href').extract_first()
    artist_url = urlparse.urljoin(response.url, artist_href)

    request = scrapy.Request(artist_url,
                             callback=self.__parse_artist_page,
                             dont_filter=True)
    request.meta['item'] = loader.load_item()
    yield request
def parse(self, response):
    """Build and return a product item straight from the response page.

    The price is a fixed value of ``$10``.
    """
    name_xpath = '//span[@class="product-name"]/text()'
    image_xpath = '//div[@class="showcase-image"]/img/@src'
    artist_xpath = '//span[@class="product-artist"]/a/text()'

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', name_xpath)
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', image_xpath)
    loader.add_xpath('artist_name', artist_xpath)
    loader.add_value('prices', '$10')
    return loader.load_item()
def parse(self, response):
    """Build and return a product item straight from the response page.

    The artist name and artist URL are fixed Tee Gravy values.
    """
    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//div[@id="shirt-name"]/h1/text()')
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', '//div[@id="design-large"]/a/@href')

    fixed_values = (
        ('artist_name', 'Tee Gravy'),
        ('artist_urls', ['http://www.teegravy.com']),
    )
    for field, value in fixed_values:
        loader.add_value(field, value)

    loader.add_xpath('prices', '//p[@class="shirt-dollars"]/text()')
    return loader.load_item()
def __parse_product_page(self, response):
    """Build and return a product item from a product page.

    The artist name is whatever follows "by" (case-insensitive) in the
    ``pos-title`` heading text.  The price is a fixed multi-currency
    string.
    """
    loader = ProductItemLoader(response=response)
    # Raw string: "\s" is not a valid escape in a plain string literal.
    artist_re = re.compile(r'by\s+(.+)', re.IGNORECASE)
    artist = response.xpath('//h1[@class="pos-title"]/text()').extract()

    loader.add_xpath('name', '//meta[@itemprop="itemreviewed"]/@content')
    # NOTE(review): url is the fixed site root rather than response.url;
    # presumably intentional, confirm against the spider's start URLs.
    loader.add_value('url', ['http://www.teevolt.com'])
    loader.add_xpath('image_url', '//img[contains(@src,"com_zoo")][1]/@src')
    loader.add_value('artist_name', ''.join(artist), re=artist_re)
    loader.add_xpath('artist_urls',
                     '//div[@class="pos-description"]//a/@href')
    loader.add_value('prices', '7.50 GBP / 9 EUR / 12 USD')
    return loader.load_item()
def __parse_product_page(self, response):
    """Build and return a product item from a product page.

    The artist name is the text after the last " by " in the ``og:title``
    meta content.  If the title is missing or has no " by " separator,
    ``artist_name`` is left unset instead of raising ``AttributeError`` or
    ``IndexError``.
    """
    title = response.xpath(
        '//meta[@property="og:title"]/@content').extract_first()

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//span[@class="productname"]/text()')
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', '//meta[@property="og:image"]/@content')

    # rpartition gives the same tail as rsplit(' by ', 1)[1] when the
    # separator is present, and an empty separator when it is not.
    _, separator, artist_name = (title or '').rpartition(' by ')
    if separator:
        loader.add_value('artist_name', artist_name)

    loader.add_xpath('prices',
                     '(//div[@class="productdt"]//span/text())[last()]')
    return loader.load_item()
def parse(self, response):
    """Build and return a product item straight from the response page.

    Name, URL and image are all read from the ``wpb_single_image`` block;
    artist fields and the price are fixed Anglotees values.
    """
    loader = ProductItemLoader(response=response)

    scraped_fields = (
        ('name', '//div[contains(@class, "wpb_single_image")]/h2/text()'),
        ('url', '//div[contains(@class, "wpb_single_image")]//a/@href'),
        ('image_url', '//div[contains(@class, "wpb_single_image")]//img/@src'),
    )
    for field, query in scraped_fields:
        loader.add_xpath(field, query)

    fixed_values = (
        ('artist_name', 'Anglotees'),
        ('artist_urls', ['http://anglotees.com/']),
        ('prices', '$22.99'),
    )
    for field, value in fixed_values:
        loader.add_value(field, value)

    return loader.load_item()
def __parse_product_page(self, response):
    """Build and return a product item from a product page.

    Artist fields are fixed Inkogneeto values; the image comes from the
    thumbnail whose ``data-src`` contains "SMALL".
    """
    image_xpath = ('//div[@id="productThumbnails"]'
                   '//img[contains(@data-src, "SMALL")]/@data-src')
    price_xpath = '//meta[@property="product:price:amount"]/@content'

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//meta[@property="og:title"]/@content')

    fixed_values = (
        ('url', response.url),
        ('artist_name', 'Inkogneeto'),
        ('artist_urls', ['http://www.inkogneeto.com']),
    )
    for field, value in fixed_values:
        loader.add_value(field, value)

    loader.add_xpath('image_url', image_xpath)
    loader.add_xpath('prices', price_xpath)
    return loader.load_item()
def __parse_product_page(self, response):
    """Complete the item from ``response.meta['item']`` with product details.

    The ``og:title`` content has the form "<name> By <artist>" and is
    split into ``name`` and ``artist_name`` with two regular expressions.
    The price is a fixed ``$25``.
    """
    partial_item = response.meta['item']
    title_xpath = '//meta[@property="og:title"]/@content'

    loader = ProductItemLoader(partial_item, response=response)
    loader.add_xpath('name', title_xpath, re=r'(.*?) By')
    loader.add_value('url', response.url)
    loader.add_xpath('artist_name', title_xpath, re=r'.*By\s(.+)')
    loader.add_xpath('artist_urls',
                     '//div[@class="mini_description"]//a/@href')
    loader.add_value('prices', '$25')
    return loader.load_item()
def parse_product_page(self, response):
    """Build a product item and yield a request for the artist's page.

    ``prices`` is fed first from the first ``itemprop="price"`` span and
    then from the ``ecomm_totalvalue`` number in the raw body.
    ``last_chance`` is true when the loader's output for ``prices``
    equals ``'14'``.
    """
    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//meta[@property="og:title"]/@content')
    loader.add_value('url', response.url)

    scraped_fields = (
        ('image_url',
         '//div[contains(@class,"artwork-slide")]/@data-full-image'),
        ('artist_name', '//h2[@class="artist-name"]//span/text()'),
        ('prices', '(//span[@itemprop="price"])[1]/text()'),
    )
    for field, query in scraped_fields:
        loader.add_xpath(field, query)
    loader.add_value('prices', response.body, re=r'ecomm_totalvalue: (\d+)')

    # Must run after both price sources have been added.
    is_last_chance = loader.get_output_value('prices') == '14'
    loader.add_value('last_chance', is_last_chance)

    artist_url = response.xpath(
        '//h2[@class="artist-name"]//a/@href').extract_first()
    request = scrapy.Request(artist_url,
                             callback=self.parse_artist_page,
                             dont_filter=True)
    request.meta['item'] = loader.load_item()
    yield request
def parse_product_page(self, response):
    """Build and return a product item from a Design By Humans product page.

    The artist's shop URL is built from the artist name.  When no artist
    name is found on the page, ``artist_urls`` is left unset instead of
    raising ``TypeError`` while concatenating ``None``.
    """
    loader = ProductItemLoader(response=response)
    artist_name = response.xpath(
        '//span[@class="name"]/text()').extract_first()

    loader.add_xpath('name', '//h1[@id="product-title"]/text()')
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', '//meta[@property="og:image"]/@content')
    loader.add_xpath('prices', '//span[@id="product_price"]/text()')
    loader.add_value('artist_name', artist_name)
    # extract_first() yields None when the name span is absent.
    if artist_name is not None:
        loader.add_value(
            'artist_urls',
            'https://www.designbyhumans.com/shop/' + artist_name)
    return loader.load_item()
def __parse_product_page(self, response):
    """Build and return a product item from a work-information page.

    The image URL is matched in the raw response body with a regular
    expression.  Each tag text is split on commas by a ``MapCompose``
    processor, passed positionally: ``add_xpath`` takes processors as
    positional arguments, so the former ``output_processor=`` keyword was
    never applied as a processor.
    """
    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//h2[@class="work-information_title"]/text()')
    # NOTE(review): description is read from a price meta tag; this looks
    # like a copy-paste slip, confirm the intended source.
    loader.add_xpath('description',
                     '//meta[@name="product:price:amount"]/@content')
    loader.add_value('url', response.url)
    loader.add_value('image_url', response.body,
                     re=r'https:\/\/[\w|\.]+\/[\w|.]+\/raf.*?\.jpg')
    loader.add_xpath('prices',
                     '//meta[@property="product:price:amount"]/@content')
    loader.add_xpath('artist_name', '//a[@rel="author"]/text()')
    loader.add_xpath('artist_urls', '//a[@rel="author"]/@href')
    loader.add_xpath('tags', '//p[@class="description_tag-list"]/a/text()',
                     MapCompose(lambda x: x.split(',')))
    return loader.load_item()