def __parse_products(self, response):
    """Yield a loaded item for each sellable product in a JSON product listing.

    The response body is parsed as JSON and its ``products`` list is walked,
    after being shuffled in place when ``self.shuffle_products`` is truthy.
    Products with no images, or with no variant marked ``available``, are
    skipped.  When ``self.product_limit`` is not ``-1`` the loop stops as
    soon as that many items have been yielded.

    Every loader is handed to ``self.before_save`` and is only turned into
    an item when ``self.validate`` returns a truthy value.
    """
    products = json.loads(response.body_as_unicode())['products']
    if self.shuffle_products:
        random.shuffle(products)

    yielded = 0
    for product in products:
        # Skip anything that cannot be shown (no image) or bought (no stock).
        if len(product['images']) == 0:
            continue
        if not any(variant['available'] for variant in product['variants']):
            continue

        limit = self.product_limit
        if limit != -1 and yielded >= limit:
            break

        product_url = 'http://%s/products/%s' % (self.shopify_domain, product['handle'])

        loader = ProductItemLoader()
        loader.add_value('name', product['title'])
        loader.add_value('url', product_url)
        loader.add_value('image_url', product['images'][0]['src'])
        loader.add_value('artist_name', product['vendor'])
        # Only the first variant's price is reported.
        loader.add_value('prices', '$' + product['variants'][0]['price'])
        loader.add_value('tags', product['tags'])

        loader = self.before_save(loader, product)
        if not self.validate(loader, product):
            continue

        yield loader.load_item()
        yielded += 1
def parse(self, response):
    """Yield one product-page request per product button on the listing.

    Each request carries, under ``meta['item']``, a partially loaded item
    whose ``expires_at`` is read from the listing's
    ``segundos_restantes`` input.  The product page itself is handled by
    ``__parse_product_page``.
    """
    product_links = '//a[contains(@href, "productos") and @class="btn btn-ficha"]/@href'
    for product_url in response.xpath(product_links).extract():
        # A fresh loader per product so every request gets its own item.
        loader = ProductItemLoader(response=response)
        loader.add_xpath('expires_at', '//input[@id="segundos_restantes"]/@value')
        yield scrapy.Request(
            product_url,
            callback=self.__parse_product_page,
            meta={'item': loader.load_item()},
        )
def __parse_product_page(self, response):
    """Load a product page and chain a request to the artist's page.

    Responses whose URL does not contain ``/product/`` are ignored and
    ``None`` is returned.  Otherwise the product fields are loaded and a
    request for the artist link is returned, with the loaded item stored
    in ``meta['item']`` for ``parse_artist_page`` to complete.
    """
    if '/product/' not in response.url:
        return None

    product_id = response.xpath('//a[contains(@class, "buy-button")]/@data-id').extract_first()
    artist = response.xpath('//span[@class="author"]/a')
    price_sel = response.xpath('//p[@class="product-price"]')
    currencies = []
    for code in ['usd', 'gbp', 'eur']:
        currencies.append(self.build_currency_string(price_sel, code))

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//span[@class="name"]/text()')
    loader.add_value('url', response.url)
    loader.add_value('image_url', self.build_image_url(product_id))
    loader.add_value('artist_name', artist.xpath('text()').extract())
    loader.add_value('prices', ' / '.join(currencies))
    # NOTE(review): this branch only runs for URLs containing '/product/',
    # so the comparison below is always False here -- confirm the intent.
    is_last_chance = response.url == 'https://www.qwertee.com/last-chance'
    loader.add_value('last_chance', is_last_chance)

    artist_url = urlparse.urljoin(response.url, artist.xpath('@href').extract_first())
    # dont_filter: the same artist page may be requested for several products.
    request = scrapy.Request(artist_url, callback=self.parse_artist_page, dont_filter=True)
    request.meta['item'] = loader.load_item()
    return request
def parse(self, response):
    """Yield an artist-profile request for every tee slide on the page.

    One item is loaded per ``.big-slide.tee > div`` element, using the
    element's ``data-*`` attributes.  All items share the expiry read from
    the ``index-countdown`` element, and slides after the third
    (``index > 2``) are flagged as ``last_chance``.  The loaded item
    travels in ``meta['item']`` to ``parse_artist_page``.
    """
    expiry = response.xpath('//div[@class="index-countdown"]/@data-time').extract_first()

    for index, slide in enumerate(response.css('.big-slide.tee > div')):
        product_id = slide.xpath('@data-id').extract_first()
        artist_id = slide.xpath('@data-user-id').extract_first()
        currencies = []
        for code in ['usd', 'gbp', 'eur']:
            currencies.append(self.build_currency_string(slide, code))

        # Relative XPaths below are evaluated against the slide element.
        loader = ProductItemLoader(response=response, selector=slide)
        loader.add_xpath('name', '@data-name')
        loader.add_value('url', response.url)
        loader.add_value('image_url', self.build_image_url(product_id))
        loader.add_xpath('artist_name', '@data-user')
        loader.add_value('prices', ' / '.join(currencies))
        loader.add_value('last_chance', index > 2)
        loader.add_value('expires_at', expiry)

        profile_url = urlparse.urljoin(response.url, '/profile/' + artist_id)
        request = scrapy.Request(profile_url, callback=self.parse_artist_page, dont_filter=True)
        request.meta['item'] = loader.load_item()
        yield request
def parse_artist_page(self, response):
    """Complete the item from ``response.meta`` with the artist's links.

    The hrefs of all anchors inside the ``category-userprofile-info``
    block are added as ``artist_urls`` and the finished item is returned.
    """
    loader = ProductItemLoader(response.meta['item'], response=response)
    profile_links = '//div[contains(@class, "category-userprofile-info")]//a/@href'
    loader.add_xpath('artist_urls', profile_links)
    return loader.load_item()
def parse(self, response):
    """Load the single product shown on the page and return it as an item.

    Name, image and artist come from the page markup; the URL is the
    response URL and the price is the fixed string ``$10``.
    """
    name_xpath = '//span[@class="product-name"]/text()'
    image_xpath = '//div[@class="showcase-image"]/img/@src'
    artist_xpath = '//span[@class="product-artist"]/a/text()'

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', name_xpath)
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', image_xpath)
    loader.add_xpath('artist_name', artist_xpath)
    loader.add_value('prices', '$10')
    item = loader.load_item()
    return item
def __parse_product_page(self, response):
    """Fill in the item started on the listing page with product details.

    The partially loaded item is taken from ``response.meta['item']``.
    Name and artist are read from the ``datos_autor`` block, the image
    from the photo container, and all text nodes under the
    ``precio-xt-euros`` element are passed through ``Join()`` for the price.
    """
    partial_item = response.meta['item']
    author_block = '//div[@id="datos_autor"]'

    loader = ProductItemLoader(partial_item, response=response)
    loader.add_xpath('name', author_block + '/h2/text()')
    loader.add_value('url', response.url)
    loader.add_xpath('artist_name', author_block + '/a/text()')
    loader.add_xpath('image_url', '//div[@id="contenedor_fotos"]/div[2]/img/@data-src')
    loader.add_xpath('prices', '//div[@class="precio-xt-euros"]//text()', Join())
    return loader.load_item()
def __parse_product_page(self, response):
    """Load a product page whose artist is always the shop itself.

    The image URL is not taken from the markup: it is pulled out of the
    raw response body with a regular expression matching the ``fullsize``
    entry.
    """
    fullsize_re = r'"fullsize":{"url":"(.*?)"'

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//h1/text()')
    loader.add_value('url', response.url)
    # Artist name and link are fixed values for this shop.
    loader.add_value('artist_name', 'teeminus24')
    loader.add_value('artist_urls', ['http://teeminus24.com'])
    loader.add_xpath('prices', '//span[@id="listPrice"]/text()')
    loader.add_value('image_url', response.body, re=fullsize_re)
    item = loader.load_item()
    return item
def __parse_product_page(self, response):
    """Load a product page and return the resulting item.

    The artist name and link both come from the first anchor following
    the ``Artist:`` label; the image is the page's ``og:image``.
    """
    # First <a> after the "Artist:" label holds both the name and the link.
    artist_anchor = '//span[text()="Artist:"]/following::a[1]'

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//h1/text()')
    loader.add_value('url', response.url)
    loader.add_xpath('artist_name', artist_anchor + '/text()')
    loader.add_xpath('artist_urls', artist_anchor + '/@href')
    loader.add_xpath('image_url', '//meta[@property="og:image"]/@content')
    loader.add_xpath('prices', '//span[@class="price-new"]/text()')
    return loader.load_item()
def __parse_artist_page(self, response):
    """Add artist details to the item carried in ``response.meta``.

    The artist name is read from the page's ``h3`` text and the artist
    URLs from the ``socialIcon`` anchors inside ``col-sm-4`` blocks.
    """
    social_links = '//div[@class="col-sm-4"]//a[contains(@class, "socialIcon")]/@href'

    loader = ProductItemLoader(response.meta['item'], response=response)
    loader.add_xpath('artist_name', '//h3/text()')
    loader.add_xpath('artist_urls', social_links)
    return loader.load_item()
def parse(self, response):
    """Load the shirt shown on the page and return it as an item.

    Name, image and price come from the markup; the artist name and
    artist URL are fixed values for this shop.
    """
    name_xpath = '//div[@id="shirt-name"]/h1/text()'
    image_xpath = '//div[@id="design-large"]/a/@href'
    price_xpath = '//p[@class="shirt-dollars"]/text()'

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', name_xpath)
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', image_xpath)
    loader.add_value('artist_name', 'Tee Gravy')
    loader.add_value('artist_urls', ['http://www.teegravy.com'])
    loader.add_xpath('prices', price_xpath)
    item = loader.load_item()
    return item
def parse_image(self, response):
    """Add the full-size artwork URL to the item carried in ``response.meta``.

    The URL is searched for in the decoded response body; backslashes
    (used to escape ``/`` in the embedded data) are stripped from the
    match before it is stored as ``image_url``.

    Raises ``AttributeError`` when the pattern is not found, because the
    search result is used without a ``None`` check.
    """
    artwork_re = re.compile(r'"full":{"url":"(http.*?artwork\\/~artwork.*?)"')
    body_text = response.body_as_unicode()
    escaped_url = artwork_re.search(body_text).group(1)
    image_url = escaped_url.replace('\\', '')

    loader = ProductItemLoader(response.meta['item'], response=response)
    loader.add_value('image_url', image_url)
    return loader.load_item()
def parse_product_page(self, response):
    """Load a product page and return the resulting item.

    Two image XPaths are fed into ``image_url`` in order: images whose
    ``src`` contains ``detail`` first, then those containing ``1024x1024``.
    """
    # Order matters: "detail" images are offered to the loader first.
    image_xpaths = (
        '//img[contains(@src, "detail")]/@src',
        '//img[contains(@src, "1024x1024")]/@src',
    )

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//h1/text()')
    loader.add_value('url', response.url)
    for image_xpath in image_xpaths:
        loader.add_xpath('image_url', image_xpath)
    loader.add_xpath('artist_name', '//a[contains(@href, "/pages/member/")]/text()')
    loader.add_xpath('prices', '(//span[@class="price"])[1]/text()')
    return loader.load_item()
def __parse_product_page(self, response):
    """Load the product shown on the page and return it as an item.

    The artist name is whatever follows ``by`` (case-insensitive) in the
    text of the ``pos-title`` heading.  The item URL is the fixed site
    root rather than ``response.url``, and the price is a fixed
    multi-currency string.
    """
    loader = ProductItemLoader(response=response)
    # Raw string: '\s' in a normal literal is an invalid escape sequence.
    artist_re = re.compile(r'by\s+(.+)', re.IGNORECASE)
    artist = response.xpath('//h1[@class="pos-title"]/text()').extract()

    loader.add_xpath('name', '//meta[@itemprop="itemreviewed"]/@content')
    loader.add_value('url', ['http://www.teevolt.com'])
    loader.add_xpath('image_url', '//img[contains(@src,"com_zoo")][1]/@src')
    # The heading may be split over several text nodes; join before matching.
    loader.add_value('artist_name', ''.join(artist), re=artist_re)
    loader.add_xpath('artist_urls', '//div[@class="pos-description"]//a/@href')
    loader.add_value('prices', '7.50 GBP / 9 EUR / 12 USD')
    return loader.load_item()
def __parse_product_page(self, response):
    """Load a product page and return the resulting item.

    The artist name is the part of the ``og:title`` content after its
    last ``' by '``.  A missing title raises ``AttributeError`` and a
    title without ``' by '`` raises ``IndexError``, since neither case is
    checked.
    """
    og_title = response.xpath('//meta[@property="og:title"]/@content').extract_first()
    # rsplit keeps any earlier " by " inside the product name intact.
    artist_name = og_title.rsplit(' by ', 1)[1]

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//span[@class="productname"]/text()')
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', '//meta[@property="og:image"]/@content')
    loader.add_value('artist_name', artist_name)
    loader.add_xpath('prices', '(//div[@class="productdt"]//span/text())[last()]')
    return loader.load_item()
def parse_product_page(self, response):
    """Load a product page, then yield a request for the artist's page.

    The loaded item is attached to the request as ``meta['item']`` so
    that ``parse_artist_page`` can finish it.
    """
    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//section[@class="product-detail-info"]/h2/text()')
    loader.add_value('url', response.url)
    loader.add_xpath('artist_name', '//div[@class="product-artist"]/a/text()')
    loader.add_xpath('image_url', '//meta[@property="og:image"]/@content')
    loader.add_xpath('prices', '//meta[@property="og:price:amount"]/@content')

    artist_href = response.xpath('//div[@class="product-artist"]/a/@href').extract_first()
    artist_url = urlparse.urljoin(response.url, artist_href)
    # dont_filter: several products may point at the same artist page.
    request = scrapy.Request(artist_url, callback=self.parse_artist_page, dont_filter=True)
    request.meta['item'] = loader.load_item()
    yield request
def parse_product_page(self, response):
    """Load a product page and return the resulting item.

    The ``og:image`` URL has ``400x400`` replaced by ``750x750`` before
    it is stored.  Fabric colours are the ``alt`` texts of the swatch
    images; artist name, artist URL and price are fixed values.
    """
    og_image = response.xpath('//meta[@property="og:image"]/@content').extract_first()
    large_image = og_image.replace('400x400', '750x750')

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//h1/text()')
    loader.add_value('url', response.url)
    loader.add_value('image_url', large_image)
    loader.add_xpath('fabric_colors', '//img[contains(@src, "swatches")]/@alt')
    loader.add_value('artist_name', '6 Dollar Shirts')
    loader.add_value('artist_urls', ['http://6dollarshirts.com/'])
    loader.add_value('prices', '$6')
    item = loader.load_item()
    return item
def __parse_product_page(self, response):
    """Load a product page and return the resulting item.

    Name and price come from meta tags; the image is taken from the
    thumbnail images whose ``data-src`` contains ``SMALL``.  Artist name
    and URL are fixed values for this shop.
    """
    thumbnail_xpath = '//div[@id="productThumbnails"]//img[contains(@data-src, "SMALL")]/@data-src'
    price_xpath = '//meta[@property="product:price:amount"]/@content'

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//meta[@property="og:title"]/@content')
    loader.add_value('url', response.url)
    loader.add_value('artist_name', 'Inkogneeto')
    loader.add_value('artist_urls', ['http://www.inkogneeto.com'])
    loader.add_xpath('image_url', thumbnail_xpath)
    loader.add_xpath('prices', price_xpath)
    item = loader.load_item()
    return item
def parse(self, response):
    """Load the featured product from the page and return it as an item.

    Name, product URL and image are all read from inside the
    ``wpb_single_image`` block; artist name, artist URL and price are
    fixed values.
    """
    # Everything scraped lives under the same container element.
    container = '//div[contains(@class, "wpb_single_image")]'

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', container + '/h2/text()')
    loader.add_xpath('url', container + '//a/@href')
    loader.add_xpath('image_url', container + '//img/@src')
    loader.add_value('artist_name', 'Anglotees')
    loader.add_value('artist_urls', ['http://anglotees.com/'])
    loader.add_value('prices', '$22.99')
    return loader.load_item()
def __parse_product_page(self, response):
    """Load a product page, then yield a request for the artist's page.

    The loaded item is attached to the request as ``meta['item']`` so
    that ``__parse_artist_page`` can finish it.
    """
    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//div[@class="name"]/text()')
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', '//section[@class="product-info"]//img[1]/@ref')
    loader.add_xpath('artist_name', '//a[contains(@href, "/artists/")]/text()')
    loader.add_xpath('prices', '//span[@id="price"]/text()')

    artist_href = response.xpath('//a[contains(@href, "/artists/")]//@href').extract_first()
    artist_url = urlparse.urljoin(response.url, artist_href)
    # dont_filter: several products may point at the same artist page.
    request = scrapy.Request(artist_url, callback=self.__parse_artist_page, dont_filter=True)
    request.meta['item'] = loader.load_item()
    yield request
def parse(self, response):
    """Yield a product-page request for each limited-edition product link.

    The image is only taken from the listing: the ``src`` of the linked
    ``img`` that has a ``srcset`` attribute is stored on a partial item,
    which is passed to ``__parse_product_page`` via ``meta['item']``.
    """
    link_xpath = '//div[contains(@class, "wc-shortcodes-column")]/p/a[contains(@href, "limited-edition-hip-hop-t-shirts")]'

    for anchor in response.xpath(link_xpath):
        thumbnail = anchor.xpath('.//img[@srcset]/@src').extract_first()
        product_url = anchor.xpath('@href').extract_first()

        loader = ProductItemLoader(response=response)
        loader.add_value('image_url', thumbnail)

        request = scrapy.Request(product_url, callback=self.__parse_product_page)
        request.meta['item'] = loader.load_item()
        yield request
def parse_product_page(self, response):
    """Load a product page and yield the resulting item.

    The artist is looked up in the description text (``Design by ...``);
    the shop name ``BustedTees`` is added afterwards as a second
    ``artist_name`` value.  The sale price XPath is added before the
    regular price XPath.  Tags are the comma-separated entries of the
    ``keywords`` meta tag; when that tag is absent no tags are added
    instead of failing the whole item.
    """
    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//h1/text()')
    loader.add_xpath('description', '//div[@id="description"]/text()')
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', '//div[@class="gallery-image"]/img[contains(@src, ".gif")]/@src')
    loader.add_xpath('artist_name', '//div[@id="description"]/text()', re=r'Design by (.+?)\.?\s*$')
    loader.add_value('artist_name', 'BustedTees')
    loader.add_xpath('prices', '//span[@id="sale_price"]/text()')
    loader.add_xpath('prices', '//span[@class="price"]/text()')

    keywords = response.xpath('//meta[@name="keywords"]/@content').extract_first()
    # extract_first() returns None when the meta tag is missing; calling
    # .split on it would raise AttributeError and drop the product.
    if keywords is not None:
        loader.add_value('tags', keywords.split(','))

    yield loader.load_item()
def __parse_product_page(self, response):
    """Fill in the item carried in ``response.meta`` with product details.

    Both the product name and the artist name are cut out of the
    ``og:title`` content: the name is the text before ``By`` and the
    artist is the text after it.  The price is the fixed string ``$25``.
    """
    og_title = '//meta[@property="og:title"]/@content'

    loader = ProductItemLoader(response.meta['item'], response=response)
    loader.add_xpath('name', og_title, re=r'(.*?) By')
    loader.add_value('url', response.url)
    loader.add_xpath('artist_name', og_title, re=r'.*By\s(.+)')
    loader.add_xpath('artist_urls', '//div[@class="mini_description"]//a/@href')
    loader.add_value('prices', '$25')
    return loader.load_item()
def __parse_product_page(self, response):
    """Load a product page and return the resulting item.

    The image comes from the ``data-opaque`` attribute of images whose
    value contains ``user_design``; the artist name is the text after
    ``Artist : `` in the designer-name block; fabric colours are the
    ``data-hex`` values of the men's colour boxes.
    """
    image_xpath = '//img[contains(@data-opaque, "user_design")]/@data-opaque'
    designer_xpath = '//div[@class="t-shirt-designer-name"]/text()'
    colors_xpath = '//div[@class="mens-color-box normalTip"]/@data-hex'

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//div[@class="t-shirt-design-name"]/text()')
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', image_xpath)
    loader.add_xpath('artist_name', designer_xpath, re='Artist : (.+)')
    loader.add_xpath('prices', '//div[@class="addtocart-tab"]/span/text()')
    loader.add_xpath('fabric_colors', colors_xpath)
    item = loader.load_item()
    return item
def parse(self, response):
    """Yield a product-detail request for every ``DesignID`` on the page.

    Price and expiry are read from this listing page and stored on a
    partial item, which is handed to ``__parse_product_page`` through
    ``meta['item']``.  The expiry is the number following ``until: `` in
    the page's script text.
    """
    detail_base = 'http://www.teetournament.com/product-detail.cfm?designid='

    for design_id in response.xpath('//input[@name="DesignID"]/@value').extract():
        product_url = detail_base + design_id

        # A fresh loader per design so every request gets its own item.
        loader = ProductItemLoader(response=response)
        loader.add_xpath('prices', '//div[@class="price priceLeft"]/text()')
        loader.add_xpath('expires_at', 'string(//script/text())', re=r'until: (\d+)')

        request = scrapy.Request(product_url, callback=self.__parse_product_page)
        request.meta['item'] = loader.load_item()
        yield request
def parse_product_page(self, response):
    """Load a product page and return the resulting item.

    The preview image is chosen by colour: the text of the ``color-name``
    span is used to pick the ``color-box`` whose ``title`` matches it.
    Product name and artist name are the parts of ``og:title`` before and
    after ``' by '``.  The artist URL is the site root plus the first
    ``/user/`` link on the page.

    A missing colour name or user link raises, since neither value is
    checked for ``None``.
    """
    loader = ProductItemLoader(response=response)

    color_text = response.xpath('//span[@class="color-name"]/text()').extract_first()
    default_color = color_text.strip()
    artist_url = response.xpath('//a[contains(@href, "/user/")]/@href').extract_first()

    og_title = '//meta[@property="og:title"]/@content'
    preview_xpath = '//span[contains(@class, "color-box") and @title="%s"]/@data-preview' % default_color

    loader.add_xpath('name', og_title, re=r'(.*?) by ')
    loader.add_xpath('description', '//div[contains(@class, "product-description")]/p/text()')
    loader.add_xpath('fabric_colors', '//div[@class="design-show-color"]/label/@for')
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', preview_xpath)
    loader.add_xpath('prices', '//meta[@property="og:price:amount"]/@content')
    loader.add_xpath('artist_name', og_title, re=r' by (.*)$')
    loader.add_value('artist_urls', 'https://www.teepublic.com' + artist_url)
    loader.add_xpath('tags', '//div[contains(@class, "tags")]/a/text()')
    return loader.load_item()
def parse(self, response):
    """Load the product shown on the page and return it as an item.

    ``last_chance`` is true only when the response URL is exactly the
    last-chance page.  The expiry is the number following ``until: `` in
    the page's script text, and the price is the fixed string ``8.99 EUR``.
    """
    on_last_chance_page = response.url == 'https://teebusters.com/index/lastchance.html'

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//div[@id="app_info"]/strong/text()')
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', '//img[contains(@src, "zoom")]/@src')
    loader.add_xpath('artist_name', '//div[@id="profile-box"]//strong/text()')
    loader.add_xpath('artist_urls', '//a[@class="followBox"]/@href')
    loader.add_value('prices', '8.99 EUR')
    loader.add_value('last_chance', on_last_chance_page)
    loader.add_xpath('expires_at', 'string(//script/text())', re=r'until: (\d+)')
    item = loader.load_item()
    return item
def parse_product_page(self, response):
    """Load a product page, then yield a request for the artist's page.

    Two price XPaths are added in order, followed by tags taken from the
    ``/tag/`` links.  The loaded item is attached to the request as
    ``meta['item']`` so that ``__parse_artist_page`` can finish it.
    """
    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', '//meta[@itemprop="name"]/@content')
    loader.add_xpath('description', '//meta[@property="og:description"]/@content')
    loader.add_xpath('fabric_colors', '//select[contains(@class, "color-tshirt")]/option/@value')
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', '//img[@itemprop="image"]/@src')
    # NOTE(review): 'spam' looks like a typo for 'span'; as written this
    # XPath is unlikely to match, so the meta price below is what is used.
    # Left unchanged -- confirm against the page markup before fixing.
    loader.add_xpath('prices', '//spam[@id="price"]/text()')
    loader.add_xpath('prices', '//meta[@itemprop="price"]/@content')
    loader.add_xpath('tags', '//a[contains(@href, "/tag/")]/text()')

    # Looked up once, right before use (an earlier duplicate lookup whose
    # result was always overwritten has been dropped).
    artist_url = response.xpath('//a[contains(@href, "/artist/")]//@href').extract_first()
    # dont_filter: several products may point at the same artist page.
    request = Request(urlparse.urljoin(response.url, artist_url),
                      callback=self.__parse_artist_page,
                      dont_filter=True)
    request.meta['item'] = loader.load_item()
    yield request
def parse_product_page(self, response):
    """Load a product page and return a request for its "Art Print" page.

    The image is not read here; the loaded item is passed in
    ``meta['item']`` to ``parse_image``, which is the callback for the
    "Art Print" link.  ``active`` is true when the ``og:availability``
    content equals ``instock``.  The artist URL is the site root plus the
    href of the user-avatar link, so a missing avatar link raises.
    """
    print_url = response.xpath('//a[text()="Art Print"]/@href').extract_first()
    username = response.xpath('//div[@class="user-avatar"]/a/@href').extract_first()
    product_heading = '//h1[@data-dmc="prod-name"]'

    loader = ProductItemLoader(response=response)
    loader.add_xpath('name', product_heading + '/text()')
    loader.add_xpath('description', '//p[@id="about-the-art-description"]/text()')
    loader.add_value('url', response.url)
    loader.add_xpath('prices', '//meta[@itemprop="price"]/@content')
    loader.add_xpath('artist_name', product_heading + '/following::a/text()')
    loader.add_value('artist_urls', 'https://society6.com' + username)
    loader.add_xpath('tags', '//p[@class="tag-container"]/a/text()')

    availability = response.xpath('//meta[@property="og:availability"]/@content').extract_first()
    loader.add_value('active', availability == 'instock')

    image_page_url = urlparse.urljoin(response.url, print_url)
    request = Request(image_page_url, callback=self.parse_image)
    request.meta['item'] = loader.load_item()
    return request
def parse_product_page(self, response):
    """Load a product page and return the resulting item.

    The artist name is the text of the ``name`` span; the artist URL is
    the shop base URL with that name appended, so a page without the span
    raises ``TypeError`` on the concatenation.
    """
    loader = ProductItemLoader(response=response)
    artist_name = response.xpath('//span[@class="name"]/text()').extract_first()

    loader.add_xpath('name', '//h1[@id="product-title"]/text()')
    loader.add_value('url', response.url)
    loader.add_xpath('image_url', '//meta[@property="og:image"]/@content')
    loader.add_xpath('prices', '//span[@id="product_price"]/text()')
    loader.add_value('artist_name', artist_name)
    loader.add_value('artist_urls', 'https://www.designbyhumans.com/shop/' + artist_name)
    return loader.load_item()