Beispiel #1
0
 def parse_artist_page(self, response):
     """Add artist links to the item carried in ``response.meta['item']``.

     Every ``href`` found under a ``div`` whose class contains
     ``category-userprofile-info`` is loaded into ``artist_urls``.
     Returns the completed item.
     """
     profile_links_xpath = (
         '//div[contains(@class, "category-userprofile-info")]//a/@href')
     artist_loader = ProductItemLoader(response.meta['item'],
                                       response=response)
     artist_loader.add_xpath('artist_urls', profile_links_xpath)
     return artist_loader.load_item()
    def parse(self, response):
        """Yield one artist-profile request per product tile on the page.

        Every ``.big-slide.tee > div`` element is turned into a partially
        filled product item.  The item travels in ``request.meta['item']``
        to ``parse_artist_page``, the callback of the yielded request.
        """
        countdown = response.xpath(
            '//div[@class="index-countdown"]/@data-time').extract_first()
        tiles = response.css('.big-slide.tee > div')

        for position, tile in enumerate(tiles):
            product_id = tile.xpath('@data-id').extract_first()
            artist_id = tile.xpath('@data-user-id').extract_first()

            price_parts = []
            for code in ['usd', 'gbp', 'eur']:
                price_parts.append(self.build_currency_string(tile, code))

            product = ProductItemLoader(response=response, selector=tile)
            product.add_xpath('name', '@data-name')
            product.add_value('url', response.url)
            product.add_value('image_url', self.build_image_url(product_id))
            product.add_xpath('artist_name', '@data-user')
            product.add_value('prices', ' / '.join(price_parts))
            # Tiles after the first three are flagged as last chance.
            product.add_value('last_chance', position > 2)
            product.add_value('expires_at', countdown)

            profile_url = urlparse.urljoin(response.url,
                                           '/profile/' + artist_id)
            artist_request = scrapy.Request(profile_url,
                                            callback=self.parse_artist_page,
                                            dont_filter=True)
            artist_request.meta['item'] = product.load_item()
            yield artist_request
 def parse(self, response):
     """Yield a product-page request for each matching product link.

     Each request carries, in ``meta['item']``, an item pre-filled with
     the ``expires_at`` value read from the current page.
     """
     link_selector = response.xpath(
         '//a[contains(@href, "productos") and @class="btn btn-ficha"]/@href')
     for product_url in link_selector.extract():
         # A new loader is built for every link, as in the listing loop.
         countdown = ProductItemLoader(response=response)
         countdown.add_xpath('expires_at',
                             '//input[@id="segundos_restantes"]/@value')
         product_request = scrapy.Request(
             product_url, callback=self.__parse_product_page)
         product_request.meta['item'] = countdown.load_item()
         yield product_request
    def __parse_product_page(self, response):
        """Build a product item and return a request for the artist's page.

        Responses whose URL does not contain ``/product/`` are ignored and
        ``None`` is returned.  Otherwise the loaded item is attached to the
        returned request as ``meta['item']``.
        """
        if '/product/' not in response.url:
            return None

        buy_button_id = response.xpath(
            '//a[contains(@class, "buy-button")]/@data-id').extract_first()
        author_link = response.xpath('//span[@class="author"]/a')
        price_node = response.xpath('//p[@class="product-price"]')
        price_parts = []
        for code in ['usd', 'gbp', 'eur']:
            price_parts.append(self.build_currency_string(price_node, code))

        product = ProductItemLoader(response=response)
        product.add_xpath('name', '//span[@class="name"]/text()')
        product.add_value('url', response.url)
        product.add_value('image_url', self.build_image_url(buy_button_id))
        product.add_value('artist_name',
                          author_link.xpath('text()').extract())
        product.add_value('prices', ' / '.join(price_parts))
        is_last_chance = (
            response.url == 'https://www.qwertee.com/last-chance')
        product.add_value('last_chance', is_last_chance)

        author_url = urlparse.urljoin(
            response.url, author_link.xpath('@href').extract_first())
        artist_request = scrapy.Request(author_url,
                                        callback=self.parse_artist_page,
                                        dont_filter=True)
        artist_request.meta['item'] = product.load_item()
        return artist_request
 def __parse_artist_page(self, response):
     """Complete the item from ``response.meta['item']`` with artist data.

     Adds the text of ``h3`` elements as ``artist_name`` and the ``href`` of
     ``socialIcon`` links inside ``div.col-sm-4`` as ``artist_urls``.
     Returns the loaded item.
     """
     social_links_xpath = (
         '//div[@class="col-sm-4"]//a[contains(@class, "socialIcon")]/@href')
     artist_loader = ProductItemLoader(response.meta['item'],
                                       response=response)
     artist_loader.add_xpath('artist_name', '//h3/text()')
     artist_loader.add_xpath('artist_urls', social_links_xpath)
     return artist_loader.load_item()
Beispiel #6
0
 def __parse_product_page(self, response):
     """Build and return a product item from a product page response."""
     product = ProductItemLoader(response=response)
     product.add_xpath('name', '//h1/text()')
     product.add_value('url', response.url)
     # The remaining fields are plain XPath lookups, added in this order.
     remaining_xpaths = (
         ('artist_name', '//span[text()="Artist:"]/following::a[1]/text()'),
         ('artist_urls', '//span[text()="Artist:"]/following::a[1]/@href'),
         ('image_url', '//meta[@property="og:image"]/@content'),
         ('prices', '//span[@class="price-new"]/text()'),
     )
     for field_name, xpath in remaining_xpaths:
         product.add_xpath(field_name, xpath)
     return product.load_item()
 def __parse_product_page(self, response):
     """Build and return a product item from a product page response.

     Artist name and URL are fixed values; the image URL is extracted from
     the raw response body with a regular expression.
     """
     fullsize_url_re = r'"fullsize":{"url":"(.*?)"'
     product = ProductItemLoader(response=response)
     product.add_xpath('name', '//h1/text()')
     product.add_value('url', response.url)
     product.add_value('artist_name', 'teeminus24')
     product.add_value('artist_urls', ['http://teeminus24.com'])
     product.add_xpath('prices', '//span[@id="listPrice"]/text()')
     product.add_value('image_url', response.body, re=fullsize_url_re)
     return product.load_item()
 def parse_product_page(self, response):
     """Build and return a product item from a product page response.

     Two image XPaths are queried in turn ("detail" first, then
     "1024x1024"); both feed the same ``image_url`` field.
     """
     image_xpaths = (
         '//img[contains(@src, "detail")]/@src',
         '//img[contains(@src, "1024x1024")]/@src',
     )
     product = ProductItemLoader(response=response)
     product.add_xpath('name', '//h1/text()')
     product.add_value('url', response.url)
     for image_xpath in image_xpaths:
         product.add_xpath('image_url', image_xpath)
     product.add_xpath('artist_name',
                       '//a[contains(@href, "/pages/member/")]/text()')
     product.add_xpath('prices', '(//span[@class="price"])[1]/text()')
     return product.load_item()
 def parse_product_page(self, response):
     """Build and return a product item from a product page response.

     The image URL comes from the ``og:image`` meta tag, with its
     ``400x400`` size marker replaced by ``750x750``.  When the page has no
     ``og:image`` tag, ``image_url`` is left unset instead of raising
     ``AttributeError`` on ``None``.  Artist name, artist URL and price are
     fixed values.
     """
     image_url = response.xpath(
         '//meta[@property="og:image"]/@content').extract_first()
     loader = ProductItemLoader(response=response)
     loader.add_xpath('name', '//h1/text()')
     loader.add_value('url', response.url)
     # extract_first() yields None when the meta tag is absent.
     if image_url is not None:
         loader.add_value('image_url',
                          image_url.replace('400x400', '750x750'))
     loader.add_xpath('fabric_colors',
                      '//img[contains(@src, "swatches")]/@alt')
     loader.add_value('artist_name', '6 Dollar Shirts')
     loader.add_value('artist_urls', ['http://6dollarshirts.com/'])
     loader.add_value('prices', '$6')
     return loader.load_item()
Beispiel #10
0
    def __parse_product_page(self, response):
        """Build and return a product item from a design detail page."""
        design = ProductItemLoader(response=response)
        design.add_xpath('name', '//div[@class="t-shirt-design-name"]/text()')
        design.add_value('url', response.url)
        design.add_xpath(
            'image_url',
            '//img[contains(@data-opaque, "user_design")]/@data-opaque')

        # Only the part captured after "Artist : " is kept as the name.
        artist_pattern = 'Artist : (.+)'
        design.add_xpath('artist_name',
                         '//div[@class="t-shirt-designer-name"]/text()',
                         re=artist_pattern)

        trailing_xpaths = (
            ('prices', '//div[@class="addtocart-tab"]/span/text()'),
            ('fabric_colors',
             '//div[@class="mens-color-box normalTip"]/@data-hex'),
        )
        for field_name, xpath in trailing_xpaths:
            design.add_xpath(field_name, xpath)

        return design.load_item()
    def parse(self, response):
        """Yield a product-detail request for every ``DesignID`` input value.

        Price and expiry are read from the current page and handed to the
        detail callback through ``request.meta['item']``.
        """
        detail_url_prefix = (
            'http://www.teetournament.com/product-detail.cfm?designid=')
        for design_id in response.xpath('//input[@name="DesignID"]/@value'):
            product_url = detail_url_prefix + design_id.extract()

            listing = ProductItemLoader(response=response)
            listing.add_xpath('prices',
                              '//div[@class="price priceLeft"]/text()')
            listing.add_xpath('expires_at',
                              'string(//script/text())',
                              re=r'until: (\d+)')

            detail_request = scrapy.Request(
                product_url, callback=self.__parse_product_page)
            detail_request.meta['item'] = listing.load_item()
            yield detail_request
 def __parse_product_page(self, response):
     """Complete the item from ``response.meta['item']`` with page data.

     Returns the loaded item.
     """
     product = ProductItemLoader(response.meta['item'], response=response)
     product.add_xpath('name', '//div[@id="datos_autor"]/h2/text()')
     product.add_value('url', response.url)
     simple_xpaths = (
         ('artist_name', '//div[@id="datos_autor"]/a/text()'),
         ('image_url',
          '//div[@id="contenedor_fotos"]/div[2]/img/@data-src'),
     )
     for field_name, xpath in simple_xpaths:
         product.add_xpath(field_name, xpath)
     # The price text nodes are passed through the Join() processor.
     product.add_xpath('prices',
                       '//div[@class="precio-xt-euros"]//text()',
                       Join())
     return product.load_item()
    def parse(self, response):
        """Build and return a product item directly from the response.

        The price is a fixed value; ``last_chance`` is true only when the
        response URL is the last-chance page URL.
        """
        on_last_chance_page = (
            response.url == 'https://teebusters.com/index/lastchance.html')

        product = ProductItemLoader(response=response)
        product.add_xpath('name', '//div[@id="app_info"]/strong/text()')
        product.add_value('url', response.url)
        leading_xpaths = (
            ('image_url', '//img[contains(@src, "zoom")]/@src'),
            ('artist_name', '//div[@id="profile-box"]//strong/text()'),
            ('artist_urls', '//a[@class="followBox"]/@href'),
        )
        for field_name, xpath in leading_xpaths:
            product.add_xpath(field_name, xpath)
        product.add_value('prices', '8.99 EUR')
        product.add_value('last_chance', on_last_chance_page)
        product.add_xpath('expires_at',
                          'string(//script/text())',
                          re=r'until: (\d+)')

        return product.load_item()
    def parse_product_page(self, response):
        """Build a product item and return a request for its "Art Print" page.

        The loaded item is attached to the returned request as
        ``meta['item']``; the request's callback is ``self.parse_image``.

        ``artist_urls`` is only added when the avatar link exists: the
        original concatenation raised ``TypeError`` when ``extract_first()``
        returned ``None``.
        """
        print_url = response.xpath('//a[text()="Art Print"]/@href').extract_first()
        username = response.xpath('//div[@class="user-avatar"]/a/@href').extract_first()

        loader = ProductItemLoader(response=response)
        loader.add_xpath('name', '//h1[@data-dmc="prod-name"]/text()')
        loader.add_xpath('description', '//p[@id="about-the-art-description"]/text()')
        loader.add_value('url', response.url)
        loader.add_xpath('prices', '//meta[@itemprop="price"]/@content')
        loader.add_xpath('artist_name', '//h1[@data-dmc="prod-name"]/following::a/text()')
        # The avatar href is appended to the site root to form the artist URL.
        if username is not None:
            loader.add_value('artist_urls', 'https://society6.com' + username)
        loader.add_xpath('tags', '//p[@class="tag-container"]/a/text()')
        availability = response.xpath(
            '//meta[@property="og:availability"]/@content').extract_first()
        loader.add_value('active', availability == 'instock')

        request = Request(urlparse.urljoin(response.url, print_url), callback=self.parse_image)
        request.meta['item'] = loader.load_item()
        return request
Beispiel #15
0
    def parse_product_page(self, response):
        """Build a product item and yield a request for the artist's page.

        The loaded item is attached to the request as ``meta['item']`` for
        the ``parse_artist_page`` callback.
        """
        product = ProductItemLoader(response=response)
        product.add_xpath(
            'name', '//section[@class="product-detail-info"]/h2/text()')
        product.add_value('url', response.url)
        for field_name, xpath in (
                ('artist_name', '//div[@class="product-artist"]/a/text()'),
                ('image_url', '//meta[@property="og:image"]/@content'),
                ('prices', '//meta[@property="og:price:amount"]/@content'),
        ):
            product.add_xpath(field_name, xpath)

        artist_href = response.xpath(
            '//div[@class="product-artist"]/a/@href').extract_first()
        artist_request = scrapy.Request(
            urlparse.urljoin(response.url, artist_href),
            callback=self.parse_artist_page,
            dont_filter=True)
        artist_request.meta['item'] = product.load_item()
        yield artist_request
 def parse_product_page(self, response):
     """Build and return a product item from a product page response.

     The ``itemprop="name"`` text is used twice: the part before " by"
     feeds ``name`` and the part after "by" feeds ``artist_name`` (in
     addition to the heading inside the ``artist-info`` aside).
     """
     # Debug trace of the visited URL.  The call form prints the same text
     # as the old ``print`` statement on Python 2 and also parses on
     # Python 3.
     print(response.url)
     loader = ProductItemLoader(response=response)
     loader.add_xpath('name',
                      '//span[@itemprop="name"]/text()',
                      re=r'(.+) by')
     loader.add_value('url', response.url)
     loader.add_xpath('artist_name',
                      '//aside[@class="artist-info"]//h2/text()')
     loader.add_xpath('artist_name',
                      '//span[@itemprop="name"]/text()',
                      re=r'\sby\s(.+)')
     # Only the last element carrying a data-zoom-url attribute is used.
     loader.add_xpath('image_url',
                      '(//div[@data-zoom-url]/@data-zoom-url)[last()]')
     loader.add_xpath(
         'prices',
         '//div[@itemprop="priceSpecification"]/span[@itemprop="minPrice" or @itemprop="price"]/text()'
     )
     return loader.load_item()
Beispiel #17
0
    def __parse_product_page(self, response):
        """Build a product item and yield a request for the artist's page.

        The loaded item is attached to the request as ``meta['item']``.
        """
        product = ProductItemLoader(response=response)
        product.add_xpath('name', '//div[@class="name"]/text()')
        product.add_value('url', response.url)
        for field_name, xpath in (
                ('image_url', '//section[@class="product-info"]//img[1]/@ref'),
                ('artist_name', '//a[contains(@href, "/artists/")]/text()'),
                ('prices', '//span[@id="price"]/text()'),
        ):
            product.add_xpath(field_name, xpath)

        artist_href = response.xpath(
            '//a[contains(@href, "/artists/")]//@href').extract_first()
        artist_request = scrapy.Request(
            urlparse.urljoin(response.url, artist_href),
            callback=self.__parse_artist_page,
            dont_filter=True)
        artist_request.meta['item'] = product.load_item()
        yield artist_request
    def parse_product_page(self, response):
        """Build a product item and yield a request for the artist's page.

        ``prices`` is fed from two XPaths in order: the "Mens Tee" price
        first, then every ``data-glprice`` attribute on a ``div``.
        """
        price_xpaths = (
            '//div[@data-glname="Mens Tee"]/@data-glprice',
            '//div/@data-glprice',
        )

        product = ProductItemLoader(response=response)
        product.add_xpath(
            'name', '//div[@class="product_identity clearfix"]/h1/text()')
        product.add_value('url', response.url)
        product.add_xpath(
            'image_url',
            '//a[@class="design" and contains(@href, "design")]/@href')
        product.add_xpath('artist_name', '//p[@class="tip"]/@data-title')
        for price_xpath in price_xpaths:
            product.add_xpath('prices', price_xpath)

        artist_href = response.xpath(
            '//p[@class="tip"]/a/@href').extract_first()
        artist_page_url = urlparse.urljoin(response.url, artist_href)
        artist_request = scrapy.Request(artist_page_url,
                                        callback=self.parse_artist_page,
                                        dont_filter=True)
        artist_request.meta['item'] = product.load_item()
        yield artist_request
    def parse_product_page(self, response, deal=False):
        """Build and return a product item from a product page response.

        When ``deal`` is true, the ``lbldatetime`` text is additionally
        loaded as ``expires_at``.
        """
        product = ProductItemLoader(response=response)
        product.add_xpath('name', '//input[@name="txtSaleDesignTitle"]/@value')
        product.add_value('url', response.url)

        design_href = response.xpath(
            '//div[@class="tee-des"]/a/@href').extract_first()
        product.add_value('image_url',
                          urlparse.urljoin(response.url, design_href))

        product.add_xpath('artist_name', '//span[@id="lblArtistname"]/text()')
        product.add_xpath('artist_urls', '//div[@class="social-icon"]/a/@href')
        # Price text nodes are concatenated without a separator.
        price_joiner = Join(separator=u'')
        product.add_xpath('prices', '//span[@class="tagfullprice"]//text()',
                          price_joiner)
        product.add_value('fabric_colors',
                          self.__parse_fabric_colors(response))

        if deal:
            product.add_xpath('expires_at',
                              '//span[@id="lbldatetime"]/text()')

        return product.load_item()
    def __parse_product_page(self, response):
        """Extend the item from ``response.meta['item']`` and follow the artist.

        Name, URL and image URL are added to the item, which is then passed
        on in ``meta['item']`` of the yielded artist-page request.
        """
        product = ProductItemLoader(response.meta['item'], response=response)
        product.add_xpath('name', '//meta[@property="og:title"]/@content')
        product.add_value('url', response.url)

        image_href = response.xpath('//a[@data-title]/@href').extract_first()
        product.add_value('image_url',
                          urlparse.urljoin(response.url, image_href))

        artist_href = response.xpath(
            '//a[contains(@href, "artist.cfm")]/@href').extract_first()
        artist_page_url = urlparse.urljoin(response.url, artist_href)

        artist_request = scrapy.Request(artist_page_url,
                                        callback=self.__parse_artist_page,
                                        dont_filter=True)
        artist_request.meta['item'] = product.load_item()
        yield artist_request
 def parse(self, response):
     """Build and return a product item from the response.

     The price is a fixed value of ``$10``.
     """
     product = ProductItemLoader(response=response)
     product.add_xpath('name', '//span[@class="product-name"]/text()')
     product.add_value('url', response.url)
     for field_name, xpath in (
             ('image_url', '//div[@class="showcase-image"]/img/@src'),
             ('artist_name', '//span[@class="product-artist"]/a/text()'),
     ):
         product.add_xpath(field_name, xpath)
     product.add_value('prices', '$10')
     return product.load_item()
Beispiel #22
0
 def parse(self, response):
     """Build and return a product item from the response.

     Artist name and artist URL are fixed values.
     """
     shirt = ProductItemLoader(response=response)
     shirt.add_xpath('name', '//div[@id="shirt-name"]/h1/text()')
     shirt.add_value('url', response.url)
     shirt.add_xpath('image_url', '//div[@id="design-large"]/a/@href')
     for field_name, fixed_value in (
             ('artist_name', 'Tee Gravy'),
             ('artist_urls', ['http://www.teegravy.com']),
     ):
         shirt.add_value(field_name, fixed_value)
     shirt.add_xpath('prices', '//p[@class="shirt-dollars"]/text()')
     return shirt.load_item()
Beispiel #23
0
 def __parse_product_page(self, response):
     """Build and return a product item from a product page response.

     The artist name is whatever follows "by" (case-insensitive) in the
     joined ``h1.pos-title`` text.  ``url`` and ``prices`` are fixed values.
     """
     loader = ProductItemLoader(response=response)
     # Raw string: ``\s`` is a regex escape, not a Python string escape.
     artist_re = re.compile(r'by\s+(.+)', re.IGNORECASE)
     artist = response.xpath('//h1[@class="pos-title"]/text()').extract()
     loader.add_xpath('name', '//meta[@itemprop="itemreviewed"]/@content')
     loader.add_value('url', ['http://www.teevolt.com'])
     loader.add_xpath('image_url', '//img[contains(@src,"com_zoo")][1]/@src')
     loader.add_value('artist_name', ''.join(artist), re=artist_re)
     loader.add_xpath('artist_urls', '//div[@class="pos-description"]//a/@href')
     loader.add_value('prices', '7.50 GBP / 9 EUR / 12 USD')
     return loader.load_item()
Beispiel #24
0
 def __parse_product_page(self, response):
     """Build and return a product item from a product page response.

     The artist name is the text after the last " by " in the ``og:title``
     meta tag.  When the tag is missing or contains no " by ", the
     ``artist_name`` field is left unset instead of raising
     ``AttributeError`` / ``IndexError``.
     """
     loader = ProductItemLoader(response=response)
     loader.add_xpath('name', '//span[@class="productname"]/text()')
     loader.add_value('url', response.url)
     loader.add_xpath('image_url', '//meta[@property="og:image"]/@content')
     og_title = response.xpath(
         '//meta[@property="og:title"]/@content').extract_first()
     # extract_first() yields None when the meta tag is absent.
     if og_title is not None and ' by ' in og_title:
         loader.add_value('artist_name', og_title.rsplit(' by ', 1)[1])
     # The price is taken from the last span text node inside "productdt".
     loader.add_xpath('prices',
                      '(//div[@class="productdt"]//span/text())[last()]')
     return loader.load_item()
Beispiel #25
0
 def parse(self, response):
     """Build and return a product item from the response.

     Name, URL and image all come from the ``wpb_single_image`` block;
     artist name, artist URL and price are fixed values.
     """
     scraped_fields = (
         ('name', '//div[contains(@class, "wpb_single_image")]/h2/text()'),
         ('url', '//div[contains(@class, "wpb_single_image")]//a/@href'),
         ('image_url',
          '//div[contains(@class, "wpb_single_image")]//img/@src'),
     )
     fixed_fields = (
         ('artist_name', 'Anglotees'),
         ('artist_urls', ['http://anglotees.com/']),
         ('prices', '$22.99'),
     )

     product = ProductItemLoader(response=response)
     for field_name, xpath in scraped_fields:
         product.add_xpath(field_name, xpath)
     for field_name, fixed_value in fixed_fields:
         product.add_value(field_name, fixed_value)
     return product.load_item()
Beispiel #26
0
 def __parse_product_page(self, response):
     """Build and return a product item from a product page response.

     Artist name and artist URL are fixed values.
     """
     thumbnail_xpath = (
         '//div[@id="productThumbnails"]//img[contains(@data-src, "SMALL")]/@data-src'
     )
     price_xpath = '//meta[@property="product:price:amount"]/@content'

     product = ProductItemLoader(response=response)
     product.add_xpath('name', '//meta[@property="og:title"]/@content')
     product.add_value('url', response.url)
     product.add_value('artist_name', 'Inkogneeto')
     product.add_value('artist_urls', ['http://www.inkogneeto.com'])
     product.add_xpath('image_url', thumbnail_xpath)
     product.add_xpath('prices', price_xpath)
     return product.load_item()
Beispiel #27
0
    def __parse_product_page(self, response):
        """Complete the item from ``response.meta['item']`` and return it.

        The ``og:title`` content is split around "By": the leading part
        becomes ``name`` and the trailing part ``artist_name``.  The price
        is a fixed value of ``$25``.
        """
        og_title_xpath = '//meta[@property="og:title"]/@content'
        product = ProductItemLoader(response.meta['item'], response=response)

        product.add_xpath('name', og_title_xpath, re=r'(.*?) By')
        product.add_value('url', response.url)
        product.add_xpath('artist_name', og_title_xpath, re=r'.*By\s(.+)')
        product.add_xpath('artist_urls',
                          '//div[@class="mini_description"]//a/@href')
        product.add_value('prices', '$25')
        return product.load_item()
Beispiel #28
0
    def parse_product_page(self, response):
        """Build a product item and yield a request for the artist's page.

        ``prices`` is fed from the first ``itemprop="price"`` span and from
        the ``ecomm_totalvalue`` number found in the raw body.  The item is
        marked ``last_chance`` when the resulting price output equals
        ``'14'``.
        """
        product = ProductItemLoader(response=response)
        product.add_xpath('name', '//meta[@property="og:title"]/@content')
        product.add_value('url', response.url)
        product.add_xpath(
            'image_url',
            '//div[contains(@class,"artwork-slide")]/@data-full-image')
        product.add_xpath('artist_name',
                          '//h2[@class="artist-name"]//span/text()')

        product.add_xpath('prices', '(//span[@itemprop="price"])[1]/text()')
        product.add_value('prices',
                          response.body,
                          re=r'ecomm_totalvalue: (\d+)')
        # Must be computed only after both price sources have been added.
        is_last_chance = product.get_output_value('prices') == '14'
        product.add_value('last_chance', is_last_chance)

        artist_href = response.xpath(
            '//h2[@class="artist-name"]//a/@href').extract_first()
        artist_request = scrapy.Request(artist_href,
                                        callback=self.parse_artist_page,
                                        dont_filter=True)
        artist_request.meta['item'] = product.load_item()
        yield artist_request
    def parse_product_page(self, response):
        """Build and return a product item from a product page response.

        The artist's shop URL is built by appending the artist name to the
        shop base URL.  It is only added when a name was found: the original
        concatenation raised ``TypeError`` when ``extract_first()`` returned
        ``None``.  The unused colour-style lookup was removed.
        """
        loader = ProductItemLoader(response=response)

        artist_name = response.xpath(
            '//span[@class="name"]/text()').extract_first()

        loader.add_xpath('name', '//h1[@id="product-title"]/text()')
        loader.add_value('url', response.url)
        loader.add_xpath('image_url', '//meta[@property="og:image"]/@content')
        loader.add_xpath('prices', '//span[@id="product_price"]/text()')
        loader.add_value('artist_name', artist_name)
        if artist_name is not None:
            loader.add_value(
                'artist_urls',
                'https://www.designbyhumans.com/shop/' + artist_name)
        return loader.load_item()
Beispiel #30
0
    def __parse_product_page(self, response):
        """Build and return a product item from a product page response.

        The image URL is extracted from the raw body with a regular
        expression.  Tag texts are split on commas before being stored.
        """
        loader = ProductItemLoader(response=response)
        loader.add_xpath('name',
                         '//h2[@class="work-information_title"]/text()')
        # NOTE(review): 'description' is read from a price meta tag; this
        # looks like a copy/paste slip -- confirm the intended XPath.
        loader.add_xpath('description',
                         '//meta[@name="product:price:amount"]/@content')
        loader.add_value('url', response.url)
        loader.add_value('image_url',
                         response.body,
                         re=r'https:\/\/[\w|\.]+\/[\w|.]+\/raf.*?\.jpg')
        loader.add_xpath('prices',
                         '//meta[@property="product:price:amount"]/@content')
        loader.add_xpath('artist_name', '//a[@rel="author"]/text()')
        loader.add_xpath('artist_urls', '//a[@rel="author"]/@href')
        # The processor must be passed positionally: add_xpath() takes
        # processors as *args, so the former ``output_processor=`` keyword
        # never applied the split.
        loader.add_xpath('tags',
                         '//p[@class="description_tag-list"]/a/text()',
                         MapCompose(lambda x: x.split(',')))

        return loader.load_item()