Beispiel #1
0
    def __parse_products(self, response):
        """Yield a loaded item for each sellable product in a JSON listing.

        The response body is parsed as JSON and its ``products`` list is
        walked, after an in-place shuffle when ``self.shuffle_products`` is
        truthy. A product is skipped when it has no images or when none of
        its variants is flagged ``available``. Every other product is loaded
        into a ``ProductItemLoader``, handed to ``self.before_save`` and
        yielded only if ``self.validate`` accepts it.

        ``self.product_limit`` caps the number of yielded items; ``-1``
        disables the cap.

        Args:
            response: Response whose body is a JSON document containing a
                ``products`` list.

        Yields:
            The result of ``loader.load_item()`` for each validated product.
        """
        products = json.loads(response.body_as_unicode())['products']

        if self.shuffle_products:
            random.shuffle(products)

        added_products = 0
        for product in products:
            # Check the cap first so the loop ends as soon as it is reached,
            # rather than filtering products that can no longer be emitted.
            if self.product_limit != -1 and added_products >= self.product_limit:
                break

            if not product['images']:
                continue
            if not any(variant['available'] for variant in product['variants']):
                continue

            loader = ProductItemLoader()
            loader.add_value('name', product['title'])
            loader.add_value(
                'url',
                'http://%s/products/%s' % (self.shopify_domain,
                                           product['handle']))
            loader.add_value('image_url', product['images'][0]['src'])
            loader.add_value('artist_name', product['vendor'])
            # The price comes from the first variant, prefixed with '$'.
            loader.add_value('prices', '$' + product['variants'][0]['price'])
            loader.add_value('tags', product['tags'])

            loader = self.before_save(loader, product)
            if self.validate(loader, product):
                yield loader.load_item()
                added_products += 1
 def parse(self, response):
     """Yield one product-page request per matching product link.

     Each request carries, under ``meta['item']``, an item pre-loaded with
     the ``expires_at`` value read from the listing page, and is handled by
     ``self.__parse_product_page``.

     Yields:
         ``scrapy.Request`` objects, one per extracted link.
     """
     link_query = '//a[contains(@href, "productos") and @class="btn btn-ficha"]/@href'
     countdown_query = '//input[@id="segundos_restantes"]/@value'

     for href in response.xpath(link_query).extract():
         # A fresh loader per link, so every request gets its own item.
         seed = ProductItemLoader(response=response)
         seed.add_xpath('expires_at', countdown_query)
         yield scrapy.Request(
             href,
             callback=self.__parse_product_page,
             meta={'item': seed.load_item()})
    def __parse_product_page(self, response):
        """Load a product page and chain a request to the artist's page.

        Responses whose URL does not contain ``/product/`` are ignored and
        ``None`` is returned. Otherwise the item is loaded from the page and
        attached, under ``meta['item']``, to a request for the artist link,
        which is handled by ``self.parse_artist_page``.

        Returns:
            A ``scrapy.Request`` for the artist page, or ``None``.
        """
        if '/product/' not in response.url:
            return None

        product_id = response.xpath(
            '//a[contains(@class, "buy-button")]/@data-id').extract_first()
        artist_link = response.xpath('//span[@class="author"]/a')
        price_block = response.xpath('//p[@class="product-price"]')
        price_text = ' / '.join(
            self.build_currency_string(price_block, code)
            for code in ('usd', 'gbp', 'eur'))

        item_loader = ProductItemLoader(response=response)
        item_loader.add_xpath('name', '//span[@class="name"]/text()')
        item_loader.add_value('url', response.url)
        item_loader.add_value('image_url', self.build_image_url(product_id))
        item_loader.add_value('artist_name',
                              artist_link.xpath('text()').extract())
        item_loader.add_value('prices', price_text)
        # NOTE(review): the compared URL contains no '/product/' segment, so
        # on this guarded path the flag is always False -- confirm intent.
        is_last_chance = response.url == 'https://www.qwertee.com/last-chance'
        item_loader.add_value('last_chance', is_last_chance)

        artist_page = urlparse.urljoin(
            response.url, artist_link.xpath('@href').extract_first())
        return scrapy.Request(
            artist_page,
            callback=self.parse_artist_page,
            dont_filter=True,
            meta={'item': item_loader.load_item()})
    def parse(self, response):
        """Yield an artist-page request for every tee slide on the page.

        Each slide matched by ``.big-slide.tee > div`` is loaded into an item
        (name, image, artist, prices, expiry) that travels with the request
        under ``meta['item']``. Slides at index 3 and above are flagged as
        ``last_chance``.

        Yields:
            ``scrapy.Request`` objects handled by ``self.parse_artist_page``.
        """
        countdown = response.xpath(
            '//div[@class="index-countdown"]/@data-time').extract_first()

        for position, tee in enumerate(response.css('.big-slide.tee > div')):
            product_id = tee.xpath('@data-id').extract_first()
            artist_id = tee.xpath('@data-user-id').extract_first()
            price_text = ' / '.join(
                self.build_currency_string(tee, code)
                for code in ('usd', 'gbp', 'eur'))

            # XPath fields below are resolved relative to the slide itself.
            item_loader = ProductItemLoader(response=response, selector=tee)
            item_loader.add_xpath('name', '@data-name')
            item_loader.add_value('url', response.url)
            item_loader.add_value('image_url',
                                  self.build_image_url(product_id))
            item_loader.add_xpath('artist_name', '@data-user')
            item_loader.add_value('prices', price_text)
            item_loader.add_value('last_chance', position > 2)
            item_loader.add_value('expires_at', countdown)

            profile_url = urlparse.urljoin(response.url,
                                           '/profile/' + artist_id)
            yield scrapy.Request(
                profile_url,
                callback=self.parse_artist_page,
                dont_filter=True,
                meta={'item': item_loader.load_item()})
Beispiel #5
0
 def parse_artist_page(self, response):
     """Add artist links to the item carried in ``response.meta['item']``.

     Returns:
         The item produced by ``load_item()`` after ``artist_urls`` has been
         collected from the profile-info block.
     """
     profile_links = (
         '//div[contains(@class, "category-userprofile-info")]//a/@href')
     item_loader = ProductItemLoader(response.meta['item'],
                                     response=response)
     item_loader.add_xpath('artist_urls', profile_links)
     return item_loader.load_item()
 def parse(self, response):
     """Load a single product item directly from the response.

     Name, image URL and artist name come from XPath queries, the item URL
     is the response URL and the price is the fixed string ``$10``.

     Returns:
         The item produced by ``load_item()``.
     """
     item_loader = ProductItemLoader(response=response)
     # (adder, field, source) triples, applied in order.
     steps = (
         (item_loader.add_xpath, 'name',
          '//span[@class="product-name"]/text()'),
         (item_loader.add_value, 'url', response.url),
         (item_loader.add_xpath, 'image_url',
          '//div[@class="showcase-image"]/img/@src'),
         (item_loader.add_xpath, 'artist_name',
          '//span[@class="product-artist"]/a/text()'),
         (item_loader.add_value, 'prices', '$10'),
     )
     for add, field, source in steps:
         add(field, source)
     return item_loader.load_item()
 def __parse_product_page(self, response):
     """Complete the item from ``response.meta['item']`` with page details.

     Adds name, URL, artist name, image URL and prices. The price text
     nodes are passed through a ``Join()`` processor.

     Returns:
         The item produced by ``load_item()``.
     """
     author_box = '//div[@id="datos_autor"]'
     name_query = '//div[@id="datos_autor"]/h2/text()'
     artist_query = '//div[@id="datos_autor"]/a/text()'
     image_query = '//div[@id="contenedor_fotos"]/div[2]/img/@data-src'
     price_query = '//div[@class="precio-xt-euros"]//text()'

     item_loader = ProductItemLoader(response.meta['item'],
                                     response=response)
     item_loader.add_xpath('name', name_query)
     item_loader.add_value('url', response.url)
     item_loader.add_xpath('artist_name', artist_query)
     item_loader.add_xpath('image_url', image_query)
     item_loader.add_xpath('prices', price_query, Join())
     return item_loader.load_item()
 def __parse_product_page(self, response):
     """Load a product item from a product page.

     The artist name and artist URL are fixed values. The image URL is
     pulled out of the raw response body with a regular expression rather
     than an XPath query.

     Returns:
         The item produced by ``load_item()``.
     """
     fullsize_pattern = r'"fullsize":{"url":"(.*?)"'

     item_loader = ProductItemLoader(response=response)
     item_loader.add_xpath('name', '//h1/text()')
     item_loader.add_value('url', response.url)
     item_loader.add_value('artist_name', 'teeminus24')
     item_loader.add_value('artist_urls', ['http://teeminus24.com'])
     item_loader.add_xpath('prices', '//span[@id="listPrice"]/text()')
     item_loader.add_value('image_url', response.body, re=fullsize_pattern)
     return item_loader.load_item()
Beispiel #9
0
 def __parse_product_page(self, response):
     """Load a product item, including artist name and link, from the page.

     The artist is taken from the first link following the ``Artist:``
     label; the image comes from the ``og:image`` meta tag.

     Returns:
         The item produced by ``load_item()``.
     """
     artist_name_query = '//span[text()="Artist:"]/following::a[1]/text()'
     artist_href_query = '//span[text()="Artist:"]/following::a[1]/@href'

     item_loader = ProductItemLoader(response=response)
     item_loader.add_xpath('name', '//h1/text()')
     item_loader.add_value('url', response.url)
     item_loader.add_xpath('artist_name', artist_name_query)
     item_loader.add_xpath('artist_urls', artist_href_query)
     item_loader.add_xpath('image_url',
                           '//meta[@property="og:image"]/@content')
     item_loader.add_xpath('prices', '//span[@class="price-new"]/text()')
     return item_loader.load_item()
 def __parse_artist_page(self, response):
     """Add artist name and social links to the item in ``response.meta``.

     Returns:
         The item produced by ``load_item()``.
     """
     social_links = (
         '//div[@class="col-sm-4"]//a[contains(@class, "socialIcon")]/@href')

     item_loader = ProductItemLoader(response.meta['item'],
                                     response=response)
     item_loader.add_xpath('artist_name', '//h3/text()')
     item_loader.add_xpath('artist_urls', social_links)
     return item_loader.load_item()
Beispiel #11
0
 def parse(self, response):
     """Load a single product item directly from the response.

     The artist name and artist URL are fixed values; name, image URL and
     prices are read with XPath queries.

     Returns:
         The item produced by ``load_item()``.
     """
     item_loader = ProductItemLoader(response=response)
     # (adder, field, source) triples, applied in order.
     steps = (
         (item_loader.add_xpath, 'name',
          '//div[@id="shirt-name"]/h1/text()'),
         (item_loader.add_value, 'url', response.url),
         (item_loader.add_xpath, 'image_url',
          '//div[@id="design-large"]/a/@href'),
         (item_loader.add_value, 'artist_name', 'Tee Gravy'),
         (item_loader.add_value, 'artist_urls', ['http://www.teegravy.com']),
         (item_loader.add_xpath, 'prices',
          '//p[@class="shirt-dollars"]/text()'),
     )
     for add, field, source in steps:
         add(field, source)
     return item_loader.load_item()
    def parse_image(self, response):
        """Add the artwork image URL to the item in ``response.meta``.

        The URL is located in the response text with a regular expression.

        Returns:
            The item produced by ``load_item()``.

        Raises:
            AttributeError: If the pattern does not match the response text.
        """
        artwork_re = re.compile(
            r'"full":{"url":"(http.*?artwork\\/~artwork.*?)"')
        found = artwork_re.search(response.body_as_unicode())
        # The pattern matches backslash-escaped slashes; strip the
        # backslashes to obtain a plain URL.
        image_url = found.group(1).replace('\\', '')

        item_loader = ProductItemLoader(response.meta['item'],
                                        response=response)
        item_loader.add_value('image_url', image_url)
        return item_loader.load_item()
 def parse_product_page(self, response):
     """Load a product item from a product page.

     Two image queries feed the same ``image_url`` field, in this order;
     how multiple values are reduced is up to ``ProductItemLoader``.

     Returns:
         The item produced by ``load_item()``.
     """
     detail_image = '//img[contains(@src, "detail")]/@src'
     large_image = '//img[contains(@src, "1024x1024")]/@src'
     member_link_text = '//a[contains(@href, "/pages/member/")]/text()'
     first_price = '(//span[@class="price"])[1]/text()'

     item_loader = ProductItemLoader(response=response)
     item_loader.add_xpath('name', '//h1/text()')
     item_loader.add_value('url', response.url)
     for image_query in (detail_image, large_image):
         item_loader.add_xpath('image_url', image_query)
     item_loader.add_xpath('artist_name', member_link_text)
     item_loader.add_xpath('prices', first_price)
     return item_loader.load_item()
Beispiel #14
0
 def __parse_product_page(self, response):
     """Load a product item from a product page.

     The artist name is whatever follows ``by`` (case-insensitive) in the
     joined text of the ``pos-title`` heading. Prices are a fixed string.

     Returns:
         The item produced by ``load_item()``.
     """
     loader = ProductItemLoader(response=response)
     # Raw string: '\s' in a plain literal is an invalid escape sequence
     # that newer Python versions warn about.
     artist_re = re.compile(r'by\s+(.+)', re.IGNORECASE)
     artist = response.xpath('//h1[@class="pos-title"]/text()').extract()
     loader.add_xpath('name', '//meta[@itemprop="itemreviewed"]/@content')
     # NOTE(review): the URL is a fixed site address rather than
     # response.url -- presumably intentional; confirm.
     loader.add_value('url', ['http://www.teevolt.com'])
     loader.add_xpath('image_url', '//img[contains(@src,"com_zoo")][1]/@src')
     loader.add_value('artist_name', ''.join(artist), re=artist_re)
     loader.add_xpath('artist_urls', '//div[@class="pos-description"]//a/@href')
     loader.add_value('prices', '7.50 GBP / 9 EUR / 12 USD')
     return loader.load_item()
Beispiel #15
0
 def __parse_product_page(self, response):
     """Load a product item from a product page.

     The artist name is the part of the ``og:title`` meta content that
     follows the last `` by `` separator.

     Returns:
         The item produced by ``load_item()``.
     """
     item_loader = ProductItemLoader(response=response)
     item_loader.add_xpath('name', '//span[@class="productname"]/text()')
     item_loader.add_value('url', response.url)
     item_loader.add_xpath('image_url',
                           '//meta[@property="og:image"]/@content')

     og_title = response.xpath(
         '//meta[@property="og:title"]/@content').extract_first()
     # Split once from the right so only the final ' by ' is the separator.
     artist = og_title.rsplit(' by ', 1)[1]
     item_loader.add_value('artist_name', artist)

     last_price_text = '(//div[@class="productdt"]//span/text())[last()]'
     item_loader.add_xpath('prices', last_price_text)
     return item_loader.load_item()
Beispiel #16
0
    def parse_product_page(self, response):
        """Load a product page and chain a request to the artist's page.

        The loaded item is attached to the request under ``meta['item']``;
        the request is handled by ``self.parse_artist_page``.

        Yields:
            A single ``scrapy.Request`` for the artist link.
        """
        artist_anchor = '//div[@class="product-artist"]/a'

        item_loader = ProductItemLoader(response=response)
        item_loader.add_xpath(
            'name', '//section[@class="product-detail-info"]/h2/text()')
        item_loader.add_value('url', response.url)
        item_loader.add_xpath('artist_name',
                              '//div[@class="product-artist"]/a/text()')
        item_loader.add_xpath('image_url',
                              '//meta[@property="og:image"]/@content')
        item_loader.add_xpath('prices',
                              '//meta[@property="og:price:amount"]/@content')

        artist_href = response.xpath(
            '//div[@class="product-artist"]/a/@href').extract_first()
        yield scrapy.Request(
            urlparse.urljoin(response.url, artist_href),
            callback=self.parse_artist_page,
            dont_filter=True,
            meta={'item': item_loader.load_item()})
 def parse_product_page(self, response):
     """Load a product item from a product page.

     The ``og:image`` URL has ``400x400`` replaced by ``750x750``. Artist
     name, artist URL and price are fixed values.

     Returns:
         The item produced by ``load_item()``.
     """
     og_image = response.xpath(
         '//meta[@property="og:image"]/@content').extract_first()

     item_loader = ProductItemLoader(response=response)
     item_loader.add_xpath('name', '//h1/text()')
     item_loader.add_value('url', response.url)
     larger_image = og_image.replace('400x400', '750x750')
     item_loader.add_value('image_url', larger_image)
     item_loader.add_xpath('fabric_colors',
                           '//img[contains(@src, "swatches")]/@alt')
     item_loader.add_value('artist_name', '6 Dollar Shirts')
     item_loader.add_value('artist_urls', ['http://6dollarshirts.com/'])
     item_loader.add_value('prices', '$6')
     return item_loader.load_item()
Beispiel #18
0
 def __parse_product_page(self, response):
     """Load a product item from a product page.

     Name and price come from meta tags, the image from the product
     thumbnails; artist name and artist URL are fixed values.

     Returns:
         The item produced by ``load_item()``.
     """
     thumbnail_query = (
         '//div[@id="productThumbnails"]'
         '//img[contains(@data-src, "SMALL")]/@data-src')
     price_query = '//meta[@property="product:price:amount"]/@content'

     item_loader = ProductItemLoader(response=response)
     item_loader.add_xpath('name', '//meta[@property="og:title"]/@content')
     item_loader.add_value('url', response.url)
     item_loader.add_value('artist_name', 'Inkogneeto')
     item_loader.add_value('artist_urls', ['http://www.inkogneeto.com'])
     item_loader.add_xpath('image_url', thumbnail_query)
     item_loader.add_xpath('prices', price_query)
     return item_loader.load_item()
Beispiel #19
0
 def parse(self, response):
     """Load a single product item directly from the response.

     Name, URL and image are read from the ``wpb_single_image`` block;
     artist name, artist URL and price are fixed values.

     Returns:
         The item produced by ``load_item()``.
     """
     item_loader = ProductItemLoader(response=response)
     # (adder, field, source) triples, applied in order.
     steps = (
         (item_loader.add_xpath, 'name',
          '//div[contains(@class, "wpb_single_image")]/h2/text()'),
         (item_loader.add_xpath, 'url',
          '//div[contains(@class, "wpb_single_image")]//a/@href'),
         (item_loader.add_xpath, 'image_url',
          '//div[contains(@class, "wpb_single_image")]//img/@src'),
         (item_loader.add_value, 'artist_name', 'Anglotees'),
         (item_loader.add_value, 'artist_urls', ['http://anglotees.com/']),
         (item_loader.add_value, 'prices', '$22.99'),
     )
     for add, field, source in steps:
         add(field, source)
     return item_loader.load_item()
Beispiel #20
0
    def __parse_product_page(self, response):
        """Load a product page and chain a request to the artist's page.

        The loaded item is attached to the request under ``meta['item']``;
        the request is handled by ``self.__parse_artist_page``.

        Yields:
            A single ``scrapy.Request`` for the artist link.
        """
        item_loader = ProductItemLoader(response=response)
        item_loader.add_xpath('name', '//div[@class="name"]/text()')
        item_loader.add_value('url', response.url)
        item_loader.add_xpath(
            'image_url', '//section[@class="product-info"]//img[1]/@ref')
        item_loader.add_xpath('artist_name',
                              '//a[contains(@href, "/artists/")]/text()')
        item_loader.add_xpath('prices', '//span[@id="price"]/text()')

        artist_href = response.xpath(
            '//a[contains(@href, "/artists/")]//@href').extract_first()
        yield scrapy.Request(
            urlparse.urljoin(response.url, artist_href),
            callback=self.__parse_artist_page,
            dont_filter=True,
            meta={'item': item_loader.load_item()})
Beispiel #21
0
    def parse(self, response):
        """Yield one product-page request per matching product link.

        Each request carries, under ``meta['item']``, an item pre-loaded
        with the image found inside the link, and is handled by
        ``self.__parse_product_page``.

        Yields:
            ``scrapy.Request`` objects, one per matching link.
        """
        link_query = '//div[contains(@class, "wc-shortcodes-column")]/p/a[contains(@href, "limited-edition-hip-hop-t-shirts")]'

        for anchor in response.xpath(link_query):
            thumbnail = anchor.xpath('.//img[@srcset]/@src').extract_first()
            seed = ProductItemLoader(response=response)
            seed.add_value('image_url', thumbnail)

            yield scrapy.Request(
                anchor.xpath('@href').extract_first(),
                callback=self.__parse_product_page,
                meta={'item': seed.load_item()})
    def parse_product_page(self, response):
        """Load a product item from a product page.

        The artist name is taken from a ``Design by ...`` phrase in the
        description, with ``BustedTees`` added afterwards as a further
        value. Prices are collected from the sale-price span and then the
        regular price span. Tags come from the comma-separated ``keywords``
        meta tag and are omitted when that tag is absent.

        Yields:
            The item produced by ``load_item()``.
        """
        loader = ProductItemLoader(response=response)
        loader.add_xpath('name', '//h1/text()')
        loader.add_xpath('description', '//div[@id="description"]/text()')
        loader.add_value('url', response.url)
        loader.add_xpath('image_url', '//div[@class="gallery-image"]/img[contains(@src, ".gif")]/@src')
        loader.add_xpath('artist_name', '//div[@id="description"]/text()', re=r'Design by (.+?)\.?\s*$')
        loader.add_value('artist_name', 'BustedTees')
        loader.add_xpath('prices', '//span[@id="sale_price"]/text()')
        loader.add_xpath('prices', '//span[@class="price"]/text()')

        tags = response.xpath('//meta[@name="keywords"]/@content').extract_first()
        # extract_first() yields None when the page has no keywords meta tag;
        # previously that raised AttributeError and lost the whole item.
        if tags:
            loader.add_value('tags', tags.split(','))

        yield loader.load_item()
Beispiel #23
0
    def __parse_product_page(self, response):
        """Complete the item from ``response.meta['item']`` with page details.

        Both the product name and the artist name are cut out of the
        ``og:title`` meta content, around the word ``By``. The price is the
        fixed string ``$25``.

        Returns:
            The item produced by ``load_item()``.
        """
        og_title = '//meta[@property="og:title"]/@content'

        item_loader = ProductItemLoader(response.meta['item'],
                                        response=response)
        item_loader.add_xpath('name', og_title, re=r'(.*?) By')
        item_loader.add_value('url', response.url)
        item_loader.add_xpath('artist_name', og_title, re=r'.*By\s(.+)')
        item_loader.add_xpath(
            'artist_urls', '//div[@class="mini_description"]//a/@href')
        item_loader.add_value('prices', '$25')
        return item_loader.load_item()
Beispiel #24
0
    def __parse_product_page(self, response):
        """Load a product item, including fabric colors, from a product page.

        The artist name is the text following ``Artist : `` in the designer
        block.

        Returns:
            The item produced by ``load_item()``.
        """
        design_image = (
            '//img[contains(@data-opaque, "user_design")]/@data-opaque')
        designer_text = '//div[@class="t-shirt-designer-name"]/text()'
        color_hexes = '//div[@class="mens-color-box normalTip"]/@data-hex'

        item_loader = ProductItemLoader(response=response)
        item_loader.add_xpath(
            'name', '//div[@class="t-shirt-design-name"]/text()')
        item_loader.add_value('url', response.url)
        item_loader.add_xpath('image_url', design_image)
        item_loader.add_xpath('artist_name', designer_text,
                              re='Artist : (.+)')
        item_loader.add_xpath(
            'prices', '//div[@class="addtocart-tab"]/span/text()')
        item_loader.add_xpath('fabric_colors', color_hexes)
        return item_loader.load_item()
    def parse(self, response):
        """Yield one product-detail request per design id on the page.

        Each request carries, under ``meta['item']``, an item pre-loaded
        with prices and ``expires_at`` from the listing page, and is handled
        by ``self.__parse_product_page``.

        Yields:
            ``scrapy.Request`` objects, one per ``DesignID`` input.
        """
        detail_base = 'http://www.teetournament.com/product-detail.cfm?designid='
        design_ids = response.xpath(
            '//input[@name="DesignID"]/@value').extract()

        for design_id in design_ids:
            product_url = detail_base + design_id

            seed = ProductItemLoader(response=response)
            seed.add_xpath('prices', '//div[@class="price priceLeft"]/text()')
            # The expiry is the number following 'until: ' in the script text.
            seed.add_xpath('expires_at', 'string(//script/text())',
                           re=r'until: (\d+)')

            yield scrapy.Request(
                product_url,
                callback=self.__parse_product_page,
                meta={'item': seed.load_item()})
Beispiel #26
0
    def parse_product_page(self, response):
        """Load a product item from a product page.

        The image is the preview of the color box whose title equals the
        currently shown color name. Product name and artist name are the
        parts of the ``og:title`` content before and after `` by ``.

        Returns:
            The item produced by ``load_item()``.
        """
        item_loader = ProductItemLoader(response=response)

        shown_color = response.xpath(
            '//span[@class="color-name"]/text()').extract_first().strip()
        artist_path = response.xpath(
            '//a[contains(@href, "/user/")]/@href').extract_first()
        og_title = '//meta[@property="og:title"]/@content'
        preview_query = (
            '//span[contains(@class, "color-box") and @title="%s"]/@data-preview'
            % shown_color)

        item_loader.add_xpath('name', og_title, re=r'(.*?) by ')
        item_loader.add_xpath(
            'description',
            '//div[contains(@class, "product-description")]/p/text()')
        item_loader.add_xpath(
            'fabric_colors', '//div[@class="design-show-color"]/label/@for')
        item_loader.add_value('url', response.url)
        item_loader.add_xpath('image_url', preview_query)
        item_loader.add_xpath(
            'prices', '//meta[@property="og:price:amount"]/@content')
        item_loader.add_xpath('artist_name', og_title, re=r' by (.*)$')
        item_loader.add_value('artist_urls',
                              'https://www.teepublic.com' + artist_path)
        item_loader.add_xpath('tags',
                              '//div[contains(@class, "tags")]/a/text()')
        return item_loader.load_item()
    def parse(self, response):
        """Load a single product item directly from the response.

        The price is the fixed string ``8.99 EUR``. ``last_chance`` is true
        only when the response URL is the last-chance page, and
        ``expires_at`` is the number following ``until: `` in the script
        text.

        Returns:
            The item produced by ``load_item()``.
        """
        on_last_chance_page = (
            response.url == 'https://teebusters.com/index/lastchance.html')

        item_loader = ProductItemLoader(response=response)
        item_loader.add_xpath('name', '//div[@id="app_info"]/strong/text()')
        item_loader.add_value('url', response.url)
        item_loader.add_xpath('image_url',
                              '//img[contains(@src, "zoom")]/@src')
        item_loader.add_xpath('artist_name',
                              '//div[@id="profile-box"]//strong/text()')
        item_loader.add_xpath('artist_urls', '//a[@class="followBox"]/@href')
        item_loader.add_value('prices', '8.99 EUR')
        item_loader.add_value('last_chance', on_last_chance_page)
        item_loader.add_xpath('expires_at', 'string(//script/text())',
                              re=r'until: (\d+)')
        return item_loader.load_item()
    def parse_product_page(self, response):
        """Load a product page and chain a request to the artist's page.

        Prices are collected from the on-page price span and then from the
        ``price`` meta tag. The loaded item is attached to the request under
        ``meta['item']``; the request is handled by
        ``self.__parse_artist_page``.

        Yields:
            A single ``Request`` for the artist link.
        """
        loader = ProductItemLoader(response=response)
        loader.add_xpath('name', '//meta[@itemprop="name"]/@content')
        loader.add_xpath('description', '//meta[@property="og:description"]/@content')
        loader.add_xpath('fabric_colors', '//select[contains(@class, "color-tshirt")]/option/@value')
        loader.add_value('url', response.url)
        loader.add_xpath('image_url', '//img[@itemprop="image"]/@src')
        # This query previously read '//spam[...]'. 'spam' is not an HTML
        # element, so it presumably never matched and only the meta
        # fallback below contributed a price.
        loader.add_xpath('prices', '//span[@id="price"]/text()')
        loader.add_xpath('prices', '//meta[@itemprop="price"]/@content')
        loader.add_xpath('tags', '//a[contains(@href, "/tag/")]/text()')

        # A single lookup: an earlier duplicate assignment was overwritten
        # here before ever being read.
        artist_url = response.xpath('//a[contains(@href, "/artist/")]//@href').extract_first()
        request = Request(urlparse.urljoin(response.url, artist_url), callback=self.__parse_artist_page, dont_filter=True)
        request.meta['item'] = loader.load_item()
        yield request
    def parse_product_page(self, response):
        """Load a product page and chain a request to its art-print page.

        ``active`` is true only when the ``og:availability`` meta content is
        ``instock``. The loaded item is attached to the request under
        ``meta['item']``; the request is handled by ``self.parse_image``.

        Returns:
            A ``Request`` for the "Art Print" link.
        """
        art_print_href = response.xpath(
            '//a[text()="Art Print"]/@href').extract_first()
        profile_path = response.xpath(
            '//div[@class="user-avatar"]/a/@href').extract_first()

        item_loader = ProductItemLoader(response=response)
        item_loader.add_xpath('name', '//h1[@data-dmc="prod-name"]/text()')
        item_loader.add_xpath('description',
                              '//p[@id="about-the-art-description"]/text()')
        item_loader.add_value('url', response.url)
        item_loader.add_xpath('prices', '//meta[@itemprop="price"]/@content')
        item_loader.add_xpath(
            'artist_name', '//h1[@data-dmc="prod-name"]/following::a/text()')
        item_loader.add_value('artist_urls',
                              'https://society6.com' + profile_path)
        item_loader.add_xpath('tags', '//p[@class="tag-container"]/a/text()')

        availability = response.xpath(
            '//meta[@property="og:availability"]/@content').extract_first()
        item_loader.add_value('active', availability == 'instock')

        return Request(
            urlparse.urljoin(response.url, art_print_href),
            callback=self.parse_image,
            meta={'item': item_loader.load_item()})
    def parse_product_page(self, response):
        """Load a product item from a product page.

        The artist URL is built by appending the artist name found on the
        page to the shop base URL.

        Returns:
            The item produced by ``load_item()``.
        """
        loader = ProductItemLoader(response=response)

        # The selected color's style attribute used to be extracted here but
        # was never read, so that query has been removed.
        artist_name = response.xpath(
            '//span[@class="name"]/text()').extract_first()

        loader.add_xpath('name', '//h1[@id="product-title"]/text()')
        loader.add_value('url', response.url)
        loader.add_xpath('image_url', '//meta[@property="og:image"]/@content')
        loader.add_xpath('prices', '//span[@id="product_price"]/text()')
        loader.add_value('artist_name', artist_name)
        loader.add_value('artist_urls',
                         'https://www.designbyhumans.com/shop/' + artist_name)
        return loader.load_item()