Esempio n. 1
0
 def parse_artist_page(self, response):
     """Add the artist's profile links to the item carried in the request meta.

     The partially filled item is read from ``response.meta['item']``; every
     link href inside the "category-userprofile-info" block is added to
     ``artist_urls`` and the loaded item is returned.
     """
     profile_links_xpath = (
         '//div[contains(@class, "category-userprofile-info")]//a/@href')
     pending_item = response.meta['item']
     artist_loader = ProductItemLoader(pending_item, response=response)
     artist_loader.add_xpath('artist_urls', profile_links_xpath)
     return artist_loader.load_item()
 def parse(self, response):
     """Yield one product-page request per "btn btn-ficha" product link.

     Each request carries a fresh item in ``meta['item']`` that already holds
     the ``expires_at`` value read from the "segundos_restantes" input of the
     listing page.
     """
     product_urls = response.xpath(
         '//a[contains(@href, "productos") and @class="btn btn-ficha"]/@href'
     ).extract()
     for product_url in product_urls:
         listing_loader = ProductItemLoader(response=response)
         listing_loader.add_xpath(
             'expires_at', '//input[@id="segundos_restantes"]/@value')
         product_request = scrapy.Request(
             product_url, callback=self.__parse_product_page)
         product_request.meta['item'] = listing_loader.load_item()
         yield product_request
    def parse_image(self, response):
        """Pull the full-size artwork URL out of the page source into the item.

        The item travels in ``response.meta['item']``. The URL is captured
        from the ``"full":{"url":"..."}`` fragment of the response text and
        its backslashes are stripped before it is stored as ``image_url``.
        Raises ``AttributeError`` if the fragment is not present.
        """
        pending_item = response.meta['item']
        image_loader = ProductItemLoader(pending_item, response=response)

        artwork_pattern = re.compile(
            r'"full":{"url":"(http.*?artwork\\/~artwork.*?)"')
        page_text = response.body_as_unicode()
        escaped_url = artwork_pattern.search(page_text).group(1)
        # The captured URL still carries escaping backslashes ("\/").
        clean_url = escaped_url.replace('\\', '')

        image_loader.add_value('image_url', clean_url)
        return image_loader.load_item()
Esempio n. 4
0
    def parse(self, response):
        """Yield a product-page request for every limited-edition shirt link.

        The thumbnail ``src`` found inside each link is stored as
        ``image_url`` on a fresh item, which is attached to the request as
        ``meta['item']``.
        """
        link_selectors = response.xpath(
            '//div[contains(@class, "wc-shortcodes-column")]/p/a[contains(@href, "limited-edition-hip-hop-t-shirts")]'
        )
        for link in link_selectors:
            thumbnail_src = link.xpath('.//img[@srcset]/@src').extract_first()
            listing_loader = ProductItemLoader(response=response)
            listing_loader.add_value('image_url', thumbnail_src)

            product_href = link.xpath('@href').extract_first()
            product_request = scrapy.Request(
                product_href, callback=self.__parse_product_page)
            product_request.meta['item'] = listing_loader.load_item()
            yield product_request
 def __parse_artist_page(self, response):
     """Complete the item from ``response.meta['item']`` with artist details.

     Adds the text of every ``<h3>`` as ``artist_name`` and the hrefs of the
     "socialIcon" links in the "col-sm-4" column as ``artist_urls``.
     """
     social_links_xpath = (
         '//div[@class="col-sm-4"]//a[contains(@class, "socialIcon")]/@href')
     pending_item = response.meta['item']
     artist_loader = ProductItemLoader(pending_item, response=response)
     artist_loader.add_xpath('artist_name', '//h3/text()')
     artist_loader.add_xpath('artist_urls', social_links_xpath)
     return artist_loader.load_item()
    def __parse_product_page(self, response):
        """Fill in product details, then follow the artist link.

        Continues the item from ``response.meta['item']`` with name, url and
        image_url, and yields a request for the "artist.cfm" page carrying
        the item onward in ``meta['item']``.
        """
        product_loader = ProductItemLoader(response.meta['item'],
                                           response=response)

        image_href = response.xpath('//a[@data-title]/@href').extract_first()
        product_loader.add_xpath('name',
                                 '//meta[@property="og:title"]/@content')
        product_loader.add_value('url', response.url)
        product_loader.add_value('image_url',
                                 urlparse.urljoin(response.url, image_href))

        artist_href = response.xpath(
            '//a[contains(@href, "artist.cfm")]/@href').extract_first()
        artist_page_url = urlparse.urljoin(response.url, artist_href)

        # dont_filter=True lets this request bypass the duplicate filter.
        artist_request = scrapy.Request(
            artist_page_url,
            callback=self.__parse_artist_page,
            dont_filter=True)
        artist_request.meta['item'] = product_loader.load_item()
        yield artist_request
    def parse(self, response):
        """Yield a product-detail request for each ``DesignID`` input value.

        Every request carries a fresh item holding the page's prices and the
        ``expires_at`` value matched by ``until: (\\d+)`` in the script text.
        """
        design_ids = response.xpath(
            '//input[@name="DesignID"]/@value').extract()
        for design_id in design_ids:
            detail_url = ('http://www.teetournament.com/product-detail.cfm?designid='
                          + design_id)

            listing_loader = ProductItemLoader(response=response)
            listing_loader.add_xpath(
                'prices', '//div[@class="price priceLeft"]/text()')
            listing_loader.add_xpath(
                'expires_at', 'string(//script/text())', re=r'until: (\d+)')

            detail_request = scrapy.Request(
                detail_url, callback=self.__parse_product_page)
            detail_request.meta['item'] = listing_loader.load_item()
            yield detail_request
 def parse_product_page(self, response):
     """Build and return a product item from a product page.

     Name, description, url, image_url and prices come from the page; the
     artist name and artist URL are fixed values.
     """
     # Order matters within a field: values are added in the order listed.
     image_xpaths = (
         '//a[contains(@class, "artwork-image")]/@href',
         '//a[@class="artworkImage Guys default"]/@data-mask',
         '//img[contains(@data-zoom-image, "fullpic")]/@data-zoom-image',
     )
     price_xpaths = (
         '//p[@class="special-price"]//span[@class="price"]/text()',
         '//span[@class="price"]/text()',
     )

     product_loader = ProductItemLoader(response=response)
     product_loader.add_xpath('name', '//h1/text()')
     product_loader.add_xpath(
         'description', '//div[@class="short-description"]//p/text()')
     product_loader.add_value('url', response.url)
     for image_xpath in image_xpaths:
         product_loader.add_xpath('image_url', image_xpath)
     product_loader.add_value('artist_name', 'SnorgTees')
     product_loader.add_value('artist_urls', ['https://twitter.com/snorgtees'])
     for price_xpath in price_xpaths:
         product_loader.add_xpath('prices', price_xpath)
     return product_loader.load_item()
    def parse_product_page(self, response):
        """Collect product details, then follow the artist link.

        Yields a request for the artist page (href of the link inside the
        "tip" paragraph) with the loaded item attached as ``meta['item']``.
        """
        product_loader = ProductItemLoader(response=response)
        product_loader.add_xpath(
            'name', '//div[@class="product_identity clearfix"]/h1/text()')
        product_loader.add_value('url', response.url)
        product_loader.add_xpath(
            'image_url',
            '//a[@class="design" and contains(@href, "design")]/@href')
        product_loader.add_xpath('artist_name',
                                 '//p[@class="tip"]/@data-title')
        # The "Mens Tee" price is added before the generic price list.
        for price_xpath in ('//div[@data-glname="Mens Tee"]/@data-glprice',
                            '//div/@data-glprice'):
            product_loader.add_xpath('prices', price_xpath)

        artist_href = response.xpath(
            '//p[@class="tip"]/a/@href').extract_first()
        artist_page_url = urlparse.urljoin(response.url, artist_href)
        artist_request = scrapy.Request(artist_page_url,
                                        callback=self.parse_artist_page,
                                        dont_filter=True)
        artist_request.meta['item'] = product_loader.load_item()
        yield artist_request
    def parse(self, response):
        """Yield one artist-profile request per tee slide on the page.

        Each slide's data attributes are loaded into a fresh item (name,
        artist, prices, image, expiry); slides after the third are flagged
        ``last_chance``. The item rides along in ``meta['item']``.
        """
        countdown = response.xpath(
            '//div[@class="index-countdown"]/@data-time').extract_first()

        slides = response.css('.big-slide.tee > div')
        for position, slide in enumerate(slides):
            product_id = slide.xpath('@data-id').extract_first()
            artist_id = slide.xpath('@data-user-id').extract_first()

            price_parts = []
            for currency_code in ['usd', 'gbp', 'eur']:
                price_parts.append(
                    self.build_currency_string(slide, currency_code))

            slide_loader = ProductItemLoader(response=response, selector=slide)
            slide_loader.add_xpath('name', '@data-name')
            slide_loader.add_value('url', response.url)
            slide_loader.add_value('image_url',
                                   self.build_image_url(product_id))
            slide_loader.add_xpath('artist_name', '@data-user')
            slide_loader.add_value('prices', ' / '.join(price_parts))
            slide_loader.add_value('last_chance', position > 2)
            slide_loader.add_value('expires_at', countdown)

            profile_url = urlparse.urljoin(response.url,
                                           '/profile/' + artist_id)
            profile_request = scrapy.Request(profile_url,
                                             callback=self.parse_artist_page,
                                             dont_filter=True)
            profile_request.meta['item'] = slide_loader.load_item()
            yield profile_request
    def __parse_product_page(self, response):
        """Return an artist-page request for a ``/product/`` URL, else ``None``.

        The request carries an item loaded with name, url, image, artist
        name, prices and the ``last_chance`` flag in ``meta['item']``.
        """
        if '/product/' not in response.url:
            return None

        product_id = response.xpath(
            '//a[contains(@class, "buy-button")]/@data-id').extract_first()
        author_link = response.xpath('//span[@class="author"]/a')
        price_block = response.xpath('//p[@class="product-price"]')
        price_parts = []
        for currency_code in ['usd', 'gbp', 'eur']:
            price_parts.append(
                self.build_currency_string(price_block, currency_code))

        product_loader = ProductItemLoader(response=response)
        product_loader.add_xpath('name', '//span[@class="name"]/text()')
        product_loader.add_value('url', response.url)
        product_loader.add_value('image_url',
                                 self.build_image_url(product_id))
        product_loader.add_value('artist_name',
                                 author_link.xpath('text()').extract())
        product_loader.add_value('prices', ' / '.join(price_parts))
        # NOTE(review): a URL containing '/product/' can never equal the
        # last-chance URL, so this flag is always False on this path.
        is_last_chance = (
            response.url == 'https://www.qwertee.com/last-chance')
        product_loader.add_value('last_chance', is_last_chance)

        artist_page_url = urlparse.urljoin(
            response.url, author_link.xpath('@href').extract_first())
        artist_request = scrapy.Request(artist_page_url,
                                        callback=self.parse_artist_page,
                                        dont_filter=True)
        artist_request.meta['item'] = product_loader.load_item()
        return artist_request
Esempio n. 12
0
 def parse(self, response):
     """Build the single product item shown in the "wpb_single_image" block.

     Name, url and image_url are read from that block; artist name, artist
     URL and price are fixed values.
     """
     image_block = '//div[contains(@class, "wpb_single_image")]'

     deal_loader = ProductItemLoader(response=response)
     deal_loader.add_xpath('name', image_block + '/h2/text()')
     deal_loader.add_xpath('url', image_block + '//a/@href')
     deal_loader.add_xpath('image_url', image_block + '//img/@src')
     deal_loader.add_value('artist_name', 'Anglotees')
     deal_loader.add_value('artist_urls', ['http://anglotees.com/'])
     deal_loader.add_value('prices', '$22.99')
     return deal_loader.load_item()
Esempio n. 13
0
    def parse_product_page(self, response):
        """Load product details, then follow the "product-artist" link.

        Yields a request for the artist page with the loaded item attached
        as ``meta['item']``.
        """
        artist_anchor = '//div[@class="product-artist"]/a'

        product_loader = ProductItemLoader(response=response)
        product_loader.add_xpath(
            'name', '//section[@class="product-detail-info"]/h2/text()')
        product_loader.add_value('url', response.url)
        product_loader.add_xpath('artist_name', artist_anchor + '/text()')
        product_loader.add_xpath(
            'image_url', '//meta[@property="og:image"]/@content')
        product_loader.add_xpath(
            'prices', '//meta[@property="og:price:amount"]/@content')

        artist_href = response.xpath(artist_anchor + '/@href').extract_first()
        artist_request = scrapy.Request(
            urlparse.urljoin(response.url, artist_href),
            callback=self.parse_artist_page,
            dont_filter=True)
        artist_request.meta['item'] = product_loader.load_item()
        yield artist_request
    def parse_product_page(self, response):
        """Load product details and return a request for the "Art Print" page.

        The item (name, description, url, prices, artist, tags, active flag)
        is attached to the returned request as ``meta['item']``; the request
        is handled by ``parse_image``.
        """
        art_print_href = response.xpath(
            '//a[text()="Art Print"]/@href').extract_first()
        profile_path = response.xpath(
            '//div[@class="user-avatar"]/a/@href').extract_first()

        product_loader = ProductItemLoader(response=response)
        product_loader.add_xpath('name', '//h1[@data-dmc="prod-name"]/text()')
        product_loader.add_xpath(
            'description', '//p[@id="about-the-art-description"]/text()')
        product_loader.add_value('url', response.url)
        product_loader.add_xpath('prices', '//meta[@itemprop="price"]/@content')
        product_loader.add_xpath(
            'artist_name', '//h1[@data-dmc="prod-name"]/following::a/text()')
        product_loader.add_value('artist_urls',
                                 'https://society6.com' + profile_path)
        product_loader.add_xpath('tags', '//p[@class="tag-container"]/a/text()')

        availability = response.xpath(
            '//meta[@property="og:availability"]/@content').extract_first()
        product_loader.add_value('active', availability == 'instock')

        image_request = Request(urlparse.urljoin(response.url, art_print_href),
                                callback=self.parse_image)
        image_request.meta['item'] = product_loader.load_item()
        return image_request
    def parse_product_page(self, response, deal=False):
        """Build and return a product item from a design page.

        When ``deal`` is true the ``expires_at`` field is also read from the
        "lbldatetime" span.
        """
        design_href = response.xpath(
            '//div[@class="tee-des"]/a/@href').extract_first()
        full_image_url = urlparse.urljoin(response.url, design_href)

        product_loader = ProductItemLoader(response=response)
        product_loader.add_xpath(
            'name', '//input[@name="txtSaleDesignTitle"]/@value')
        product_loader.add_value('url', response.url)
        product_loader.add_value('image_url', full_image_url)
        product_loader.add_xpath(
            'artist_name', '//span[@id="lblArtistname"]/text()')
        product_loader.add_xpath(
            'artist_urls', '//div[@class="social-icon"]/a/@href')
        # The price text nodes are passed through a Join with no separator.
        product_loader.add_xpath(
            'prices',
            '//span[@class="tagfullprice"]//text()',
            Join(separator=u''))
        product_loader.add_value(
            'fabric_colors', self.__parse_fabric_colors(response))

        if deal:
            product_loader.add_xpath(
                'expires_at', '//span[@id="lbldatetime"]/text()')

        return product_loader.load_item()
Esempio n. 16
0
    def __parse_product_page(self, response):
        """Finish the item from ``response.meta['item']`` and return it.

        The og:title content is split around "By": the part before it becomes
        the name and the part after it the artist name. The price is fixed.
        """
        og_title_xpath = '//meta[@property="og:title"]/@content'

        pending_item = response.meta['item']
        product_loader = ProductItemLoader(pending_item, response=response)

        product_loader.add_xpath('name', og_title_xpath, re=r'(.*?) By')
        product_loader.add_value('url', response.url)
        product_loader.add_xpath('artist_name', og_title_xpath,
                                 re=r'.*By\s(.+)')
        product_loader.add_xpath(
            'artist_urls', '//div[@class="mini_description"]//a/@href')
        product_loader.add_value('prices', '$25')
        return product_loader.load_item()
Esempio n. 17
0
    def parse_product_page(self, response):
        """Build and return a product item from a design page.

        The og:title content is split around " by " into name and artist
        name; the preview image is the one whose color box title matches the
        currently displayed color name.
        """
        og_title_xpath = '//meta[@property="og:title"]/@content'
        product_loader = ProductItemLoader(response=response)

        color_name = response.xpath(
            '//span[@class="color-name"]/text()').extract_first()
        default_color = color_name.strip()
        artist_path = response.xpath(
            '//a[contains(@href, "/user/")]/@href').extract_first()
        preview_xpath = (
            '//span[contains(@class, "color-box") and @title="%s"]/@data-preview'
            % default_color)

        product_loader.add_xpath('name', og_title_xpath, re=r'(.*?) by ')
        product_loader.add_xpath(
            'description',
            '//div[contains(@class, "product-description")]/p/text()')
        product_loader.add_xpath(
            'fabric_colors', '//div[@class="design-show-color"]/label/@for')
        product_loader.add_value('url', response.url)
        product_loader.add_xpath('image_url', preview_xpath)
        product_loader.add_xpath(
            'prices', '//meta[@property="og:price:amount"]/@content')
        product_loader.add_xpath('artist_name', og_title_xpath, re=r' by (.*)$')
        product_loader.add_value('artist_urls',
                                 'https://www.teepublic.com' + artist_path)
        product_loader.add_xpath('tags',
                                 '//div[contains(@class, "tags")]/a/text()')
        return product_loader.load_item()
 def parse_product_page(self, response):
     """Build and return a product item from a product page.

     The og:image URL is rewritten from the 400x400 to the 750x750 variant;
     artist name, artist URL and price are fixed values.
     """
     og_image = response.xpath(
         '//meta[@property="og:image"]/@content').extract_first()
     large_image = og_image.replace('400x400', '750x750')

     product_loader = ProductItemLoader(response=response)
     product_loader.add_xpath('name', '//h1/text()')
     product_loader.add_value('url', response.url)
     product_loader.add_value('image_url', large_image)
     product_loader.add_xpath(
         'fabric_colors', '//img[contains(@src, "swatches")]/@alt')
     product_loader.add_value('artist_name', '6 Dollar Shirts')
     product_loader.add_value('artist_urls', ['http://6dollarshirts.com/'])
     product_loader.add_value('prices', '$6')
     return product_loader.load_item()
Esempio n. 19
0
    def __parse_product_page(self, response):
        """Build and return a product item from a design page.

        Reads name, image, artist name (the text after "Artist : "), prices
        and fabric color hex codes from the page.
        """
        field_xpaths = (
            ('image_url',
             '//img[contains(@data-opaque, "user_design")]/@data-opaque'),
            ('prices', '//div[@class="addtocart-tab"]/span/text()'),
            ('fabric_colors',
             '//div[@class="mens-color-box normalTip"]/@data-hex'),
        )

        product_loader = ProductItemLoader(response=response)
        product_loader.add_xpath(
            'name', '//div[@class="t-shirt-design-name"]/text()')
        product_loader.add_value('url', response.url)
        product_loader.add_xpath(
            'artist_name',
            '//div[@class="t-shirt-designer-name"]/text()',
            re='Artist : (.+)')
        for field_name, field_xpath in field_xpaths:
            product_loader.add_xpath(field_name, field_xpath)

        return product_loader.load_item()
Esempio n. 20
0
 def parse_product_page(self, response):
     """Build and return a product item from a product page.

     The itemprop "name" text is split around " by " into product name and
     artist name; the artist name is additionally read from the
     "artist-info" aside, which is added first. The image is the last
     ``data-zoom-url`` on the page and the price comes from the
     "priceSpecification" block.
     """
     # Was a bare Python 2 ``print`` statement; route the trace through the
     # spider logger so it obeys the log level and is valid on Python 3.
     self.logger.debug('Parsing product page: %s', response.url)

     loader = ProductItemLoader(response=response)
     loader.add_xpath('name',
                      '//span[@itemprop="name"]/text()',
                      re=r'(.+) by')
     loader.add_value('url', response.url)
     loader.add_xpath('artist_name',
                      '//aside[@class="artist-info"]//h2/text()')
     loader.add_xpath('artist_name',
                      '//span[@itemprop="name"]/text()',
                      re=r'\sby\s(.+)')
     loader.add_xpath('image_url',
                      '(//div[@data-zoom-url]/@data-zoom-url)[last()]')
     loader.add_xpath(
         'prices',
         '//div[@itemprop="priceSpecification"]/span[@itemprop="minPrice" or @itemprop="price"]/text()'
     )
     return loader.load_item()
    def parse(self, response):
        """Build and return the product item shown on the current page.

        ``last_chance`` is true only when the page URL is the last-chance
        page; ``expires_at`` is the number matched by ``until: (\\d+)`` in the
        script text. The price is a fixed value.
        """
        on_last_chance_page = (
            response.url == 'https://teebusters.com/index/lastchance.html')

        deal_loader = ProductItemLoader(response=response)
        deal_loader.add_xpath('name', '//div[@id="app_info"]/strong/text()')
        deal_loader.add_value('url', response.url)
        deal_loader.add_xpath('image_url',
                              '//img[contains(@src, "zoom")]/@src')
        deal_loader.add_xpath(
            'artist_name', '//div[@id="profile-box"]//strong/text()')
        deal_loader.add_xpath('artist_urls', '//a[@class="followBox"]/@href')
        deal_loader.add_value('prices', '8.99 EUR')
        deal_loader.add_value('last_chance', on_last_chance_page)
        deal_loader.add_xpath(
            'expires_at', 'string(//script/text())', re=r'until: (\d+)')

        return deal_loader.load_item()
Esempio n. 22
0
    def __parse_product_page(self, response):
        """Load product details, then follow the "/artists/" link.

        Yields a request for the artist page with the loaded item attached
        as ``meta['item']``.
        """
        product_loader = ProductItemLoader(response=response)
        for field_name, field_xpath in (
                ('name', '//div[@class="name"]/text()'),):
            product_loader.add_xpath(field_name, field_xpath)
        product_loader.add_value('url', response.url)
        product_loader.add_xpath(
            'image_url', '//section[@class="product-info"]//img[1]/@ref')
        product_loader.add_xpath(
            'artist_name', '//a[contains(@href, "/artists/")]/text()')
        product_loader.add_xpath('prices', '//span[@id="price"]/text()')

        artist_href = response.xpath(
            '//a[contains(@href, "/artists/")]//@href').extract_first()
        artist_request = scrapy.Request(
            urlparse.urljoin(response.url, artist_href),
            callback=self.__parse_artist_page,
            dont_filter=True)
        artist_request.meta['item'] = product_loader.load_item()
        yield artist_request
    def parse_product_page(self, response):
        """Build and return a product item from a product page.

        Name, image and price are read from the page. The artist name is the
        text of the "name" span and is also appended to the shop base URL to
        form the artist URL.
        """
        loader = ProductItemLoader(response=response)

        # The selected-color ``style`` lookup that used to sit here was never
        # used, so the extra XPath query has been removed.
        artist_name = response.xpath(
            '//span[@class="name"]/text()').extract_first()

        loader.add_xpath('name', '//h1[@id="product-title"]/text()')
        loader.add_value('url', response.url)
        loader.add_xpath('image_url', '//meta[@property="og:image"]/@content')
        loader.add_xpath('prices', '//span[@id="product_price"]/text()')
        loader.add_value('artist_name', artist_name)
        loader.add_value('artist_urls',
                         'https://www.designbyhumans.com/shop/' + artist_name)
        return loader.load_item()
 def parse(self, response):
     """Build and return the product item shown on the current page.

     Name, image and artist name are read from the page; the price is a
     fixed value.
     """
     page_fields = (
         ('image_url', '//div[@class="showcase-image"]/img/@src'),
         ('artist_name', '//span[@class="product-artist"]/a/text()'),
     )

     deal_loader = ProductItemLoader(response=response)
     deal_loader.add_xpath('name', '//span[@class="product-name"]/text()')
     deal_loader.add_value('url', response.url)
     for field_name, field_xpath in page_fields:
         deal_loader.add_xpath(field_name, field_xpath)
     deal_loader.add_value('prices', '$10')
     return deal_loader.load_item()
Esempio n. 25
0
 def __parse_artist_page(self, response):
     """Add the links of the "typography" block to the carried item.

     The item is read from ``response.meta['item']``; the hrefs are stored
     as ``artist_urls`` and the loaded item is returned.
     """
     artist_loader = ProductItemLoader(response.meta['item'],
                                       response=response)
     artist_loader.add_xpath(
         'artist_urls', '//div[@class="typography"]//a/@href')
     return artist_loader.load_item()
Esempio n. 26
0
 def __parse_product_page(self, response):
     """Build and return a product item from a product page.

     The artist name and URL both come from the first link that follows the
     "Artist:" label.
     """
     artist_anchor = '//span[text()="Artist:"]/following::a[1]'

     product_loader = ProductItemLoader(response=response)
     product_loader.add_xpath('name', '//h1/text()')
     product_loader.add_value('url', response.url)
     product_loader.add_xpath('artist_name', artist_anchor + '/text()')
     product_loader.add_xpath('artist_urls', artist_anchor + '/@href')
     product_loader.add_xpath(
         'image_url', '//meta[@property="og:image"]/@content')
     product_loader.add_xpath('prices', '//span[@class="price-new"]/text()')
     return product_loader.load_item()
Esempio n. 27
0
 def __parse_product_page(self, response):
     """Build and return the product item shown on the current page.

     The artist name is whatever follows "by" (case-insensitive) in the
     joined "pos-title" heading text. The item url and the prices are fixed
     values rather than being read from the response.
     """
     loader = ProductItemLoader(response=response)
     # Raw string: "\s" in a plain literal is an invalid escape sequence and
     # triggers a warning on modern Python; the pattern itself is unchanged.
     artist_re = re.compile(r'by\s+(.+)', re.IGNORECASE)
     artist = response.xpath('//h1[@class="pos-title"]/text()').extract()
     loader.add_xpath('name', '//meta[@itemprop="itemreviewed"]/@content')
     loader.add_value('url', ['http://www.teevolt.com'])
     loader.add_xpath('image_url', '//img[contains(@src,"com_zoo")][1]/@src')
     loader.add_value('artist_name', ''.join(artist), re=artist_re)
     loader.add_xpath('artist_urls', '//div[@class="pos-description"]//a/@href')
     loader.add_value('prices', '7.50 GBP / 9 EUR / 12 USD')
     return loader.load_item()
Esempio n. 28
0
    def __parse_products(self, response):
        """Yield items for the products listed in a JSON response.

        The ``products`` list is shuffled in place when
        ``self.shuffle_products`` is set. Products with no images or with no
        available variant are skipped. Iteration stops once
        ``self.product_limit`` items have been yielded (``-1`` disables the
        limit). Each loader is passed through ``self.before_save`` and only
        yielded when ``self.validate`` accepts it.
        """
        payload = json.loads(response.body_as_unicode())
        products = payload['products']

        if self.shuffle_products:
            random.shuffle(products)

        yielded_count = 0
        for product in products:
            if len(product['images']) == 0:
                continue
            if not any(variant['available'] for variant in product['variants']):
                continue

            limit_reached = (self.product_limit != -1
                             and yielded_count >= self.product_limit)
            if limit_reached:
                break

            product_url = 'http://%s/products/%s' % (self.shopify_domain,
                                                     product['handle'])
            first_variant_price = '$' + product['variants'][0]['price']

            loader = ProductItemLoader()
            loader.add_value('name', product['title'])
            loader.add_value('url', product_url)
            loader.add_value('image_url', product['images'][0]['src'])
            loader.add_value('artist_name', product['vendor'])
            loader.add_value('prices', first_variant_price)
            loader.add_value('tags', product['tags'])
            loader = self.before_save(loader, product)
            if self.validate(loader, product):
                yield loader.load_item()
                yielded_count += 1
Esempio n. 29
0
    def __parse_product_page(self, response):
        """Build and return a product item from a work page.

        Name, prices, artist name/URL and tags are read via XPath; the image
        URL is the first-class match of a ``.../raf...jpg`` pattern applied to
        the raw response body. Tag link texts are split on commas.
        """
        loader = ProductItemLoader(response=response)
        loader.add_xpath('name',
                         '//h2[@class="work-information_title"]/text()')
        # NOTE(review): this reads a *price* meta tag into ``description``;
        # it looks like a copy-paste slip but is left unchanged - confirm.
        loader.add_xpath('description',
                         '//meta[@name="product:price:amount"]/@content')
        loader.add_value('url', response.url)
        loader.add_value('image_url',
                         response.body,
                         re=r'https:\/\/[\w|\.]+\/[\w|.]+\/raf.*?\.jpg')
        loader.add_xpath('prices',
                         '//meta[@property="product:price:amount"]/@content')
        loader.add_xpath('artist_name', '//a[@rel="author"]/text()')
        loader.add_xpath('artist_urls', '//a[@rel="author"]/@href')
        # add_xpath only applies processors given positionally; the former
        # ``output_processor=`` keyword was silently ignored, so the comma
        # split never ran. Pass the processor positionally instead.
        loader.add_xpath('tags',
                         '//p[@class="description_tag-list"]/a/text()',
                         MapCompose(lambda x: x.split(',')))

        return loader.load_item()
 def parse_artist_page(self, response):
     """Add the page-header span text to the carried item as ``artist_urls``.

     The item is read from ``response.meta['item']`` and the loaded item is
     returned.
     """
     header_text_xpath = '//div[@id="page-header"]/span/text()'
     artist_loader = ProductItemLoader(response.meta['item'],
                                       response=response)
     artist_loader.add_xpath('artist_urls', header_text_xpath)
     return artist_loader.load_item()