Python extract_price_eu 예제들, product_spiders.utils.extract_price_eu Python 예제들

예제 #1

0

파일 보기

    def parse_product(self, response):
        """Build one product item from a product detail page.

        The price is taken from the inline script mentioning
        ``productPrice`` when that script carries a numeric value;
        otherwise it is scraped from the ``product-price`` span in the
        page markup.
        """
        hxs = HtmlXPathSelector(response)
        loader = ProductLoader(item=Product(), response=response)

        price_script = ''.join(
            hxs.select(
                "//script[contains(., 'productPrice')]/text()").extract())
        # The regex matches get their own name so ``price`` is never a list.
        # Previously a script without a numeric productPrice left an empty
        # list in ``price`` and the markup fallback below was never tried.
        script_prices = re.findall(r'productPrice":(\d+)', price_script)
        if script_prices:
            price = extract_price(script_prices[0])
        else:
            price_text = ''.join(
                hxs.select(
                    "//div[@id='product-simple']//span[contains(concat('',@id,''), 'product-price')]//text()"
                ).extract())
            price = extract_price_eu(
                ''.join(re.findall(r'([\d\.,]+)', price_text)))

        loader.add_value('price', price)
        loader.add_value('url', response.url)
        loader.add_xpath('name', "//h1/text()")

        # The "En stock" image is the only availability signal read here.
        in_stock = hxs.select(
            "//p[@class='availability']/img[@alt='En stock']").extract()
        loader.add_value('stock', '1' if in_stock else '0')

        loader.add_xpath(
            'category',
            "//div[@class='breadcrumbs']//li[1< position()]//a/text()")
        loader.add_xpath(
            'brand',
            "//table[@class='data-table']//tr[contains(., 'Marque')]/td/text()"
        )
        loader.add_value('shipping_cost', "0")

        sku = ''.join(
            hxs.select("//input[@type='hidden' and @name='product']/@value").
            extract())
        loader.add_value('sku', sku.strip())
        # The identifier deliberately keeps the raw (unstripped) value so
        # that identifiers emitted by earlier runs stay unchanged.
        loader.add_value('identifier', sku)
        loader.add_xpath('image_url', "//a[@class='MagicZoomPlus']/img/@src")
        yield loader.load_item()

예제 #2

0

파일 보기

    def parse_product(self, response):
        """Queue variant pages, then emit this page's product at most once.

        Colour and size variant links are re-queued with this same callback.
        A page without a price yields no product. Products whose identifier
        is already in ``self.identifiers_viewed`` are not yielded again.
        """
        base_url = get_base_url(response)

        # Colour variants are followed first, then size variants.
        variant_queries = (
            '//p[@id="color_variants"]//a/@href',
            '//p[@id="size_variants"]//a/@href',
        )
        for variant_query in variant_queries:
            for href in response.xpath(variant_query).extract():
                yield Request(urljoin_rfc(base_url, href),
                              callback=self.parse_product)

        loader = ProductLoader(item=Product(), response=response)
        price_texts = response.xpath(
            '//b[contains(@class, "pro-price")]/text()').extract()
        if not price_texts:
            return
        # Drop every whitespace character inside the price text.
        compact_price = ''.join(price_texts[0].split()).strip()

        loader.add_value('price', extract_price_eu(compact_price))
        loader.add_value('url', response.url)
        loader.add_xpath('name', '//h1[@itemprop="name"]/text()')

        # An "Obserwuj" span is treated as the out-of-stock marker.
        watch_only = response.xpath('//span[text()="Obserwuj"]')
        loader.add_value('stock', '0' if watch_only else '1')

        loader.add_xpath('category',
                         '//nav[@id="breadcrumbs"]/a[position()>1]/text()')
        loader.add_xpath('brand', '//*[@id="catalog-info"]//a/b/text()')
        loader.add_value('shipping_cost', "0")

        catalog_texts = response.xpath(
            '//*[@id="catalog-info"]//b/text()').extract()
        # The leading space is part of the hashed value; it must stay so
        # the identifier hash is computed over the same text as before.
        padded_sku = ' ' + catalog_texts[-1].strip()
        loader.add_value('sku', padded_sku.strip())
        loader.add_value('identifier', hashlib.md5(padded_sku).hexdigest())
        loader.add_xpath('image_url', '//main[@id="content"]//img/@src')

        item = loader.load_item()
        identifier = item['identifier']
        if identifier in self.identifiers_viewed:
            return
        self.identifiers_viewed.append(identifier)
        yield item

예제 #3

0

파일 보기

파일: experttechnomarkt.py 프로젝트: oceancloud82/scraping

    def parse_product(self, response):
        """Complete the product passed in ``response.meta['product']``.

        When the request meta carries a ``sku``, price, stock and brand are
        filled from the page and the meta. Otherwise a SKU is guessed from
        the product name and the brand is set to ``Logitech``.
        """
        hxs = HtmlXPathSelector(response)
        loader = ProductLoader(item=response.meta['product'], selector=hxs)
        loader.add_xpath('identifier', '//input[@name="anid"]/@value')
        loader.add_value('url', response.url)
        loader.add_value(
            'name', u''.join(
                hxs.select('//h1[@id="test_product_name"]/text()').extract()).
            strip().replace('\n', ' '))

        sku = response.meta.get('sku', '')
        if sku:
            loader.add_value('sku', sku)
            # One query serves both price and stock; the original ran the
            # identical XPath twice.
            price_titles = hxs.select(
                '//form/div[@class="price"]/img/@title').extract()
            loader.add_value(
                'price',
                extract_price_eu(price_titles[0] if price_titles else '0'))
            # NOTE(review): stock is '0' exactly when a price title exists,
            # because the out-of-stock check reuses the price XPath. This
            # looks like a copy-paste slip; behaviour is kept as-is until
            # the intended out-of-stock marker is confirmed.
            loader.add_value('stock', '0' if price_titles else '1')
            loader.add_value('brand', response.meta.get('brand', ''))
        else:
            try:
                loader.add_value(
                    'sku',
                    re.search(r'\b([A-Z]{1,2})*[\+\-0-9]{2,10}',
                              loader.get_output_value('name')).group(0))
            except (AttributeError, TypeError):
                # AttributeError: the name contains no SKU-like token.
                # TypeError: the loader produced no string name.
                # Best effort only: the product is emitted without a SKU.
                pass
            loader.add_value('brand', 'Logitech')

        # The second-to-last URL path segment is used as the category.
        loader.add_value('category', response.url.split('/')[-2])

        img = hxs.select('//td[@id="magiczoomplushook"]//a/@href').extract()
        if img:
            loader.add_value('image_url',
                             urljoin_rfc(get_base_url(response), img[0]))

        loader.add_value('shipping_cost', '0')
        yield loader.load_item()

예제 #4

0

파일 보기

파일: bauhausreedition_com.py 프로젝트: oceancloud82/scraping

    def parse_product(self, response):
        """Yield one product per purchasable option on a product page.

        Name, SKU, image and category are shared by every option. Each
        ``ul.buy`` block supplies the option label, its price and the
        ``id_opzione`` query parameter used as the identifier. Options
        lacking a label/price pair or a purchase link are skipped so that a
        single malformed block does not abort the remaining options.
        """
        hxs = HtmlXPathSelector(response)
        # Loop-invariant: resolved once instead of once per option.
        base_url = get_base_url(response)

        product_name = ''.join(
            hxs.select('//*[@id="content"]/h2[1]//text()').extract()).strip()
        product_name = product_name.replace(u'\xa0', ' ')
        sku = hxs.select('//li[@class="codice"]/span/text()').extract()
        sku = sku[0] if sku else ''

        img = hxs.select('//div[@class="img-big"]//img/@src').extract()
        image_url = urljoin_rfc(base_url, img[0]) if img else None
        category = ''.join(
            hxs.select('//*[@id="content"]//p[@class="briciola"]//text()').
            extract()).strip()
        # The first two breadcrumb entries are dropped.
        category = category.split(' / ')[2:]

        for option in hxs.select('//ul[@class="buy"]'):
            # Text node 0 is the option label, text node 1 the price; the
            # original evaluated this XPath twice to read them.
            price_texts = option.select(
                './li[@class="prezzo"]//text()').extract()
            buy_links = option.select(
                './li[@class="acquista"]/a/@href').extract()
            if len(price_texts) < 2 or not buy_links:
                continue

            name = price_texts[0].replace(':', '').strip()
            if name != '':
                name = ' - ' + name
            price = price_texts[1].strip().replace(u'\u20ac', '')
            # NOTE(review): the 1.22 multiplier presumably adds 22% VAT to
            # a net price - confirm against the site.
            price = extract_price_eu(price) * Decimal('1.22')
            product_identifier = url_query_parameter(
                urljoin_rfc(base_url, buy_links[0]), 'id_opzione')

            loader = ProductLoader(item=Product(), selector=hxs)
            loader.add_value('identifier', product_identifier)
            loader.add_value('sku', sku)
            loader.add_value('url', response.url)
            loader.add_value('name', product_name + name)
            loader.add_value('price', price)
            loader.add_xpath('brand', '//p[@class="desc-prod"]/a/text()')
            if image_url:
                loader.add_value('image_url', image_url)
            loader.add_value('category', category)
            yield loader.load_item()

예제 #5

0

파일 보기

    def parse_cat(self, response):
        """Schedule every product of a category page, then the other pages.

        Each product box contributes its listed price; the detail request
        is wrapped through ``self.fetch_product`` together with the
        partially filled product.
        """
        hxs = HtmlXPathSelector(response)

        for box in hxs.select('//div[contains(@class, "box-product")]'):
            item = Product()
            price_text = ''.join(
                box.select(
                    './/div[contains(@class,"product-price")]/strong/text()').
                extract())
            item['price'] = extract_price_eu(price_text)

            detail_url = urljoin_rfc(
                get_base_url(response),
                box.select('.//a/@href').extract()[0])
            detail_request = Request(detail_url,
                                     callback=self.parse_product,
                                     meta=response.meta)
            yield self.fetch_product(detail_request,
                                     self.add_shipping_cost(item))

        page_links = hxs.select(
            '//div[@class="paginator"]//a/@href').extract()
        for href in page_links:
            yield Request(urljoin_rfc(get_base_url(response), href),
                          callback=self.parse_cat)

예제 #6

0

파일 보기

    def parse_cat(self, response):
        """Schedule the products listed on a category page and its pages.

        The listed price is captured from the numeric fragments of the
        price span; the detail request is handed to ``self.fetch_product``.
        """
        hxs = HtmlXPathSelector(response)

        for entry in hxs.select('//ul[@id="product_list"]/li'):
            price_fragments = entry.select(
                './/span[@class="price"]//text()').re(r'[\d.,]+')
            item = Product()
            item['price'] = extract_price_eu(''.join(price_fragments))

            detail_url = urljoin_rfc(
                get_base_url(response),
                entry.select(
                    './/a[@class="product_img_link"]/@href').extract()[0])
            detail_request = Request(detail_url,
                                     callback=self.parse_product,
                                     meta=response.meta)
            yield self.fetch_product(detail_request,
                                     self.add_shipping_cost(item))

        pagination_links = hxs.select(
            '//ul[contains(@class, "pagination")]//a/@href').extract()
        for href in pagination_links:
            yield Request(urljoin_rfc(get_base_url(response), href),
                          callback=self.parse_cat)

예제 #7

0

파일 보기

파일: centrogamma_spider.py 프로젝트: oceancloud82/scraping

    def parse(self, response):
        """Crawl category pages and schedule a detail request per product.

        Category and sub-category links are followed with the default
        callback. Every ``div.articolo`` on the page becomes a partially
        filled item passed to ``self.parse_product`` through the request
        meta. The "next" pagination link is followed when present.
        """
        hxs = HtmlXPathSelector(response)
        base_url = get_base_url(response)

        categories = hxs.select(
            '//div[@id="listaSezioni"]/div/a/@href').extract()
        for category in categories:
            yield Request(urljoin_rfc(base_url, category))

        sub_categories = hxs.select(
            '//div[@class="contentGruppi"]/div/div[@class="nome"]/a/@href'
        ).extract()
        for sub_category in sub_categories:
            yield Request(urljoin_rfc(base_url, sub_category))

        # The group heading is page-wide, so it is read once rather than
        # per product. A missing heading no longer raises IndexError.
        group_names = hxs.select('//div[@class="gruppo"]/text()').extract()
        category = group_names[0].strip() if group_names else ''

        for product in hxs.select('//div[@class="articolo"]'):
            hrefs = product.select('.//h2/a/@href').extract()
            if not hrefs:
                # Without a link there is no detail page to request.
                continue
            url = urljoin_rfc(base_url, hrefs[0])
            id_match = re.search(r'art/(\d+)_', url)
            if not id_match:
                # The identifier lives in the URL; skip unparsable ones.
                continue

            l = ProductLoader(item=Product(), selector=product)
            l.add_value('url', url)
            l.add_value('identifier', id_match.group(1))
            l.add_xpath('sku', 'p[@class="codfor"]/strong/text()')
            l.add_xpath('brand', 'p[@class="marca"]/img/@alt')
            image_url = product.select(
                'div[@class="img"]/a/img/@src').extract()
            image_url = urljoin_rfc(base_url,
                                    image_url[0]) if image_url else ''
            l.add_value('image_url', image_url)
            if category:
                l.add_value('category', category)
            price = product.select('p[@class="prezzo"]/text()').extract()
            # The last text node of the price paragraph is the one used.
            price = extract_price_eu(price[-1]) if price else 0
            l.add_value('price', price)
            if price <= 0:
                l.add_value('stock', 0)
            item = l.load_item()
            yield Request(item['url'],
                          callback=self.parse_product,
                          meta={'item': item})

        # Select the href attribute: the previous XPath extracted the whole
        # <a> element, so the joined "URL" contained raw HTML markup.
        next_links = hxs.select('//a[@class="next"]/@href').extract()
        if next_links:
            yield Request(urljoin_rfc(base_url, next_links[-1]))

예제 #8

0

파일 보기

    def parse_product(self, response):
        """Build a single Lego product item from a product detail page."""

        def get_sku(name):
            # Return the last 3-5 digit run reported by the regex, or an
            # empty string when the name contains none.
            matches = re.findall("([0-9]{3,5}).*", name)
            return matches[-1] if matches else ""

        hxs = HtmlXPathSelector(response)
        base_url = get_base_url(response)

        # Each [-1] takes the last matching node and raises IndexError
        # when the page lacks the element.
        name = hxs.select('//h1/text()').extract()[-1].strip()
        price = hxs.select('//span[@class="bigPrice"]/text()').extract()[-1]
        sku = get_sku(name)

        identifier = hxs.select(
            '//input[@id="surveyObjectId"]/@value').extract()[-1]
        breadcrumb = hxs.select(
            '//div[@itemprop="breadcrumb"]/div/a[not(contains(@class, "last"))]/text()'
        ).extract()

        brand = "Lego"

        image_sources = hxs.select('//img[@id="imgMain"]/@src').extract()

        loader = ProductLoader(item=Product(), selector=hxs)
        loader.add_value("name", name)
        loader.add_value("identifier", identifier)
        loader.add_value("price", extract_price_eu(price))
        loader.add_value("url", response.url)

        loader.add_value("sku", sku)
        if image_sources:
            loader.add_value("image_url",
                             urljoin_rfc(base_url, image_sources[-1]))
        if breadcrumb:
            # The deepest breadcrumb that is not marked "last".
            loader.add_value("category", breadcrumb[-1])
        loader.add_value("stock", 1)
        loader.add_value("brand", brand)
        loader.add_value('shipping_cost', 5.75)
        yield loader.load_item()

예제 #9

0

파일 보기

    def parse(self, response):
        """Yield one product item per ``<product>`` element of an XML feed.

        Shipping is '0' when the loaded price is above 499 and '25'
        otherwise.
        """
        xxs = XmlXPathSelector(response)

        for node in xxs.select('//product'):
            category_paths = node.select('./Category/text()').extract()
            price_text = ' '.join(node.select('./price/text()').extract())

            loader = ProductLoader(item=Product(), selector=node)
            loader.add_xpath('identifier', './product-id/text()')
            loader.add_xpath('sku', './product-id/text()')
            loader.add_xpath('url', './product-url/text()')
            loader.add_xpath('name', './product-name/text()')
            loader.add_xpath('brand', './brand/text()')
            loader.add_value('price', extract_price_eu(price_text))
            if category_paths:
                # Only the last '/'-separated segment is kept.
                leaf_category = category_paths[0].split('/')[-1].strip()
                loader.add_value('category', leaf_category)
            loader.add_xpath('image_url', './image-url/text()')
            loader.add_xpath('stock', './stock/text()')

            free_shipping = loader.get_output_value('price') > 499
            loader.add_value('shipping_cost', '0' if free_shipping else '25')
            yield loader.load_item()

예제 #10

0

파일 보기

    def parse_products(self, response):
        """Yield products from the HTML fragment inside a JSON response.

        The response body is JSON whose ``products`` entry holds listing
        markup. Entries without a numeric ``main_image`` id suffix are
        skipped. The SKU is the longest run of digits, commas and dots in
        the product name (the first such run on ties).
        """
        payload = json.loads(response.body)

        hxs = HtmlXPathSelector(text=payload['products'])
        base_url = response.meta.get('base_url')

        for tile in hxs.select('//div[contains(@class, "productItem")]'):
            product_loader = ProductLoader(item=Product(), selector=hxs)
            id_suffix = tile.select(
                './/*[contains(@id, "main_image")]/@id').re(r'(\d+)$')
            if not id_suffix:
                continue
            product_loader.add_value('identifier', id_suffix[0])

            image_src = tile.select(
                './/img[contains(@id, "main_image")]/@data-src').extract()[0]
            product_loader.add_value('image_url',
                                     urljoin_rfc(base_url, image_src))

            title = tile.select(
                './/span[contains(@class, "productTitle")]/a/text()'
            ).extract()[0]
            product_loader.add_value('name', title)

            # The leading '' makes max() return '' when nothing matches;
            # max() keeps the first of equally long candidates.
            numeric_runs = re.findall(r"([\d,\.]+)", title)
            product_loader.add_value('sku',
                                     max([''] + numeric_runs, key=len))

            href = tile.select(
                './/span[contains(@class, "productTitle")]/a/@href'
            ).extract()[0]
            product_loader.add_value('url', urljoin_rfc(base_url, href))

            price_fragments = tile.select(
                './/span[@class="price"]/text()').re(r'[\d.,]+')
            product_loader.add_value(
                'price', extract_price_eu(''.join(price_fragments)))

            yield product_loader.load_item()

예제 #11

0

파일 보기

파일: banneke_com.py 프로젝트: oceancloud82/scraping

    def parse_cat(self, response):
        """Schedule each listed product, then follow the navigation links.

        Every listed product is marked as in stock and carries the price
        shown in the listing.
        """
        hxs = HtmlXPathSelector(response)

        for entry in hxs.select('//li[@class="product"]'):
            price_text = ''.join(
                entry.select('.//div/span[@class="price"]//text()').extract())
            item = Product()
            item['price'] = extract_price_eu(price_text)
            item['stock'] = '1'

            detail_url = urljoin_rfc(
                get_base_url(response),
                entry.select('.//h2[@class="title"]/a/@href').extract()[0])
            detail_request = Request(detail_url,
                                     callback=self.parse_product,
                                     meta=response.meta)
            yield self.fetch_product(detail_request,
                                     self.add_shipping_cost(item))

        navigation_links = hxs.select(
            '//div[@class="navigation"]//a/@href').extract()
        for href in navigation_links:
            yield Request(urljoin_rfc(get_base_url(response), href),
                          callback=self.parse_cat)

예제 #12

0

파일 보기

파일: exportprive.py 프로젝트: oceancloud82/scraping

    def parse_product(self, response):
        """Build one product item from a product detail page.

        The identifier is whatever ``self.product_id_regex`` finds in the
        first script that mentions ``id_product``.
        """
        hxs = HtmlXPathSelector(response)
        loader = ProductLoader(item=Product(), response=response)

        displayed_price = ''.join(
            hxs.select("//span[@id='our_price_display']//text()").extract())
        # Numeric fragments are rejoined with ',' before parsing.
        # NOTE(review): other spiders join with '' - confirm that ',' is
        # what extract_price_eu expects here.
        numeric_price = ','.join(re.findall('([\d\.,]+)', displayed_price))

        loader.add_value('price', extract_price_eu(numeric_price))
        loader.add_value('url', response.url)
        loader.add_xpath('name', "//h1//text()")

        # The in-stock icon is the only availability signal read here.
        stock_icon = hxs.select(
            "//p[@id='stock_statut']//img[contains(./@src, 'stock_in.png')]"
        ).extract()
        loader.add_value('stock', '1' if stock_icon else '0')

        loader.add_xpath(
            'category', "//div[@class='breadcrumb']/a[1 < position()]/text()")
        loader.add_xpath(
            'brand',
            "//div[@id='block_link_manu']//p[contains(., 'Voir tous les produits')]//a/text()"
        )
        loader.add_value('shipping_cost', "0")

        reference = ''.join(
            hxs.select("(//h2[@id='product_reference']//text())[2]").extract())
        loader.add_value('sku', reference.strip())

        id_script = ''.join(
            hxs.select(
                "(//script[contains(., 'id_product')]//text())[1]").extract())
        id_matches = self.product_id_regex.findall(id_script)

        loader.add_value('identifier', ''.join(id_matches))
        loader.add_xpath('image_url', "//img[@id='bigpic']/@src")
        yield loader.load_item()

예제 #13

0

파일 보기

파일: epal.py 프로젝트: oceancloud82/scraping

    def parse(self, response):
        """Follow category/pagination links and schedule listed products.

        Category and page-number links are re-parsed with this method.
        Each ``div.product`` with a price and a link becomes a partially
        filled product handed to ``self.fetch_product`` together with the
        detail-page request. Entries lacking a price or a link are skipped.
        """
        # Resolved once; the unused HtmlXPathSelector local was dropped
        # because this method only uses the response's own selector API.
        base_url = get_base_url(response)

        for cat in response.css(
                'div.product-category a::attr(href)').extract():
            yield Request(urljoin_rfc(base_url, cat), callback=self.parse)
        for cat in response.css('a.page-number::attr(href)').extract():
            yield Request(urljoin_rfc(base_url, cat), callback=self.parse)

        for productxs in response.css('div.product'):
            price = productxs.css('span.amount::text').extract_first()
            if not price:
                continue
            # .xpath() replaces the legacy .select() alias so the method
            # uses one selector API throughout. A missing link skips the
            # entry instead of aborting the page with IndexError.
            href = productxs.xpath('.//a/@href').extract_first()
            if not href:
                continue

            product = Product()
            product['price'] = extract_price_eu(price)
            out_of_stock = productxs.xpath(
                './/div[contains(@class, "out-of-stock-label")]')
            product['stock'] = 0 if out_of_stock else 1

            request = Request(urljoin_rfc(base_url, href),
                              callback=self.parse_product,
                              meta=response.meta)
            yield self.fetch_product(request, self.add_shipping_cost(product))

예제 #14

0

파일 보기

    def parse(self, response):
        """Follow menu/pagination links and request each listed product.

        The listing price (last SEK amount of the product's price block)
        travels to ``self.parse_product`` in the request meta as ``price``.
        """
        hxs = HtmlXPathSelector(response)

        # Menu links are followed first, then pagination links.
        listing_queries = (
            '//ul[@class="menu"]//a/@href',
            '//p[@class="pagination"]//a/@href',
        )
        for listing_query in listing_queries:
            for href in hxs.select(listing_query).extract():
                yield Request(urljoin_rfc(get_base_url(response), href),
                              callback=self.parse)

        result_rows = hxs.select(
            '//div[@id="search_results_products"]/div[starts-with(@id, "product_")]'
        )
        for row in result_rows:
            sek_amounts = row.select(
                './/div[contains(@class,"product_price") and @class!="product_price_percentage_saved"]//span[@class="inc"]/span[@class="SEK"]/text()'
            ).extract()
            listed_price = extract_price_eu(sek_amounts[-1])
            detail_url = urljoin_rfc(
                get_base_url(response),
                row.select('.//a/@href').extract()[0])
            yield Request(detail_url,
                          callback=self.parse_product,
                          meta={'price': listed_price})

예제 #15

0

파일 보기

    def parse_product(self, response):
        """Build one product item from a product detail page.

        Identifier, SKU and name are mandatory: a page lacking any of them
        raises IndexError. Brand and image are added only when present.
        """
        hxs = HtmlXPathSelector(response)
        identifier = hxs.select(
            '//input[@name="product_id"]/@value').extract()[0]
        sku = hxs.select(
            '//div[@class="texte_zoom"]/div/div/h2/text()').extract()[0]
        # The first breadcrumb entry is dropped.
        category = hxs.select(
            '//div[@class="breadParent"]/ol/li/a/span/text()').extract()[1:]
        name = hxs.select('//h1/span/text()').extract()[0].strip()
        brand = hxs.select(
            '//div[@class="texte_zoom"]/div/div/a/img/@alt').extract()
        price = "".join(
            hxs.select(
                '//div[@id="prixZoom"]//div[@class="ttc"]/span[@itemprop="price"]/span/text()'
            ).extract()).strip().replace(' ', '')
        image_url = hxs.select(
            '//div[@class="photos"]//img[@itemprop="image"]/@src').extract()

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('identifier', identifier)
        loader.add_value('name', name)
        loader.add_value('category', category)
        if brand:
            loader.add_value('brand', brand)
        loader.add_value('sku', sku)
        loader.add_value('url', response.url)
        loader.add_value('price', extract_price_eu(price))
        # Stock is always reported as 1. The former "En stock" XPath query
        # was removed because its result was never used.
        loader.add_value('stock', 1)
        if image_url:
            loader.add_value(
                'image_url',
                urljoin_rfc(get_base_url(response), image_url[0].strip()))
        yield loader.load_item()

예제 #16

0

파일 보기

    def parse(self, response):
        """Schedule each listed article, then follow the first "more" link.

        An article whose listed price parses to 0 is marked out of stock.
        """
        hxs = HtmlXPathSelector(response)

        for box in hxs.select('//div[@class="artbox"]'):
            price_text = ''.join(
                box.select('.//span[@class="price"]//text()').extract())
            item = Product()
            item['price'] = extract_price_eu(price_text)
            item['stock'] = '0' if item['price'] == 0 else '1'

            detail_url = urljoin_rfc(
                get_base_url(response),
                box.select('.//a[@class="title"]/@href').extract()[0])
            detail_request = Request(detail_url,
                                     callback=self.parse_product,
                                     meta=response.meta)
            yield self.fetch_product(detail_request, item)

        # Only the first "more" link is followed.
        more_links = hxs.select(
            '//div[@class="paging"]//a[@class="navi more"]/@href').extract()
        if more_links:
            yield Request(urljoin_rfc(get_base_url(response), more_links[0]))

예제 #17

0

파일 보기

파일: sferaufficio.py 프로젝트: oceancloud82/scraping

    def parse_product(self, response):
        """Build one product item from a product detail page.

        SKU and identifier are both read from the cell following the
        "Codice prodotto" label.
        """
        base_url = get_base_url(response)
        hxs = HtmlXPathSelector(response)

        loader = ProductLoader(selector=hxs, item=Product())

        name = hxs.select('//h1//text()').extract()
        loader.add_value('name', name)
        loader.add_value('url', response.url)

        price_text = hxs.select('//span[@class="price"]/text()').extract()[0]
        # The amount is normally the second whitespace-separated token.
        # Fall back to the whole text when there is only one token, rather
        # than raising IndexError.
        price_tokens = price_text.split()
        amount = price_tokens[1] if len(price_tokens) > 1 else price_text
        loader.add_value('price', extract_price_eu(amount))
        loader.add_value('shipping_cost', 0)

        # NOTE(review): takes the second <img> on the page as the product
        # image - positional and brittle; confirm against the site markup.
        image_url = hxs.select('//img/@src').extract()[1]
        loader.add_value('image_url', urljoin(base_url, image_url))
        category = hxs.select(
            '//div[@class="breadcrumbs"]//a/text()').extract()[1]
        loader.add_value('category', category.strip())
        brand = hxs.select(
            '//td[text()="Produttore"]/following-sibling::td[1]/a/text()'
        ).extract()[0].strip()
        loader.add_value('brand', brand)
        loader.add_value('stock', 1)

        # The former table-position SKU query was removed: its result was
        # never used, the labelled cell below is the actual source.
        loader.add_xpath(
            'sku',
            '//td[text()="Codice prodotto"]/following-sibling::td[1]/text()')
        loader.add_xpath(
            'identifier',
            '//td[text()="Codice prodotto"]/following-sibling::td[1]/text()')

        yield loader.load_item()

예제 #18

0

파일 보기

    def parse_product(self, response):
        """Queue variant pages, then emit this page's product.

        Shipping is '15' for prices below 1000 and '0' otherwise. The
        identifier is the last path segment of the page URL.
        """
        hxs = HtmlXPathSelector(response)

        # Product variants are re-queued with this same callback.
        variant_links = hxs.select('//h3[contains(text(), "Warianty produktu")]/..//h2[@class="producttitlesimple"]/a/@href').extract()
        for href in variant_links:
            yield Request(urlparse.urljoin(get_base_url(response), href), callback=self.parse_product)

        loader = ProductLoader(item=Product(), response=response)

        price_text = ''.join(hxs.select('//span[@itemprop="price"]/text()').extract())
        price = extract_price_eu(price_text.replace(' ', ''))

        loader.add_value('price', price)
        loader.add_value('url', response.url)
        loader.add_xpath('name', '//h1[contains(@class, "productbittitle")]/text()')

        available = hxs.select('//div[@class="clearfix hidden-xs"]/a[@class="avail"]')
        loader.add_value('stock', '1' if available else '0')

        # First and last breadcrumb entries are dropped.
        breadcrumb_titles = hxs.select('//ol[contains(@class, "breadcrumb")]/a/@title').extract()
        loader.add_value('category', breadcrumb_titles[1:-1])
        loader.add_xpath('brand', '//meta[@itemprop="brand"]/@content')
        loader.add_value('shipping_cost', '15' if price < Decimal(1000) else '0')

        sku = ''.join(hxs.select('//p[@class="productcode"]/strong/text()').extract())
        if not sku:
            # Fall back to the trailing segment matched in the page URL.
            url_codes = re.compile("\/(\d*.)$").findall(response.url)
            sku = url_codes[0] if url_codes else ''
        loader.add_value('sku', sku.strip())
        loader.add_value('identifier', response.url.split("/")[-1])
        loader.add_xpath('image_url', "//div[@id='main-photo']//img/@src")
        yield loader.load_item()

예제 #19

0

파일 보기

파일: experttechnomarkt.py 프로젝트: oceancloud82/scraping

    def parse(self, response):
        """Schedule each listed article, then follow every paging link.

        The price is read from the alt text of the price image. An article
        is out of stock when its status image carries the given
        ``pagespeed_url_hash``.
        """
        hxs = HtmlXPathSelector(response)

        for article in hxs.select('//div[@class="single_article"]'):
            price_alt = ''.join(
                article.select('.//div[@class="price"]/img/@alt').extract())
            item = Product()
            item['price'] = extract_price_eu(price_alt)

            sold_out_icon = article.select(
                './/div[@class="status"]/img[contains(@pagespeed_url_hash,"2593193988")]'
            )
            item['stock'] = '0' if sold_out_icon else '1'

            # The query string is cut off the product link.
            bare_href = article.select(
                'substring-before(./a/@href,"?")').extract()[0]
            detail_request = Request(
                urljoin_rfc(get_base_url(response), bare_href),
                callback=self.parse_product,
                meta=response.meta)
            yield self.fetch_product(detail_request, item)

        paging_links = hxs.select('//div[@class="paging"]//a/@href').extract()
        for href in paging_links:
            yield Request(urljoin_rfc(get_base_url(response), href))

예제 #20

0

파일 보기

파일: lekmer_dk.py 프로젝트: oceancloud82/scraping

    def parse_product(self, response):
        """Complete a product from its detail page and add shipping cost.

        The loader starts from ``response.meta['product']`` when present,
        else from a fresh ``Product``. ``stock`` is set to 0 only when the
        success message does not mention "lager".
        """
        base_item = response.meta.get('product', Product())
        loader = ProductLoader(item=base_item, response=response)

        loader.add_xpath('identifier', '//input[@name="id"]/@value')
        loader.add_value('url', response.url)
        loader.add_xpath('name', '//h1//text()')

        article_numbers = response.xpath(
            '//div[@class="basic-content-body"]//dt[contains(text(), "Artikelnummer")]'
            '/following-sibling::dd/text()').re(r'(\d{3}\d*)')
        if not article_numbers:
            self.log('No SKU for %s' % (response.url))
        else:
            loader.add_value('sku', article_numbers)

        # The second-to-last breadcrumb entry is the category.
        loader.add_xpath(
            'category',
            '//ul[contains(@class, "breadcrumbs")]/li[position()=last()-1]/a/text()'
        )

        image_sources = response.xpath(
            '//img[@itemprop="image"]/@src').extract()
        if image_sources:
            loader.add_value('image_url', response.urljoin(image_sources[0]))

        price_fragments = response.xpath(
            'normalize-space(//*[@itemprop="price"]/text())').re(
                r'([\d.,]+)')
        loader.add_value('price', extract_price_eu(''.join(price_fragments)))
        loader.add_value('brand', 'Lego')

        stock_mentions = response.xpath(
            '//div[@class="product-info"]//em[@class="mod-success"]//text()'
        ).re(r'lager')
        if not stock_mentions:
            loader.add_value('stock', 0)
        yield self.add_shipping_cost(loader.load_item())

예제 #21

0

파일 보기

파일: made_de.py 프로젝트: oceancloud82/scraping

    def parse_product(self, response):
        """Parse a product page and yield its product(s).

        A base product is loaded from the page. When the page has no
        ``super-product-table`` price rows, that product is passed to
        ``self.yield_product``. Otherwise one product per table row is
        built on top of a copy of the base product and passed to
        ``self.yield_product``. Pages without a name yield nothing.
        """
        hxs = HtmlXPathSelector(response)

        loader = ProductLoader(item=Product(), selector=hxs)
        loader.add_xpath('identifier', '//input[@name="product"]/@value')
        # Fall back to the id suffix of the "product-price-" span when the
        # hidden input produced no identifier.
        if not loader.get_output_value('identifier'):
            loader.add_xpath(
                'identifier',
                'substring-after(//span[starts-with(@id,"product-price-")]/@id, "product-price-")'
            )
        loader.add_xpath(
            'sku', '//tr/th[contains(text(),"Artikelnummer")]/../td/text()')
        loader.add_value('url', response.url)
        loader.add_xpath('name', '//div[@itemprop="name"]//text()')
        loader.add_xpath('image_url', '//meta[@itemprop="image"]/@content')
        loader.add_xpath('price', '//meta[@itemprop="price"]/@content')
        shipping_cost = hxs.select(
            '//th[contains(text(),"Standardlieferpreis")]//following-sibling::td/span[@class="price"]/text()'
        ).extract()
        if shipping_cost:
            loader.add_value('shipping_cost',
                             extract_price_eu(shipping_cost[0]))
        # Nothing is emitted for a page without a product name.
        if not loader.get_output_value('name'):
            return
        # The brand is the first word of the name, except that a leading
        # '2' maps to the brand 'Flynn'.
        if loader.get_output_value('name').split()[0] == '2':
            loader.add_value('brand', 'Flynn')
        else:
            loader.add_value('brand',
                             loader.get_output_value('name').split()[0])

        if hxs.select(
                '//span[@itemprop="availability" and @content="in_stock"]'):
            loader.add_value('stock', '1')
        else:
            loader.add_value('stock', '0')

        # Look the product up in self.made_products by its upper-cased,
        # stripped SKU.
        sku = loader.get_output_value('sku')
        sku = sku.upper().strip() if sku else ''
        made_product = self.made_products.get(sku, None)

        # no_category is set only when the category had to be derived from
        # the URL path segments; it is forwarded to self.yield_product.
        no_category = False
        if made_product:
            loader.add_value('category', made_product['Category'])
        else:
            loader.add_xpath(
                'category',
                '//div[@class="breadcrumbs"]/ul/li[position()>1]/a/span/text()'
            )
            if not loader.get_output_value('category'):
                loader.add_value('category',
                                 (x.replace('-', ' ')
                                  for x in response.url.split('/')[3:-1]))
                no_category = True

        product = loader.load_item()

        # Rename known category values; values without an entry are kept.
        catmap = {
            "bedding and bath": "Bed & Bath",
            "beds": "Beds",
            "chairs": "Chairs",
            "homewares accessories": "Home Accessories",
            "lighting": "Lighting",
            "sofas and armchairs": "Sofas",
            "storage": "Storage",
            "tables": "Tables",
        }
        product['category'] = catmap.get(product['category'],
                                         product['category'])

        # Codes come from the made_products entry, or '' when the SKU was
        # not found there.
        metadata = MadeMeta()
        metadata['johnlewis_code'] = made_product[
            'JL product code'] if made_product else ''
        metadata['next_code'] = made_product[
            'Next product code'] if made_product else ''
        product['metadata'] = metadata

        # Rows of the super-product table that contain a price cell.
        trs = hxs.select(
            '//table[@id="super-product-table"]//tr/td[@class="price"]/..')
        if not trs:
            for x in self.yield_product(product, no_category):
                yield x
            return

        for tr in trs:
            # Each row starts from a copy of the base product, then adds
            # its own identifier, name part and price.
            loader = ProductLoader(item=Product(product), selector=tr)
            loader.add_xpath(
                'identifier',
                'substring-after(.//span[starts-with(@id,"product-price-")]/@id, "product-price-")'
            )
            loader.add_value('name', product['name'])
            loader.add_xpath('name', './/td[1]/text()')
            loader.add_xpath('price', './/span[@property="price"]/@content')
            for x in self.yield_product(loader.load_item(), no_category):
                yield x

예제 #22

0

파일 보기

파일: fnac.py 프로젝트: oceancloud82/scraping

    def extract_product_info(self, product):
        """Gather price, seller, stock and identifier data for one listed product.

        Every XPath/CSS query is evaluated relative to ``product``, a selector
        for a single entry of a listing page.

        Returns a dict with the keys ``url``, ``marketplace``, ``price``,
        ``promotion_price``, ``out_stock``, ``exclusive_online``, ``shipping``,
        ``identifier``, ``offers_url``, ``offers_count``, ``seller``,
        ``seller_identifier``, ``combined_identifier`` and ``is_used``.
        ``price`` and ``promotion_price`` stay empty lists when the page shows
        no such value; ``shipping`` is ``''`` in that case.

        Raises IndexError when the entry contains no link.
        """
        prod_url = product.xpath('.//a/@href').extract()[0]
        price_section = product.xpath('./../../following-sibling::div')
        # An entry is a marketplace offer when its price section names a seller.
        marketplace = bool(price_section.xpath('.//*[@class="seller"]'))

        is_used = False
        if marketplace:
            state = product.xpath('../..//*[@class="shipping"]/ul/li[span[contains(text(), "Estado")]]/strong/text()').extract_first()
            if state and 'novo' not in state.lower():
                is_used = True

        price = price_section.xpath('.//*[@class="floatl"]//*[@class="userPrice"]/text()').extract()
        if price:
            price = price[0].replace(u'\xa0', '')
            price = extract_price_eu(price)

        promotion_price = price_section.xpath('.//*[@class="floatl"]//*[@class="oldPrice"]/text()').extract()
        if promotion_price:
            promotion_price = promotion_price[0].replace(u'\xa0', '')
            promotion_price = extract_price_eu(promotion_price)

        offers_url = None
        offers_count = 0
        offers_links = price_section.css('.OffersSumary').xpath(".//a")
        for a in offers_links:
            # A link without a text node yields None; treat it as an empty title
            # instead of failing on ``None.strip()``.
            link_title = (a.xpath("text()").extract_first() or u'').strip()
            if u'segunda' in link_title:
                continue
            if u'novo' not in link_title:
                continue
            offers_url = a.xpath("@href").extract_first()
            # ``\d*`` may match zero digits (e.g. "Ver novo"), giving '' (or None
            # when nothing matches); int('') / int(None) would raise.
            count_text = a.xpath("text()").re_first(u"(?u)(\d*)\s*novo")
            offers_count = int(count_text) if count_text else 0

        seller = None
        seller_identifier = None
        if marketplace:
            seller = price_section.xpath('.//a[@class="seller"]/text()').extract_first().strip()
            seller_url = price_section.xpath('.//a[@class="seller"]/@href').extract_first()
            seller_identifier = get_seller_id_from_url(seller_url)

            # Remember the id of every seller seen, keyed by lower-cased name.
            self.seller_ids[seller.lower()] = seller_identifier

        out_stock = len(product.xpath('./..//*[@class="Nodispo"]').extract()) > 0
        dispo = product.xpath('./../..').css('.sellerInfos > li')
        if dispo:
            dispo = ' '.join(dispo.css('.Dispo-txt').xpath("text()").extract())
        else:
            dispo = ''

        exclusive_online = u'exclusivo internet' in dispo.lower()

        if marketplace:
            shipping = product.xpath('../..//*[@class="shipping"]/ul/li[span[contains(text(), "Portes")]]/strong/text()')
        else:
            shipping = product.xpath('./..//*[@class="Delivery-price"]//text()')
        if shipping:
            shipping = ''.join(shipping.extract())
            shipping = extract_price_eu(shipping)
        else:
            shipping = ''

        # The numeric id is taken from a ``/mp<digits>/`` path segment, falling
        # back to a trailing ``/a<digits>``.
        identifier = re.search(r'/mp(\d+)/', prod_url)
        if not identifier:
            identifier = re.search(r'/a(\d+)$', prod_url)
        if identifier:
            identifier = 'fcom' + identifier.groups()[0]
        else:
            self.log('Identifier not found {}'.format(prod_url))

        # NOTE(review): when no identifier was matched above, ``identifier`` is
        # None here and the marketplace concatenation raises TypeError --
        # confirm how callers want that case handled.
        if marketplace:
            combined_identifier = identifier + '-' + seller_identifier
        else:
            combined_identifier = identifier
            combined_identifier = self.get_identifier(combined_identifier)

        result = {'url': prod_url, 'marketplace': marketplace, 'price': price,
                  'promotion_price': promotion_price, 'out_stock': out_stock,
                  'exclusive_online': exclusive_online, 'shipping': shipping,
                  'identifier': identifier,
                  'offers_url': offers_url, 'offers_count': offers_count,
                  'seller': seller, 'seller_identifier': seller_identifier,
                  'combined_identifier': combined_identifier,
                  'is_used': is_used}

        return result

예제 #23

0

파일 보기

파일: bikediscount.py 프로젝트: oceancloud82/scraping

    def parse_product(self, response):
        """Parse a product page and yield its product(s).

        Yields one ``Product`` per entry under the
        ``artikel_element_prices`` option blocks, or the base product when the
        page lists no such options. Identifiers already recorded in
        ``self.identifiers`` are skipped, and every yielded identifier is
        appended to that list.

        Nothing is yielded when the brand/name or the price cannot be found, or
        when the page states that the item is currently not available.
        """
        try:
            brand_name = response.xpath(
                '//span[@class="manufacturer"]/text()').extract()[0]
            name = response.xpath(
                '//div[@id="product-box"]//div[@class="title"]/text()'
            ).extract()[0].strip()
        except IndexError:
            # Either XPath matched nothing.
            self.log('No brand or name found: %s' % response.url)
            return

        if response.xpath(
                '//div[@class="no-valid-variants" and contains(text(), "this item is currently not available")]'
        ):
            return

        product_loader = ProductLoader(item=Product(), response=response)
        product_loader.add_value('url', response.url)
        product_loader.add_value('name', brand_name + ' ' + name)
        sku = response.xpath(
            '////div[@class="additional-product-no"]/@data-xencoded').extract(
            )
        if sku:
            sku = sku[0]
            h = HTMLParser.HTMLParser()
            # The attribute has the form "<int key>:<HTML-escaped payload>".
            key, data = sku.split(':', 1)
            key = int(key)
            data = h.unescape(data)
            # XOR decoding: every character is XOR-ed with the integer key.
            data = ''.join([chr(ord(c) ^ key) for c in data])
            sku = re.search(r'Manufacturer Item no\. (.*)', data)
            if sku:
                sku = sku.group(1)
                # 'Hersteller Artikelnr: 20050/20051'
                product_loader.add_value('sku', sku)
        identifier = response.xpath(
            '//input[@name="vw_id"]/@value').extract()[0]
        product_loader.add_value('identifier', identifier)

        price = response.xpath(
            '//div[@class="current-price"]/span[@class="price"]/text()'
        ).extract()
        if not price:
            # Fallback location of the price.
            price = response.xpath(
                '//table[@class="product-price"]//tr[@class="price"]/td/text()'
            ).extract()
        if price:
            price = price[0]
            product_loader.add_value('price', extract_price_eu(price))
        else:
            self.log('No product price found: %s' % response.url)
            return

        category = response.css('.uk-breadcrumb a::text').extract()[-1]

        product_loader.add_value('category', category)

        product_loader.add_value('brand', brand_name.strip())

        # The image is optional: add it only when the page provides one.
        image_src = response.xpath('//img[@itemprop="image"]/@src').extract()
        if image_src:
            product_loader.add_value('image_url',
                                     response.urljoin(image_src[0]))
        product = product_loader.load_item()

        options = response.xpath(
            '//div[contains(@id,"artikel_element_prices")]')
        if options:
            for opt in options:
                p = Product(product)
                optname = opt.xpath(
                    './/meta[@itemprop="name"]/@content').extract()[0]
                p['name'] = optname
                p['price'] = extract_price(
                    opt.xpath('.//meta[@itemprop="price"]/@content').extract()
                    [0])
                # Option identifier = base identifier + suffix of the block id.
                p['identifier'] = p['identifier'] + '-' + opt.xpath('@id').re(
                    'artikel_element_prices(.*)')[0]
                if p['identifier'] not in self.identifiers:
                    self.identifiers.append(p['identifier'])
                    yield p
        else:
            if product['identifier'] not in self.identifiers:
                self.identifiers.append(product['identifier'])
                yield product

예제 #24

0

파일 보기

    def parse_product(self, response):
        """Build the item for a product page and yield it, or one item per option.

        When the ``HeaderMinPrices`` text holds more than one ``' / '``-separated
        option, a deep copy of the item is yielded for each option with its own
        name suffix, identifier, sku and price. Otherwise a single item is
        yielded, priced from the first price text found on the page.
        """
        base_url = get_base_url(response)

        product_name = response.xpath(
            '//span[contains(@id, "uxProductName")]/text()').extract()[0]

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('name', product_name)
        loader.add_xpath(
            'price',
            '//div[contains(@class, "product-view__total-price")]/@data-price')
        images = response.xpath(
            '//img[contains(@id, "uxProductImage")]/@src').extract()
        if images:
            loader.add_value('image_url', images[0])
        loader.add_xpath(
            'brand',
            '//tr[td[contains(text(), "Produsent")]]/td[not(contains(text(), "Produsent"))]/text()'
        )
        loader.add_value(
            'category',
            response.xpath(
                '//tr[td[contains(text(), "Linsetype")]]/td[not(contains(text(), "Linsetype"))]/text()'
            ).extract())
        loader.add_value('url', response.url)

        # The product id is read from the page body; the second pattern is only
        # tried when the first one finds nothing.
        found_ids = (re.findall("return '(\d+)';", response.body)
                     or re.findall("var productId = (\d+);", response.body))
        identifier = found_ids[0]

        loader.add_value('identifier', identifier)
        loader.add_value('sku', identifier)

        item = loader.load_item()

        header_prices = response.xpath(
            '//span[@class="HeaderMinPrices"]/text()').extract()
        options = header_prices[0].split(' / ') if header_prices else []
        if len(options) > 1:
            for option in options:
                variant = deepcopy(item)
                pack_name = re.findall('(.*) linser per', option)[0]
                variant['name'] += ' ' + pack_name
                variant['identifier'] += '-' + ''.join(pack_name.split())
                variant['sku'] = variant['identifier']
                option_price = re.findall('kr (.*)', option)
                variant['price'] = extract_price_eu(option_price[0])
                yield variant
            return

        # Single product: take the first source that has any price text.
        price_texts = (
            header_prices
            or response.xpath(
                '//div[@class="DescriptionExtraAccessories"]//span[contains(text(), "Kr")]/text()'
            ).extract()
            or response.xpath(
                '//div[@class="DescriptionExtra"]//span[contains(text(), "Kr") or contains(text(), "kr")]/text()'
            ).extract())
        price_text = price_texts[0].lower()
        # Keep the first run of digits after the last "kr".
        digits = re.findall('\d+', price_text.split('kr')[-1])[0]
        item['price'] = extract_price_eu(digits)
        yield item

예제 #25

0

파일 보기

파일: lego_amazon_fr.py 프로젝트: oceancloud82/scraping

 def extract_price(self, price):
     """
     Override of ``extract_price``: the French site writes numbers as
     ``#.###,##``, so parsing is delegated to ``extract_price_eu``.

     Returns whatever ``extract_price_eu(price)`` returns.
     """
     return extract_price_eu(price)

예제 #26

0

파일 보기

    def parse_product(self, response):
        """Parse a product page; yield the product and requests for its variants.

        Yields, in order:
          * a ``Request`` (handled by this same method) for every link under
            the ``options-types`` list;
          * the product itself, unless one of the option ``<ul>`` lists has no
            pre-checked input (the product name would then be incomplete);
          * one ``Request`` to the ``productCombinationUpdate`` URL per
            combination of option values, when the page carries the
            ``formCombinationOptions`` form.

        Pages without a ``<div id="product">`` yield nothing.
        """
        if not response.xpath('//div[@id="product"]'):
            return

        for url in response.xpath('//ul[@class="options-types"]//a/@href').extract():
            yield Request(response.urljoin(url), callback=self.parse_product)

        loader = ProductLoader(item=Product(), response=response)
        # Product data is read from ``tc_vars['<key>'] = '<value>'`` assignments
        # inside the page's script tags.
        xpath = '//script/text()'
        pattern = r"tc_vars\['%s'\] = '(.+)'"
        loader.add_xpath('identifier', xpath, re=pattern % 'product_id')
        loader.add_xpath('sku', xpath, re=pattern % 'product_id')
        loader.add_xpath('name', xpath, re=pattern % 'product_name')
        image_url = response.xpath(xpath).re(pattern % 'product_url_picture')
        if image_url:
            image_url = response.urljoin(image_url[0])
            loader.add_value('image_url', image_url)
        loader.add_xpath('url', xpath, re=pattern % 'product_url_page')
        loader.add_xpath('price', xpath, re=pattern % 'product_totalprice_ati')
        categories = response.xpath('//nav[@class="breadcrumb"]//span/text()').extract()
        loader.add_value('category', categories[1:-1])
        loader.add_value('stock', int(response.xpath(xpath).re(pattern % 'product_instock')[0] == 'Y'))
        product = loader.load_item()

        # ``all_selected`` stays True only while every <ul> option list has a
        # checked input. It is kept separate from the loop variables below: the
        # flag used to share its name with the <option> loop variable and was
        # overwritten by it.
        all_selected = True
        options = []
        for group in response.xpath('//div[@class="options-list"]/form/div'):
            for ul in group.xpath('./ul'):
                options.append(ul.xpath('./li'))
                try:
                    product['name'] += ' ' + ul.xpath('.//input[@checked]/../label/text()').extract()[0]
                except (IndexError, KeyError):
                    # No checked input in this list (or no name to extend).
                    all_selected = False
            if group.xpath('./div'):
                if group.xpath('./div/ul'):
                    options.append(group.xpath('.//li'))
                    product['name'] += ' ' + group.xpath('.//input[@checked]/../label/text()').extract()[0]
                elif group.xpath('./div/select'):
                    # Quantity-style option: each <option> becomes a dict that
                    # shares the group's name, form field name and unit price.
                    option = {'name': group.xpath('./h2/text()').re(r'\d*\.(.+)')[0].strip()}
                    option['url_name'] = group.xpath('.//select/@name').extract()[0]
                    option['price'] = extract_price_eu(group.xpath('.//label/text()').extract()[0])
                    opts = []
                    for option_sel in group.xpath('.//select/option'):
                        d = option.copy()
                        d['selector'] = option_sel
                        opts.append(d)
                    options.append(opts)
                    product['name'] += ' ' + group.xpath('./h2/text()').re(r'\d*\.(.+)')[0].strip()
                    product['name'] += ' ' + group.xpath('.//option[@selected]/text()').extract()[0]
        if all_selected:
            yield product
        struct_id = response.xpath('//form[@id="formCombinationOptions"]/input[1]/@value').extract()
        if not struct_id:
            return
        struct_id = struct_id[0]
        struct_name = response.xpath('//form[@id="formCombinationOptions"]/input[1]/@name').extract()[0]
        url_pattern = 'http://www.maisonsdumonde.com/FR/fr/%s/productCombinationUpdate?%s=%s' %(struct_id, struct_name, struct_id)
        variants = itertools.product(*options)
        for variant in variants:
            # NOTE(review): ``item`` is filled in below but never yielded by this
            # method; only the combination request is emitted.
            item = Product(product)
            url = url_pattern
            head_id = ''
            for option in variant:
                if not isinstance(option, dict):
                    if option.xpath('.//@data-headref'):
                        head_id = option.xpath('.//@data-headref').extract()[0]
                    url += '&' + option.xpath('.//@name').extract()[0] + '=' + option.xpath('.//@value').extract()[0]
                    item['name'] += ' ' + option.xpath('.//label/text()').extract()[0]
                    item['identifier'] += '-' + option.xpath('.//input/@value').extract()[0]
                    price = option.xpath('.//span[@class="price"]/text()[preceding-sibling::br]').extract()
                    if price:
                        item['price'] += extract_price_eu(price[0])
                else:
                    url += '&' + option['url_name'] + '=' + option['selector'].xpath('.//@value').extract()[0]
                    item['name'] += ' ' + option['name'] + ' ' + option['selector'].xpath('./text()').extract()[0]
                    quantity = option['selector'].xpath('./@value').extract()[0]
                    item['identifier'] += '-' + quantity
                    item['price'] += option['price'] * int(quantity)
            if head_id:
                url += '&combinationProduct[head]=%s' %head_id
            yield Request(url, callback=self.parse_product)

예제 #27

0

파일 보기

 def _get_price(self, hxs):
     """Return the price parsed from the first ``itemprop="price"`` text node.

     Falls back to ``Decimal('0.0')`` when the page has no such node.
     """
     price_texts = hxs.select('//*[@itemprop="price"]/text()').extract()
     if not price_texts:
         return Decimal('0.0')
     return extract_price_eu(price_texts[0])

예제 #28

0

파일 보기

파일: room21_no.py 프로젝트: oceancloud82/scraping

    def parse_product(self, response):
        """Parse a product page and yield one item per option, or a single item.

        When the page has an ``Egenskap1`` select, every
        ``Vektor_Rubrikartikel[n] = '...'`` entry in the page source is split
        on ``!div!`` and turned into its own item (name, identifier/sku, price
        and stock come from that entry). Otherwise one item is built from the
        page's own fields.

        All items share the page's category, brand and main image.
        """
        hxs = HtmlXPathSelector(response)

        if 'Egenskap2' in response.body:
            self.log('SECOND!!!!!!!!')

        has_options = hxs.select('//*[@id="OrderFalt"]//select[@name="Egenskap1"]')

        img = hxs.select('//img[@itemprop="image"]/@src').extract()
        # Resolve the image URL once. Popping from the shared ``img`` list for
        # every option gave the image to the first option only and a different
        # image (or none) to the others.
        image_url = urljoin_rfc(get_base_url(response), img[-1]) if img else None
        category = hxs.select('//*[@id="breadcrumb"]//a/text()').extract()[1:]
        brand = hxs.select('//*[@id="VarumarkeFalt"]/a/img/@alt').extract()
        brand = brand[0] if brand else ''

        if has_options:
            price_flags = re.DOTALL | re.IGNORECASE | re.MULTILINE
            for entry in re.finditer(r"(?sim)Vektor_Rubrikartikel\[\d+\] = '(.*?)';", response.body_as_unicode()):
                loader = ProductLoader(item=Product(), selector=hxs)
                option = entry.group(1).split('!div!')
                name = option[2]
                product_identifier = option[4]
                # The "PrisREA" price is preferred; "PrisBOLD" is the fallback.
                price_match = (
                    re.search(r'<span class="PrisREA">(\d+)<span>', option[1], price_flags)
                    or re.search(r'<span class="PrisBOLD">(\d+)<span>', option[1], price_flags))
                if price_match:
                    result = price_match.group(1)
                else:
                    self.log('ERROR!!!! NO price!')
                    result = '0'
                price = extract_price_eu(result)
                stock = option[6]
                if 'Midlertidig utsolgt' in stock:
                    loader.add_value('stock', 0)
                loader.add_value('identifier', product_identifier)
                loader.add_value('sku', product_identifier)
                loader.add_value('url', response.url)
                loader.add_value('name', name)
                loader.add_value('price', price)
                if image_url:
                    loader.add_value('image_url', image_url)
                loader.add_value('category', category)
                loader.add_value('brand', brand)
                yield loader.load_item()
        else:
            loader = ProductLoader(item=Product(), selector=hxs)
            product_identifier = hxs.select('//*[@id="ArtnrFalt"]/text()').extract()[0]
            loader.add_value('identifier', product_identifier)
            loader.add_value('sku', product_identifier)
            loader.add_value('url', response.url)
            loader.add_xpath('name', '//*[@id="ArtikelnamnFalt"]/text()')
            price = ''.join(hxs.select('//*[@id="PrisFalt"]/meta[@itemprop="price"]/@content').extract())
            price = extract_price_eu(price)
            loader.add_value('price', price)
            if image_url:
                loader.add_value('image_url', image_url)
            loader.add_value('category', category)
            loader.add_value('brand', brand)
            stock = hxs.select('//*[@id="LevtidFaltMeta"]/@content').extract()[0].strip()
            if stock == 'Midlertidig utsolgt':
                loader.add_value('stock', 0)
            yield loader.load_item()

예제 #29

0

파일 보기

파일: fnac.py 프로젝트: oceancloud82/scraping

    def parse_offers_static_page(self, response):
        """Yield one product per eligible offer row of the ``#colsMP`` table.

        Reads ``product_info`` (required) and ``exclusive_online`` (optional)
        from ``response.meta``. Rows are skipped, with a log line, when they are
        Fnac's own offer, when their status text lacks "novo", or when the
        price or the dealer is missing. The base identifier, without its
        ``fcom`` prefix, is added to ``self.seen``.

        Each yielded product carries ``SonaeMeta`` metadata, including promo
        start/end dates derived from ``self.metadata_`` for the same identifier.
        """
        # The first row is always dropped (presumably the table header -- TODO confirm).
        offer_rows = response.css('#colsMP tr')[1:]

        exclusive_online = bool(response.meta.get('exclusive_online'))
        product_info = response.meta['product_info']

        base_identifier = product_info['base_identifier'].replace('mp', '')
        if 'fcom' not in base_identifier:
            base_identifier = 'fcom' + base_identifier
        self.seen.add(base_identifier.replace('fcom', ''))

        copied_fields = ('name', 'category', 'brand', 'url', 'image_url', 'sku')
        for row in offer_rows:
            if row.css('.fnacView'):
                self.log('Skipping Fnac direct product')
                continue

            condition = row.css('td.gras').xpath('./text()').extract()
            if condition and 'novo' not in condition[0].lower():
                self.log('Skipping used product')
                continue

            price_texts = row.css('.userPrice').xpath('./text()').extract()
            if not price_texts:
                self.log('Price not found')
                continue
            price = price_texts[0].replace(u'\xa0', '').strip()

            # Stays an empty list (falsy) when the row shows no old price.
            promotion_price = row.css('.oldPrice').xpath('./text()').extract()
            if promotion_price:
                promotion_price = extract_price_eu(promotion_price[0].replace(u'\xa0', '').strip())

            shipping_cost = row.css('.noir').xpath('./text()').extract()
            if shipping_cost:
                shipping_cost = extract_price_eu(shipping_cost[0].strip())

            dealer = row.css('.bleu_MP')
            if not dealer:
                self.log('Dealer not found')
                continue
            # The dealer id is the last path segment of the dealer link.
            dealer_id = dealer.xpath('./a/@href').extract()[0].split('/')[-1]
            dealer_name = dealer.xpath('./a/strong/text()').extract()[0].strip()

            identifier = self.get_identifier(base_identifier + '-' + dealer_id)

            loader = ProductLoader(item=Product(), selector=row)
            loader.add_value('identifier', identifier)
            loader.add_value('dealer', dealer_name)
            for field in copied_fields:
                loader.add_value(field, product_info[field])
            loader.add_value('price', price)
            if shipping_cost:
                loader.add_value('shipping_cost', shipping_cost)
            product = loader.load_item()

            metadata = SonaeMeta()
            if exclusive_online:
                metadata['exclusive_online'] = 'Yes'
            metadata['delivery_24_48'] = 'Yes'
            if promotion_price:
                metadata['promotion_price'] = str(promotion_price)
            product['metadata'] = metadata

            # Promo dates recorded earlier for this identifier, if any.
            prev_meta = self.metadata_[identifier] if identifier in self.metadata_ else {}
            promo_start = prev_meta.get('promo_start')
            promo_end = prev_meta.get('promo_end')
            today = datetime.datetime.now().strftime('%Y-%m-%d')

            product['metadata']['extraction_timestamp'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
            if promotion_price:
                # A still-open promotion keeps its start date; otherwise it starts today.
                still_open = promo_start and not promo_end
                product['metadata']['promo_start'] = promo_start if still_open else today
                product['metadata']['promo_end'] = ''
            elif promo_start:
                # The promotion has ended: keep its start and close it today
                # unless an end date was already recorded.
                product['metadata']['promo_start'] = promo_start
                product['metadata']['promo_end'] = promo_end or today

            yield product

예제 #30

0

파일 보기

파일: designandfurniture.py 프로젝트: oceancloud82/scraping

    def parse_product(self, response):
        """Parse a product page and yield one item per combination, or one item.

        When the page source contains ``addCombination(...)`` calls, one item is
        yielded per call. Its price is computed from the JavaScript variables
        in the page (tax-excluded price, currency rate, tax rate, reductions)
        plus the combination's price offset, and its name is extended with the
        texts of the combination's options. Otherwise a single item is yielded
        with the price displayed in ``#our_price_display``.

        Pages without ``<div id="primary_block">`` yield nothing.
        """
        hxs = HtmlXPathSelector(response)

        product_found = hxs.select('//div[@id="primary_block"]')
        if not product_found:
            return

        def js_decimal(var_name, default):
            # Value of ``var <var_name> ... <number>`` in the page source as a
            # Decimal, or ``default`` when the variable is absent.
            found = re.search(r'var %s\D+([\d\.]+)' % var_name, response.body)
            return Decimal(found.group(1)) if found else default

        product_id = hxs.select('//input[@name="id_product"]/@value').extract()[0]
        name = hxs.select('//div[@id="dfCenter"]//h1/text()').extract()[0]
        category = hxs.select('//div[@class="breadcrumb"]/a/text()').extract()[1:]
        image_url = hxs.select('//img[@id="bigpic"]/@src').extract()
        if image_url:
            image_url = image_url[0]
        product_url = response.url

        # The brand is guessed from the first paragraph of the short
        # description: normalise " di "/" by " to " da " and keep what follows.
        product_brand = hxs.select('//div[@id="short_description_content"]//p[1]//text()').extract()[0]
        product_brand = product_brand.replace(' di ', ' da ')
        product_brand = product_brand.replace(' by ', ' da ')
        if len(product_brand) > 20:
            brand_match = re.search(' da.+?[,.]', product_brand)
            if brand_match:
                product_brand = brand_match.group(0)
        product_brand = product_brand.split(' da ')[-1]
        product_brand = product_brand.strip().strip('.,')
        if len(product_brand) > 20:
            # Still too long: keep the longest run shared with the page title.
            title = hxs.select('//title/text()').extract()[0]
            s = SequenceMatcher(a=product_brand.title(), b=title.title())
            m = s.find_longest_match(0, len(s.a), 1, len(s.b))
            product_brand = s.a[m[0]:m[0]+m[-1]].strip()
        if len(product_brand) < 7 or ' ' not in product_brand:
            product_brand = None

        currencyRate = js_decimal('currencyRate', 1)
        taxRate = js_decimal('taxRate', 0)
        reduction_percent = js_decimal('reduction_percent', 0)
        reduction_price = js_decimal('reduction_price', 0)
        productPriceTaxExcluded = js_decimal('productPriceTaxExcluded', 0)

        idDefaultImage = re.search(r'var idDefaultImage = (\d+)', response.body)
        if idDefaultImage:
            idDefaultImage = idDefaultImage.group(1)

        if re.search('addCombination.*?;', response.body):
            # here we parse option tags for more product options.
            option_value_xpath = '//div[@id="attributes"]//select/option/@value'
            option_values = hxs.select(option_value_xpath).extract()
            option_text_xpath = '//div[@id="attributes"]//select/option//text()'
            option_labels = hxs.select(option_text_xpath).extract()

            # build the lookup table: option value -> option text.
            options = dict(zip(option_values, option_labels))

            # addCombination(5631, new Array('259'), 11, 109.99, 0, -1, 'GGT3050', 0.00, 1);
            for x in re.finditer(r'addCombination\((.*?)\);', response.body):
                s = x.group(0).split(',')
                offset = Decimal(s[-6])

                # determining place of options keys
                option_key_start = 1
                option_key_end = len(s) - 7

                # parsing option keys; keys missing from the lookup table are
                # skipped (they used to re-append the previous option's text,
                # or raise NameError on the first key).
                option_texts = []
                for i in range(option_key_start, option_key_end):
                    option_text = options.get(re.sub(r'[^\d]+', '', s[i]), '')
                    if option_text:
                        option_texts.append(option_text.strip())

                price = productPriceTaxExcluded + offset * currencyRate
                tax = (taxRate / Decimal('100')) + 1
                price = price * tax
                reduction = Decimal('0')
                if reduction_price or reduction_percent:
                    reduction = price * (reduction_percent / Decimal('100')) + reduction_price
                    price = price - reduction
                price = round(price, 2)
                loader = ProductLoader(response=response, item=Product())
                loader.add_value('url', product_url)
                loader.add_value('name', name + ' ' + ' '.join(option_texts))

                # Swap the default image id inside the URL for the combination's
                # own image id, when both are known and differ.
                image_id = s[-4].strip(" '")
                if image_url and idDefaultImage and image_id != "-1" and image_id != idDefaultImage:
                    loader.add_value('image_url', image_url.replace('-' + idDefaultImage + '-', '-' + image_id + '-'))
                else:
                    loader.add_value('image_url', image_url)

                loader.add_value('brand', product_brand)
                loader.add_value('price', price)
                loader.add_value('category', category)
                loader.add_value('identifier', '%s-%s' % (product_id, re.search(r'(\d+)', s[0]).group(1)))
                loader.add_value('sku', s[-3].strip("' ").decode('utf8'))

                yield loader.load_item()
        else:
            loader = ProductLoader(response=response, item=Product())
            loader.add_value('url', product_url)
            loader.add_value('name', name)
            loader.add_value('image_url', image_url)
            loader.add_xpath('price', '//*[@id="our_price_display"]/text()', lambda x: extract_price_eu(x[0]) if x else Decimal('0'))
            loader.add_value('category', category)
            loader.add_value('identifier', product_id)
            loader.add_xpath('sku', '//*[@id="product_reference"]/span/text()')
            loader.add_value('brand', product_brand)

            yield loader.load_item()