Exemple #1
0
    def detail(self, response):
        """Build one item per (price, seller) pair found on a product page.

        Logs the response URL, reads the price texts and the seller image
        ``alt`` texts with XPath, and pairs them positionally with ``zip``
        (surplus entries on the longer side are ignored).

        :param response: product page response; the last path segment of
            ``response.url`` (up to its first ``.``) becomes the product name.
        :returns: list of ``BillionPricesIndiaItem``; empty when either
            XPath matches nothing.
        :raises ValueError: if the last space-separated token of a price
            text is not a number once commas are removed.
        """
        log.msg(response.url)
        hxs = HtmlXPathSelector(response)
        variants_price = hxs.select(
            "//div[@class='fleft catbox pricerate']//span/text()").extract()
        variants_seller = hxs.select(
            "//div[@class='catbox fleft storeimage']/img/@alt").extract()

        product = response.url.split('/')[-1].split(".")[0]
        items = []
        # No emptiness guard is needed: zip() over an empty list yields
        # nothing, so the loop body simply never runs.
        for price, seller in zip(variants_price, variants_seller):
            # Keep the last space-separated token and drop thousands commas.
            amount = float(price.replace(',', '').split(" ")[-1])

            item = BillionPricesIndiaItem()
            item['date'] = time.strftime("%d/%m/%Y")
            # Vendor is the last word of the seller image's alt text.
            item['vendor'] = seller.split(" ")[-1]
            item['product'] = product
            item['category'] = "tablets"
            item['price'] = amount
            item['quantity'] = '1'
            item['measure'] = 'pcs'
            # Quantity is always one piece, so unit price equals price.
            item['unitprice'] = amount
            items.append(item)
        return items
Exemple #2
0
    def parse(self, response):
        """Create gasoline price items from the previous-prices table.

        Logs the response URL, extracts the date texts and the price cell
        texts, and pairs them positionally with ``zip``. Every item is
        tagged with vendor ``ioc`` and a fixed quantity of ``'1 lt'``.

        :param response: response for the page holding the price table.
        :returns: list of ``BillionPricesIndiaItem``, one per
            (price, date) pair.
        """
        log.msg(response.url)
        selector = HtmlXPathSelector(response)
        variants_date = selector.select(
            "//span[@class='normal']//text()").extract()
        variants_price = selector.select(
            "//table[@id='objContPreviousPrices_grdPreviousPrices']//tr//td[@class='normal']//text()"
        ).extract()

        # Mean of each group produced by __group_iter(prices, 4).
        # NOTE(review): av_price is never read afterwards; the loop below
        # zips the raw price cells with the dates. Possibly av_price was
        # meant to be zipped instead -- confirm against the page layout.
        av_price = []
        for price_list in self.__group_iter(variants_price, 4):
            mean = 0
            for entry in price_list:
                mean = float(mean) + float(entry) / float(len(price_list))
            av_price.append(mean)

        quantity = '1 lt'
        items = []
        for price, date in zip(variants_price, variants_date):
            # __unit_price is assumed to return a 3-tuple here -- TODO confirm
            value, measure, unitprice = self.__unit_price(price, quantity)

            item = BillionPricesIndiaItem()
            item['date'] = date
            item['vendor'] = "ioc"
            item['product'] = "gasoline"
            item['category'] = "oil and gas"
            item['price'] = price
            item['quantity'] = value
            item['measure'] = measure
            item['unitprice'] = unitprice
            items.append(item)
        return items
Exemple #3
0
 def detail(self, response):
     """Build a PepperFry item from a product page.

     Logs the response URL, then reads the product title and the price
     text. The original guard (``len(x) != 0 or x != None``) was always
     true, so pages lacking a title or price still produced an item whose
     price was an empty list; such pages are now skipped and logged.

     :param response: product page response.
     :returns: a populated ``BillionPricesIndiaItem``, or ``None`` when
         the title or the price is not found on the page.
     """
     log.msg(response.url)
     hxs = HtmlXPathSelector(response)
     name_xpath = '//*[@id="vip_content_section"]/div[2]/h1/text()'

     # Evaluate each XPath once; the first match (if any) is what is used.
     product_name = hxs.xpath(name_xpath).extract()
     product_price = hxs.xpath('//*[@id="price-val"]/text()').extract()
     if not product_name or not product_price:
         log.msg("Skipping %s: product name or price not found"
                 % response.url)
         return None

     l = ItemLoader(item=BillionPricesIndiaItem(), response=response)
     l.add_xpath('product_name', name_xpath)
     l.add_xpath('category', '//*[@id="cat_crum"]/@value')
     l.add_xpath('product', '//*[@id="overview_tab"]/div/div/p/text()')
     item = l.load_item()

     item['product_url'] = response.url
     item['price'] = product_price[0]
     item['vendor'] = 'PepperFry'
     item['city'] = 'Mumbai'
     item['state'] = 'Maharashtra'
     item['country'] = 'India'
     item['date'] = time.strftime("%d/%m/%Y")
     return item
 def parse(self, response):
     """Yield one item per store offer listed in a JSON product feed.

     The response body is parsed as JSON; each entry of its ``product``
     list supplies ``model``, ``section`` and a ``stores`` list whose
     entries carry ``price`` and ``website``.

     :param response: response whose ``body`` is the JSON document.
     :returns: generator of ``BillionPricesIndiaItem``; products with no
         stores contribute nothing.
     :raises KeyError: if an expected key is missing from the feed.
     :raises ValueError: if a store price cannot be converted to float.
     """
     results = json.loads(response.body)
     for result in results['product']:
         product = result['model']
         category = result['section']
         # Slice instead of indexing so an empty section does not raise.
         category = category[:1].upper() + category[1:]
         for store in result['stores']:
             price = float(store['price'])
             # A fresh item per offer: re-yielding one shared instance
             # would let later stores overwrite earlier offers that the
             # consumer still holds a reference to.
             item = BillionPricesIndiaItem()
             item['product'] = product
             item['category'] = category
             item['date'] = time.strftime("%d/%m/%Y")
             item['vendor'] = store['website']
             item['quantity'] = 1
             item['measure'] = 'pcs'
             item['price'] = price
             # Quantity is always one piece, so unit price equals price.
             item['unitprice'] = price
             yield item
Exemple #5
0
    def parse_product(self, response):
        """Build a Local Banya item from a product detail page.

        The price is read from the first of four candidate XPaths (the
        ``div[7]`` to ``div[4]`` positions, in that order) that matches
        any text; if none match, the price stays an empty list. The
        remaining fields are filled through an ``ItemLoader`` and fixed
        vendor/location values.

        :param response: product detail page response.
        :returns: the populated ``BillionPricesIndiaItem``.
        """
        price_xpaths = (
            '//*[@id="product_detail_view_1"]/div/div[7]/div[2]/span[2]/text()',
            '//*[@id="product_detail_view_1"]/div/div[6]/div[2]/span[2]/text()',
            '//*[@id="product_detail_view_1"]/div/div[5]/div[2]/span[2]/text()',
            '//*[@id="product_detail_view_1"]/div/div[4]/div[2]/span[2]/text()',
        )
        loader_rules = (
            ('product_name', '//*[@id="inner"]/div[1]/div[1]/div/div/text()'),
            ('quantity',
             '//*[@id="product_detail_view_1"]/div/div[1]/div/text()'),
            ('category', '//*[@id="inner"]/div[1]/div[1]/div/a[1]/text()'),
            ('product', '//*[@id="inner"]/div[1]/div[1]/div/a[2]/text()'),
        )

        selector = Selector(response)
        # Try the candidates in order and stop at the first one that matches.
        for candidate in price_xpaths:
            price = selector.xpath(candidate).extract()
            if price:
                break

        loader = ItemLoader(item=BillionPricesIndiaItem(), response=response)
        for field_name, field_xpath in loader_rules:
            loader.add_xpath(field_name, field_xpath)
        item = loader.load_item()

        item['product_url'] = response.url
        item['price'] = price
        item['vendor'] = 'Local Banya'
        item['city'] = 'Mumbai'
        item['state'] = 'Maharashtra'
        item['country'] = 'India'
        item['date'] = str(time.strftime("%d/%m/%Y"))
        return item
 def parse_products(self, response):
     """Yield one LocalBanya item per product link on a listing page.

     Anchors are looked up relative to every element whose class is
     ``product-container floatL``. The original ``return`` inside the
     loop emitted only the first anchor; every complete product is now
     yielded. Anchors missing the link, name, price or quantity text are
     skipped instead of raising ``IndexError``.

     :param response: listing page response.
     :returns: generator of ``BillionPricesIndiaItem``.
     """
     hxs = HtmlXPathSelector(response)
     product_containers = hxs.xpath(
         '//*[@class="product-container floatL"]')
     for product in product_containers.xpath('..//div/a'):
         url = product.xpath('@href').extract()
         name = product.xpath('.//*[@class=""]/text()').extract()
         price = product.xpath('div[3]/div[2]/div[1]/text()').extract()
         quantity = product.xpath('div[3]/div[1]/span[1]/text()').extract()
         if not (url and name and price and quantity):
             # Not a complete product card; nothing usable to emit.
             continue

         item = BillionPricesIndiaItem()
         item['product_url'] = url[0]
         item['product_name'] = name[0]
         item['price'] = price[0]
         item['quantity'] = quantity[0]
         item['vendor'] = 'LocalBanya'
         item['city'] = 'Mumbai'
         item['state'] = 'Maharashtra'
         item['country'] = 'India'
         item['date'] = time.strftime("%d/%m/%Y")
         yield item
Exemple #7
0
    def detail(self, response):
        """Build bigbasket items for a product page, one per size variant.

        The product name is the second-to-last ``/`` segment of the URL.
        When the page lists size-variant labels, the text before the first
        ``-`` of each label is the quantity and the prices are pulled from
        the label with a regular expression. When there are no variant
        labels, the page's single price and quantity blocks are used; the
        original guard made that branch unreachable.

        When exactly one price is found it is used; otherwise the second
        one is (presumably the first is a list price -- TODO confirm).

        :param response: product page response.
        :returns: list of ``BillionPricesIndiaItem``. Entries with no
            price, or for which ``__unit_price`` returns ``None``, are
            skipped.
        """
        log.msg(response.url)
        productTitle = response.url.split("/")[-2]
        hxs = HtmlXPathSelector(response)
        variants = hxs.select(
            "//div[@class='uiv2-size-variants']/label/text()").extract()

        # (price text, quantity text) pairs still to be turned into items.
        candidates = []
        if variants:
            quantitylist = []
            pricelist = []
            for variant in variants:
                quantity = variant.split('-')[0].strip()
                price = re.findall(r'[Rs ]\d+\.?\d*', variant)
                if not price:
                    # No price in the label; price[1] used to raise here.
                    continue
                # Skip only when both the quantity and the price list have
                # been seen before.
                if quantity in quantitylist and price in pricelist:
                    continue
                quantitylist.append(quantity)
                pricelist.append(price)
                chosen = price[0] if len(price) == 1 else price[1]
                candidates.append((chosen.strip(), quantity))
        else:
            price = hxs.select("//div[@class='uiv2-price']/text()").extract()
            quantity = hxs.select(
                "//div[@class='uiv2-field-wrap mt10']/text()").extract()
            if price and quantity:
                chosen = price[0] if len(price) == 1 else price[1]
                # Keep only the last space-separated token of the price.
                candidates.append(
                    (chosen.split(" ")[-1].strip(), quantity[0].strip()))

        items = []
        for p_price, quantity in candidates:
            # Call the helper once; None means no unit price is available.
            unit = self.__unit_price(p_price, quantity)
            if unit is None:
                continue
            value, measure, unitprice = unit

            item = BillionPricesIndiaItem()
            item['date'] = time.strftime("%d/%m/%Y")
            item['vendor'] = 'bigbasket'
            item['product'] = productTitle
            item['category'] = self.category
            item['price'] = p_price
            item['quantity'] = value
            item['measure'] = measure
            item['unitprice'] = unitprice
            items.append(item)
        return items