def detail(self, response):
    """Scrape one offer per (price, seller) pair from a tablet product page.

    Prices and seller names come from two separate XPath queries and are
    paired positionally with ``zip``, so pairing stops at the shorter list.

    Args:
        response: The downloaded product page. The product identifier is the
            last ``/``-separated URL segment, cut at its first ``.``.

    Returns:
        A list of ``BillionPricesIndiaItem``; empty when either query
        matches nothing.

    Raises:
        ValueError: If the last space-separated token of a price text (with
            commas removed) cannot be converted by ``float``.
    """
    log.msg(response.url)
    hxs = HtmlXPathSelector(response)
    variants_price = hxs.select(
        "//div[@class='fleft catbox pricerate']//span/text()").extract()
    variants_seller = hxs.select(
        "//div[@class='catbox fleft storeimage']/img/@alt").extract()

    # Identical for every offer on the page, so computed once.
    today = time.strftime("%d/%m/%Y")
    product = response.url.split('/')[-1].split(".")[0]

    items = []
    # No emptiness guard is needed: zip() over an empty list yields nothing.
    # The previous guard (`len(x) != 0 or x != None`) was always true anyway.
    for price, seller in zip(variants_price, variants_seller):
        # Drop thousands separators, then keep the trailing numeric token.
        amount = float(price.replace(',', '').split(" ")[-1])

        item = BillionPricesIndiaItem()
        item['date'] = today
        # The vendor name is the last word of the store logo's alt text.
        item['vendor'] = seller.split(" ")[-1]
        item['product'] = product
        item['category'] = "tablets"
        item['price'] = amount
        item['quantity'] = '1'
        item['measure'] = 'pcs'
        # Sold per piece, so the unit price equals the price.
        item['unitprice'] = amount
        items.append(item)
    return items
def parse(self, response):
    """Scrape the IOC gasoline price table into one item per price cell.

    Date texts and price-cell texts are read with two XPath queries and
    paired positionally; pairing stops at the shorter list. Every item is
    priced against a fixed quantity of ``'1 lt'`` via ``self.__unit_price``,
    whose result is unpacked as ``(value, measure, unitprice)``.

    Args:
        response: The downloaded price-history page.

    Returns:
        A list of ``BillionPricesIndiaItem``.
    """
    log.msg(response.url)
    page = HtmlXPathSelector(response)
    dates = page.select("//span[@class='normal']//text()").extract()
    prices = page.select(
        "//table[@id='objContPreviousPrices_grdPreviousPrices']//tr//td[@class='normal']//text()"
    ).extract()

    def _mean_of(group):
        # Fold that adds value / len(group) for each member, starting at 0.
        return reduce(
            lambda total, value: float(total) + float(value) / float(len(group)),
            group, 0)

    # NOTE(review): av_price is built but never read below; the loop pairs
    # the raw price cells with the dates instead. Kept because the fold
    # still raises on non-numeric cells -- confirm whether the averages
    # were meant to be zipped with the dates.
    av_price = [_mean_of(group) for group in self.__group_iter(prices, 4)]

    collected = []
    for cell, day in zip(prices, dates):
        entry = BillionPricesIndiaItem()
        entry['date'] = day
        entry['vendor'] = "ioc"
        entry['product'] = "gasoline"
        entry['category'] = "oil and gas"
        value, measure, unitprice = self.__unit_price(cell, '1 lt')
        entry['price'] = cell
        entry['quantity'] = value
        entry['measure'] = measure
        entry['unitprice'] = unitprice
        collected.append(entry)
    return collected
def detail(self, response):
    """Build a PepperFry item from a product detail page.

    The item's ``product_name``, ``category`` and ``product`` fields are
    filled by an ``ItemLoader`` from XPath expressions; the remaining fields
    are set directly. City, state and country are hard-coded.

    Args:
        response: The downloaded product detail page.

    Returns:
        The populated ``BillionPricesIndiaItem``, or ``None`` when the page
        has no product-name text or no price text.
    """
    log.msg(response.url)
    hxs = HtmlXPathSelector(response)

    name_xpath = '//*[@id="vip_content_section"]/div[2]/h1/text()'
    names = hxs.xpath(name_xpath).extract()
    prices = hxs.xpath('//*[@id="price-val"]/text()').extract()

    # The previous guard, `(len(x) != 0 or x != None) and ...`, was always
    # true for the lists extract() returns, so pages without a price still
    # produced an item whose price was an empty list. Skip them instead.
    if not names or not prices:
        return None

    l = ItemLoader(item=BillionPricesIndiaItem(), response=response)
    l.add_xpath('product_name', name_xpath)
    l.add_xpath('category', '//*[@id="cat_crum"]/@value')
    l.add_xpath('product', '//*[@id="overview_tab"]/div/div/p/text()')
    item = l.load_item()

    item['product_url'] = response.url
    # Only the first matching price text is stored.
    item['price'] = prices[0]
    item['vendor'] = 'PepperFry'
    item['city'] = 'Mumbai'
    item['state'] = 'Maharashtra'
    item['country'] = 'India'
    item['date'] = time.strftime("%d/%m/%Y")
    return item
def parse(self, response):
    """Yield one item per (product, store) offer from a JSON price feed.

    The response body is parsed as JSON. Each entry of its ``'product'``
    list supplies ``'model'``, ``'section'`` and ``'stores'``; each store
    supplies ``'price'`` and ``'website'``.

    Args:
        response: Response whose ``body`` is the JSON document.

    Yields:
        A fresh ``BillionPricesIndiaItem`` for every store of every product.
        Products with no stores yield nothing.

    Raises:
        KeyError: If an expected key is missing from the feed.
        ValueError: If the body is not valid JSON or a store price cannot be
            converted by ``float``.
    """
    results = json.loads(response.body)
    today = time.strftime("%d/%m/%Y")

    for result in results['product']:
        product = result['model']
        section = result['section']
        # Upper-case the first letter only; slicing (not indexing) keeps an
        # empty section from raising IndexError.
        category = section[:1].upper() + section[1:]

        for store in result['stores']:
            price = float(store['price'])

            # A new item per offer: the previous code reused one instance
            # per product, so every yielded offer aliased the same object
            # and later stores overwrote earlier ones for any consumer that
            # still held a reference.
            item = BillionPricesIndiaItem()
            item['product'] = product
            item['category'] = category
            item['date'] = today
            item['vendor'] = store['website']
            item['quantity'] = 1
            item['measure'] = 'pcs'
            item['price'] = price
            # Quantity is one piece, so the unit price equals the price.
            item['unitprice'] = price
            yield item
def parse_product(self, response):
    """Build a Local Banya item from a product detail page.

    The price is looked up under ``div[7]`` of the product detail view and,
    while the result is empty, under ``div[6]``, ``div[5]`` and ``div[4]``
    in that order. The stored price is whatever list ``extract()`` returned
    last, which is empty if none of the four positions match.

    Args:
        response: The downloaded product detail page.

    Returns:
        The populated ``BillionPricesIndiaItem``.
    """
    page = Selector(response)

    price_candidates = (
        '//*[@id="product_detail_view_1"]/div/div[7]/div[2]/span[2]/text()',
        '//*[@id="product_detail_view_1"]/div/div[6]/div[2]/span[2]/text()',
        '//*[@id="product_detail_view_1"]/div/div[5]/div[2]/span[2]/text()',
        '//*[@id="product_detail_view_1"]/div/div[4]/div[2]/span[2]/text()',
    )
    for candidate in price_candidates:
        price = page.xpath(candidate).extract()
        if price:
            # First position that yields any text wins.
            break

    loader = ItemLoader(item=BillionPricesIndiaItem(), response=response)
    loader_fields = (
        ('product_name', '//*[@id="inner"]/div[1]/div[1]/div/div/text()'),
        ('quantity', '//*[@id="product_detail_view_1"]/div/div[1]/div/text()'),
        ('category', '//*[@id="inner"]/div[1]/div[1]/div/a[1]/text()'),
        ('product', '//*[@id="inner"]/div[1]/div[1]/div/a[2]/text()'),
    )
    for field_name, field_xpath in loader_fields:
        loader.add_xpath(field_name, field_xpath)
    record = loader.load_item()

    record['product_url'] = response.url
    record['price'] = price
    record['vendor'] = 'Local Banya'
    record['city'] = 'Mumbai'
    record['state'] = 'Maharashtra'
    record['country'] = 'India'
    record['date'] = str(time.strftime("%d/%m/%Y"))
    return record
def parse_products(self, response):
    """Yield one LocalBanya item per complete product link on a listing page.

    Candidate links are the ``..//div/a`` anchors reached from the elements
    whose class is exactly ``"product-container floatL"``. City, state and
    country are hard-coded.

    The previous version executed ``return item`` in the loop, so at most one
    product per page was emitted. It also raised ``IndexError`` on any anchor
    lacking one of the expected sub-nodes. This version is a generator, which
    Scrapy accepts from a callback just like a single item.

    Args:
        response: The downloaded listing page.

    Yields:
        A ``BillionPricesIndiaItem`` for every anchor that has an href, a
        name, a price and a quantity.
    """
    hxs = HtmlXPathSelector(response)
    product_containers = hxs.xpath(
        '//*[@class="product-container floatL"]')
    today = time.strftime("%d/%m/%Y")

    for product in product_containers.xpath('..//div/a'):
        href = product.xpath('@href').extract()
        name = product.xpath('.//*[@class=""]/text()').extract()
        price = product.xpath('div[3]/div[2]/div[1]/text()').extract()
        quantity = product.xpath('div[3]/div[1]/span[1]/text()').extract()

        # Anchors missing any of these nodes are skipped rather than
        # aborting the whole page.
        if not (href and name and price and quantity):
            continue

        l = ItemLoader(item=BillionPricesIndiaItem(), response=response)
        item = l.load_item()
        item['product_url'] = href[0]
        item['product_name'] = name[0]
        item['price'] = price[0]
        item['quantity'] = quantity[0]
        item['vendor'] = 'LocalBanya'
        item['city'] = 'Mumbai'
        item['state'] = 'Maharashtra'
        item['country'] = 'India'
        item['date'] = today
        # The Python 2-only debug `print item` was dropped.
        yield item
def detail(self, response):
    # Build bigbasket items from a product page: one candidate item per
    # size-variant label, with a separate branch that reads a single
    # page-level price and quantity.
    #
    # Returns the list `items`.
    #
    # NOTE(review): this method was stored on one flattened line; the
    # nesting below is reconstructed from syntax. The pairing of `else:` and
    # the level of each `items.append(item)` should be confirmed against
    # the original file.
    log.msg(response.url)
    # Product identifier: the second-to-last '/'-separated URL segment.
    productTitle = response.url.split("/")[-2]
    hxs = HtmlXPathSelector(response)
    variants = hxs.select(
        "//div[@class='uiv2-size-variants']/label/text()").extract()
    quantitylist = []
    pricelist = []
    items = []
    # productList is never appended to in this method, so every
    # `productTitle + quantity not in productList` test below is True.
    productList = []
    # NOTE(review): if extract() always returns a list (as Scrapy documents),
    # `variants != None` is always true, this condition never fails and the
    # `else` branch is unreachable -- confirm the intended test.
    if len(variants) != 0 or variants != None:
        for variant in variants:
            # Quantity is the text before the first '-' in the label.
            quantity = variant.split('-')[0].strip()
            # Each match is one of 'R', 's' or a space followed by digits
            # with an optional decimal part.
            price = re.findall(r'[Rs ]\d+\.?\d*', variant)
            # `and` binds tighter than `or`: this reads as
            # A or (B and C), with C always True (see productList above).
            if quantity not in quantitylist or price not in pricelist and productTitle + quantity not in productList:
                item = BillionPricesIndiaItem()
                quantitylist.append(quantity)
                item['date'] = str(time.strftime("%d/%m/%Y"))
                item['vendor'] = 'bigbasket'
                item['product'] = productTitle
                item['category'] = self.category
                p_price = ""
                if len(price) == 1:
                    pricelist.append(price)
                    item['price'] = price[0].strip()
                    p_price = price[0].strip()
                # This branch also covers len(price) == 0, where price[1]
                # raises IndexError; with two or more matches the second
                # one is taken.
                elif len(price) != 1:
                    pricelist.append(price)
                    item['price'] = price[1].strip()
                    p_price = price[1].strip()
                # __unit_price is called twice with the same arguments: once
                # for the None check and once for the unpacking.
                if self.__unit_price(p_price, quantity) is not None:
                    value, measure, unitprice = self.__unit_price(
                        p_price, quantity)
                    item['quantity'] = value
                    item['measure'] = measure
                    item['unitprice'] = unitprice
                    # NOTE(review): assumed to sit inside the None check, so
                    # items without a unit price are dropped -- confirm.
                    items.append(item)
    else:
        # Single-price path: page-level price and quantity, no variants.
        price = hxs.select("//div[@class='uiv2-price']/text()").extract()
        # Raises IndexError if the quantity node is absent.
        quantity = hxs.select("//div[@class='uiv2-field-wrap mt10']/text()"
                              ).extract()[0].strip()
        if productTitle + quantity not in productList:
            item = BillionPricesIndiaItem()
            item['date'] = str(time.strftime("%d/%m/%Y"))
            item['vendor'] = 'bigbasket'
            item['product'] = productTitle
            item['category'] = self.category
            # The price is the last space-separated token of the chosen text.
            if len(price) == 1 and price not in pricelist:
                item['price'] = price[0].split(" ")[-1:][0].strip()
                p_price = price[0].split(" ")[-1:][0].strip()
            elif len(price) != 1 and price not in pricelist:
                item['price'] = price[1].split(" ")[-1:][0].strip()
                p_price = price[1].split(" ")[-1:][0].strip()
            # Unlike the variant path, the result is unpacked without a
            # None check.
            value, measure, unitprice = self.__unit_price(
                p_price, quantity)
            item['quantity'] = value
            item['measure'] = measure
            item['unitprice'] = unitprice 
            items.append(item)
    return items