Example #1
0
def create_new_category(category,subcategory):
    """Create, save and return a ``Category`` named *category*.

    When *subcategory* holds one, two or three entries they are copied, in
    order, onto ``subcategory_1`` .. ``subcategory_3``.  For any other
    length (empty, or more than three) no sub-category attribute is set and
    "no sub category" is printed instead.  The record is saved either way.
    """
    record = Category()
    record.category_name = category
    count = len(subcategory)
    if count in (1, 2, 3):
        # Fill subcategory_1 .. subcategory_<count> from matching positions.
        for position in range(count):
            setattr(record, "subcategory_%d" % (position + 1),
                    subcategory[position])
    else:
        print("no sub category")
    record.save()
    return record
 def parse_item(self, response):
     """Scrape one product page, persist it, and return the scraped item.

     Fields are read from ``response`` with XPath and stored on a
     ``BuildkarItem``.  The same values are then written, in this order, to
     a ``BaseProducts`` row, a ``Subscribe_Product`` row, an ``SPM`` row and
     a ``Category`` row, all with ``source_id`` 5.

     Returns the item only when title, category, price and variant are all
     non-empty.  Returns ``None`` otherwise, and also when any step raises;
     such failures are logged with a traceback instead of being silently
     discarded.  Rows saved before the failing step are not rolled back.
     """
     # Function-scope import: the file's import block is not visible here.
     import logging

     def drop(text, *tokens):
         # Remove each token from text, strictly in the order given.
         for token in tokens:
             text = text.replace(token, "")
         return text

     try:
         sel = Selector(response)

         def stripped_repr(xpath):
             # str() of the list of whitespace-stripped text nodes, i.e. a
             # Python 2 list repr such as "[u'a', u'b']".
             return str(list(map(unicode.strip,
                                 sel.xpath(xpath).extract())))

         item = BuildkarItem()
         item['url'] = response.request.url
         item['title'] = sel.xpath('//h1[@itemprop="name"]/text()').extract()

         breadcrumb = sel.xpath(
             '//nav[@itemprop="breadcrumb"]/a/text()').extract()
         item['level'] = len(breadcrumb)
         item['category'] = drop(str(breadcrumb),
                                 "u'", "[", "]", "Home", "',")

         try:
             item['brand'] = sel.xpath(
                 '//p[@itemprop="brand"]/text()').extract()
         except Exception:
             # Brand is optional.  NOTE(review): presumably this guards
             # against 'brand' not being a declared item field - confirm.
             pass

         # Struck-through (<del>) amount, stored as price1 / store_price.
         item['price1'] = drop(
             stripped_repr(
                 '//p[@class="price"]/del/span[@class="amount"]/text()'),
             "u'", "[", "]", "Rs.\\xa0", "'", '"')
         # Amount inside <ins>, stored as price.
         item['price'] = drop(
             stripped_repr(
                 '//p[@class="price"]//ins/span[@class="amount"]/text()'),
             "u'", "[", "]", "Rs.\\xa0", "'", '"')
         item['desc'] = drop(
             stripped_repr('//div[@id="content_description"]//text()'),
             "u'", "[", "]", "'")
         item['Variant'] = sel.xpath(
             '//input[@name="quantity"]/@min').extract()

         # An empty title or variant list raises IndexError below and is
         # handled by the outer except (logged, returns None).
         base_product = BaseProducts()
         base_product.source_url = item['url']
         base_product.Sku = item['title'][0]
         base_product.title = item['title'][0]
         base_product.category_name = item['category']
         base_product.description = item['desc']
         base_product.source_id = 5
         base_product.save()

         subscription = Subscribe_Product()
         subscription.bp = base_product
         subscription.source_id = 5
         subscription.Sku = item['title'][0]
         subscription.Variant = item['Variant'][0]
         subscription.save()

         pricing = SPM()
         pricing.sp = subscription
         pricing.Sku = item['title'][0]
         # Both keys are always assigned above, so no fallback is needed.
         pricing.price = item['price']
         pricing.store_price = item['price1']
         pricing.source_id = 5
         pricing.save()

         category_row = Category()
         category_row.category_name = item['category']
         category_row.category_path = item['category']
         category_row.level = "3"
         category_row.source_id = 5
         category_row.save()

         if (item['title'] and item['category'] and item['price']
                 and item['Variant']):
             return item
     except Exception:
         # Keep the crawl going, but leave a trace of what went wrong.
         logging.getLogger(__name__).exception(
             "parse_item failed for %s", response.url)
     return None
    def parse(self, response):
        print ">>>>>", response.request.url
        sel = Selector(response)
        items = []
        data = []
        data1 = []
        item = SnapdealItem()
        self.driver.get(response.url)
        item['url'] = response.request.url
        #import pdb;pdb.set_trace()
        item['title'] = sel.xpath('//h1[@itemprop="name"]/text()').extract()
        item['Product_id'] = str(item['url']).split('/')[-1].replace(
            "#bcrumbLabelId:892", "")
        brand = sel.xpath('//i[@itemprop="name"]/text()').extract()
        item['brand'] = brand
        price = str(
            list(
                map(
                    unicode.strip,
                    sel.xpath('//p[@class="product-offer-price"]/text()').
                    extract())))
        price = price.replace("Rs.",
                              "").replace("[", "").replace("]", "").replace(
                                  "u'", "").replace(",", "").replace("'", "")
        item['price'] = price
        desc1 = str(
            list(
                map(
                    unicode.strip,
                    sel.xpath('//ul[@class="dtls-list clear"]//li//text()').
                    extract())))
        desc1 = desc1.replace("[", "").replace("]", "").replace("u'", "")
        item['desc1'] = desc1
        desc = str(
            list(
                map(
                    unicode.strip,
                    sel.xpath(
                        '//div[@itemprop="description"]//text()').extract())))
        desc = desc.replace("[", "").replace("]", "").replace("u'", "")
        item['desc'] = desc
        try:

            saller = str(
                list(
                    map(
                        unicode.strip,
                        sel.xpath(
                            '//a[@class="pdp-e-seller-info-name reset-margin"]/text()'
                        ).extract())))
            saller = saller.replace("u'", "").replace("[", "").replace("]", "")
        except:
            pass
        item['Saller'] = saller
        category_path = sel.xpath(
            '//div//span[@itemprop="title"]/text()').extract()
        level = len(category_path)
        item['level'] = level
        category_path = str(category_path).replace("u'", "").replace(
            "Home',",
            "").replace("[", "").replace("]", "").replace('"',
                                                          '').replace("'", "")
        item['category_path'] = category_path
        for o1 in self.driver.find_elements_by_xpath(
                '//*[@id="attribute-select-0"]/ul//li'):
            variant1 = o1.text
            print "var1", variant1
            if o1: o1.click()
            time.sleep(5)
            for x2 in self.driver.find_elements_by_xpath(
                    '//span//ul/li[@class="rippleGrey btn btn-toggle pdpAttr attrActive"]'
            ):
                variant2 = x2.text
                print "var2", variant2
                if x2: x2.click()
                time.sleep(5)
                for x1 in self.driver.find_elements_by_xpath(
                        '//*[@id="buyPriceBox"]/div[3]/div[2]/span[2]/span'):
                    price = x1.text
                    item['price'] = price.encode('ascii',
                                                 'ignore').replace(",", "")
                    #len(item['price'])
                    Variant = variant1, variant2
                    item['Variant'] = str(Variant)
                    self.spamwriter.writerow(item["price"])
                    print "----", self.count, item["price"], item['Variant']
                    self.count += 1
                    data.append(item["price"])
                    data1.append(item['Variant'])
                    print len(data)
        item1 = BaseProducts()
        item1.brand_name = item['brand'][0]
        item1.title = item['title'][0]
        item1.Product_id = item['Product_id']
        item1.Sku = item['title'][0]
        item1.category_name = "Building Material"
        item1.description = item['desc']
        item1.additional_information = item['desc1']
        item1.source_url = item['url']
        try:
            item1.unit_measurements = item['um'][0]
        except:
            item1.unit_measurements = ""
        item1.source_id = 2
        item1.save()
        item2 = Subscribe_Product()
        item3 = SPM()
        item2.bp = item1
        item2.source_id = 2
        item2.Sku = item['title'][0]
        item2.save()
        item3.sp = item2
        item3.Sku = item['title'][0]
        try:
            item3.price = float(item['price'])
        except:
            item3.price = 0
        item3.source_id = 2
        item3.saller = item['Saller']
        item3.save()
        item4 = Category()
        item4.category_name = "Building Material"
        item4.category_path = item['category_path']
        item4.level = item['level']
        item4.source_id = 2
        item4.save()

        for y1 in range(len(data)):
            print "datadata", data[y1]
            print "datadata111", data1[y1]
            item2 = Subscribe_Product()
            item3 = SPM()
            item2.bp = item1
            item2.source_id = 2
            item2.Sku = item['title'][0]
            item2.Variant = data1[y1]
            item2.save()
            item3.sp = item2
            item3.Sku = item['title'][0]
            try:
                item3.price = data[y1]
            except:
                item3.price = 0
            item3.source_id = 2
            item3.saller = item['Saller']
            item3.save()

        print item
        return item
Example #4
0
	def parse(self, response) :
		sel = Selector (response)
		data = []
		data1 = []
		items=[]
		item = MaterialtreeItem ()
		self.driver.get(response.url)
		time.sleep(2.5)
		item['url'] = response.request.url
		#import pdb;pdb.set_trace()
		item['title'] = sel.xpath('//div/h1/text()').extract()
		um1 = um2 = ""
		try:
			um1 = str(list(map(unicode.strip,sel.xpath('//span[@id="sellerPrice"]/text()').extract())))
			um1 = um1.split('/')[1]
			um1 = um1.replace("u'","").replace("Rs.","").replace("[","").replace("]","").replace("'","").replace('"','')
		except: pass	
		try:
			um2 = self.driver.find_element_by_xpath('//*[@id="ddContent"]/table/tbody//tr/td[6]')
			um2 = um2.text
			um2 = str(um2).replace("u'","")
			print "<<<<",um2
		except: pass 
		item['um'] = um2 or um1
		Sku1 = Sku2 = ""
		try:
			Sku1 = sel.xpath('//div[@class="sku"]/span[@class="value"]/text()').extract()
		except:pass
		try:
			Sku2 = sel.xpath('//div/h1/text()').extract()
		except: pass	
		item['Sku'] = Sku1 or Sku2
		try:
			item['Product_id'] = sel.xpath('//input[@name="productid"]/@value').extract()
		except: pass
		price11 = price22 = ""		

		try:
			price22 = self.driver.find_element_by_xpath('//*[@id="ddContent"]/table/tbody/tr[2]/td[7]/h5')
			price22 = price22.text
			price22 = str(price22).replace(".00","").replace(",","").replace("Rs","").replace("u'","")
			print ">>>",price22
		except: pass
		try:
			price11 = str(list(map(unicode.strip,sel.xpath('//span[@id="sellerPrice"]/text()').extract())))
			price11 = price11.split('/')[0]
			price11 = price11.replace("u'","").replace("Rs.","").replace("[","").replace("]","")
		except: pass
		item['price'] = price22 or price11
		try:
			seller = str(list(map(unicode.strip,sel.xpath('//span[@id="sold_by_name"]/a/text()').extract())))
			seller = seller.replace("u'","").replace("[","").replace("]","")
		except: pass
		item['Saller'] = seller
		#import pdb;pdb.set_trace()
		category = item['url'].split('/')[-1]
		category = str(category).split('-')[7:10]
		category = str(category).replace("'","").replace(",","").replace("[","").replace("]","")
		item['category'] = category
		desc = desc_new = ""
		try:
			desc = str(list(map(unicode.strip,sel.xpath('//div[@class="panel"]/div[@class="std"]/text()').extract())))
			desc = desc.replace("u'","").replace("[","").replace("]","")
		except:pass 
		try:
			desc_new = self.driver.find_element_by_xpath('//div[@id="category-description"]/p')
			desc_new = desc_new.text
			desc_new = str(desc_new).replace("u'","")
			print"......",desc_new
		except: pass 
		item['desc'] =  desc_new or desc
		try:
			desc1 = str(list(map(unicode.strip,sel.xpath('//table[@class="data-table"]/tbody//text()').extract())))
			desc1 = desc1.replace("u'","").replace("[","").replace("]","").replace("', ',","")
		except: pass	
		item['desc1'] = desc1
		brand1 = brand2 = ""
		try:
			brand1 = self.driver.find_element_by_xpath('//*[@id="ddContent"]/table/tbody//tr/td[3]')
			brand1 = brand1.text
			brand1 = str(brand1).replace("u'","")
		except: pass	
		try:
			brand2 = sel.xpath('//div[@class="brand-name attribute"]/text()').extract()
			brand2 = str(brand2).replace("u'","").replace("[","").replace("]","")
		except: pass
		item['brand'] = brand1 or brand2

		for x2 in  self.driver.find_elements_by_xpath("//*[@id='ddContent']/table/tbody//tr//h4"):
			price = x2.text
			item['price'] = price.encode('ascii', 'ignore').replace(",","").replace("Rs","")
			self.spamwriter.writerow(item['price'])
			print "----", self.count, item['price']
			self.count += 1
			data1.append(item['price'])
			
		row = self.driver.find_elements_by_xpath("//*[@id='ddContent']/table/tbody//tr")
		for x1 in row:
			variant = x1.text
			Variant = str(variant).split("/")[:2]
			item['Variant'] = Variant
			#print item['Variant']
			self.spamwriter.writerow(item['Variant'])
			print "----", self.count, item['Variant']
			self.count += 1
			data.append(item['Variant'])
			print">>>>>>>>", len(data)
			# if len(data)<7:
			# 	continue
			# else:break
			
		
		item1 = BaseProducts()	
		item1.brand_name = item['brand']
		item1.title = item['title'][0]
		try:
			item1.Product_id = item['Product_id'][0]
		except:
			item1.Product_id = ""
		item1.Sku = item['Sku'][0]
		#item1.category_name=  
		item1.category_name = item['category']
		item1.description=item['desc']
		item1.additional_information = item['desc1']
		item1.source_url = item['url']
		try:
			item1.unit_measurements = item['um']
		except:	item1.unit_measurements = ""
		item1.source_id = 3
		item1.save()
		item2 = Subscribe_Product()
		item3 = SPM()
		item2.bp = item1
		item2.source_id =3
		#item2.region_name = item['region_name']
		item2.Sku = item['Sku'][0]
		# item2.Variant = 
		item2.save()
		item3.sp = item2
		item3.Sku = item['Sku'][0]
		#item3.region_name = item['region_name']
		try:
			item3.store_price = float(item['price1'])
		except: item3.store_price = 0
		#item3.Delivery_time = item['Delivery_time']
		try:
			item3.price = float(item['price'])
		except : item3.price = 0
		item3.source_id = 3
		item3.saller = item['Saller']
		item3.save()
		item4 = Category()
		item4.source_id = 3
		item4.category_path = 'kitchen'+item['category']
		item4.category_name = item['category']
		item4.level = 3
		item4.save()
		for y1 in range(len(data)):
			print"datadata..........", data[y1]
			item2 = Subscribe_Product()
			item3 = SPM()
			item2.bp = item1
			item2.source_id =3
			item2.Sku = item['Sku'][0]
			item2.Variant = data[y1]
			item2.save()
			item3.sp = item2
			item3.Sku = item['Sku'][0]
			try:
				item3.price = data1[y1]
			except : item3.price = 0
			item3.source_id = 3
			item3.saller = item['Saller']
			item3.save()
			
		print item	
		return item
	def parse(self, response) :
		print ">>>>>", response.request.url 
		sel = Selector (response)
		items=[]
		data = []
		data1 = []
		item =  SnapdealItem()
		self.driver.get(response.url)
		item['url'] = response.request.url
		#import pdb;pdb.set_trace()
		item['title'] = sel.xpath('//h1[@itemprop="name"]/text()').extract()
		item['Product_id'] = str(item['url']).split('/')[-1].replace("#bcrumbLabelId:892","")
		brand = sel.xpath('//i[@itemprop="name"]/text()').extract()
		item['brand'] = brand
		price = str(list(map(unicode.strip,sel.xpath('//p[@class="product-offer-price"]/text()').extract())))
		price = price.replace("Rs.","").replace("[","").replace("]","").replace("u'","").replace(",","").replace("'","")
		item['price'] = price
		desc1 = str(list(map(unicode.strip,sel.xpath('//ul[@class="dtls-list clear"]//li//text()').extract())))
		desc1 = desc1.replace("[","").replace("]","").replace("u'","")
		item['desc1'] = desc1
		desc = str(list(map(unicode.strip,sel.xpath('//div[@itemprop="description"]//text()').extract())))
		desc = desc.replace("[","").replace("]","").replace("u'","")
		item['desc'] = desc
		try:
			
			saller =  str(list(map(unicode.strip,sel.xpath('//a[@class="pdp-e-seller-info-name reset-margin"]/text()').extract())))
			saller = saller.replace("u'","").replace("[","").replace("]","")
		except: pass
		item['Saller'] = saller
		category_path = sel.xpath('//div//span[@itemprop="title"]/text()').extract()
		level = len(category_path)
		item['level'] = level
		category_path = str(category_path).replace("u'","").replace("Home',","").replace("[","").replace("]","").replace('"','').replace("'","")
		item['category_path'] = category_path
		for o1 in self.driver.find_elements_by_xpath('//*[@id="attribute-select-0"]/ul//li'):
			variant1 = o1.text
			print"var1",variant1
			if o1:o1.click()
			time.sleep(5)					
			for x2 in self.driver.find_elements_by_xpath('//span//ul/li[@class="rippleGrey btn btn-toggle pdpAttr attrActive"]'):
				variant2 = x2.text
				print"var2",variant2
				if x2:x2.click()
				time.sleep(5)
				for x1 in  self.driver.find_elements_by_xpath('//*[@id="buyPriceBox"]/div[3]/div[2]/span[2]/span'):
					price = x1.text
					item['price'] = price.encode('ascii', 'ignore').replace(",","")
					#len(item['price'])
					Variant = variant1 , variant2
					item['Variant'] = str(Variant)
					self.spamwriter.writerow(item["price"])
					print "----", self.count, item["price"],item['Variant']
					self.count += 1
					data.append(item["price"])
					data1.append(item['Variant'])
					print len(data)
		item1 = BaseProducts()	
		item1.brand_name = item['brand'][0]
		item1.title = item['title'][0]
		item1.Product_id = item['Product_id']
		item1.Sku = item['title'][0]
		item1.category_name=  "Building Material"
		item1.description=item['desc']
		item1.additional_information = item['desc1']
		item1.source_url = item['url']
		try:
			item1.unit_measurements = item['um'][0]
		except:	item1.unit_measurements = ""
		item1.source_id = 2
		item1.save()
		item2 = Subscribe_Product()
		item3 = SPM()
		item2.bp = item1
		item2.source_id =2
		item2.Sku = item['title'][0]
		item2.save()
		item3.sp = item2
		item3.Sku = item['title'][0]
		try:
			item3.price = float(item['price'])
		except : item3.price = 0
		item3.source_id = 2
		item3.saller = item['Saller']
		item3.save()
		item4 = Category()
		item4.category_name = "Building Material"
		item4.category_path = item['category_path']
		item4.level = item['level']
		item4.source_id = 2
		item4.save()

		for y1 in range(len(data)):
			print"datadata", data[y1]
			print"datadata111",data1[y1]
			item2 = Subscribe_Product()
			item3 = SPM()
			item2.bp = item1
			item2.source_id =2
			item2.Sku = item['title'][0]
			item2.Variant = data1[y1]
			item2.save()
			item3.sp = item2
			item3.Sku = item['title'][0]
			try:
				item3.price = data[y1]
			except : item3.price = 0
			item3.source_id = 2
			item3.saller = item['Saller']
			item3.save()
			
		print item	
		return item