Esempio n. 1
0
    def parse(self, response):
        """Yield one ``AdItem`` per ``<li>`` of the ``product-layout`` list.

        Every field is read on a best-effort basis: a lookup that fails
        leaves the field as an empty string (``0`` for ``postal_code``)
        instead of aborting the page.
        """
        empty = ""
        for sel in response.xpath('//ol[@class="product-layout"]/li'):
            item = AdItem()
            item['source'] = "eloue"
            item['category'] = self.category
            item['subcategory'] = self.subcategory

            try:
                item['title'] = sel.xpath("@name").extract()[0]
            except Exception:
                item['title'] = empty
            try:
                # The image address is taken from the inline style: the text
                # before the first ")" and after the last ":".
                style = sel.xpath('div/div/a/img/@style').extract()[0]
                item['media'] = "https:" + style.split(')')[0].split(':')[-1]
            except Exception:
                item['media'] = empty
            try:
                item['url'] = self.allowed_domains[0] + sel.xpath(
                    'div/div/a/@href').extract()[0]
            except Exception:
                item['url'] = empty
            try:
                item['description'] = sel.xpath(
                    'div/div[@class="info"]/p[@class="full_description"]/text()'
                ).extract()[0]
            except Exception:
                item['description'] = empty

            try:
                item['location'] = sel.xpath(
                    'div/div[@class="info"]/p/text()').extract()[0]
            except Exception:
                item['location'] = empty
            # Parsed in its own block so that a location without a usable
            # postal code is kept instead of being reset to empty.
            try:
                item['postal_code'] = int(item['location'].split(', ')[1])
            except Exception:
                item['postal_code'] = 0

            try:
                item['latitude'] = sel.xpath("@locationx").extract()[0]
            except Exception:
                item['latitude'] = empty
            try:
                item['longitude'] = sel.xpath("@locationy").extract()[0]
            except Exception:
                item['longitude'] = empty

            try:
                price = sel.xpath('div/div/span[@class="badge price"]/text()'
                                  ).extract()[0].split('/')
                item['price'] = price[0].strip(' ').encode('utf-8').strip('€')
                # A price written without "/<period>" keeps its amount and
                # simply has no period.
                item['period'] = price[1] if len(price) > 1 else empty
                item['currency'] = "€"
            except Exception:
                item['price'] = empty
                item['period'] = empty
                item['currency'] = empty
            item['evaluations'] = empty
            yield item
Esempio n. 2
0
 def parse(self, response):
     """Build one ``AdItem`` for each ``<item id=...>`` element.

     A field whose lookup fails is set to an empty string.  When an item's
     own latitude/longitude text is present but at most one character long,
     the ``/search/lat`` / ``/search/lng`` value is used instead.  The
     period is always ``"jour"``.
     """
     blank = ""
     # (field, XPath relative to the <item> node), in assignment order.
     leading = (
         ('title', 'title/text()'),
         ('media', 'image/palm/@url'),
         ('url', 'link/text()'),
         ('description', 'subtitle/text()'),
         ('location', 'location/locality/text()'),
         ('postal_code', 'location/postal_code/text()'),
     )
     # (field, per-item XPath, fallback XPath)
     coordinates = (
         ('latitude', 'location/lat/text()', '/search/lat/text()'),
         ('longitude', 'location/lng/text()', '/search/lng/text()'),
     )
     trailing = (
         ('price', 'price/text()'),
         ('currency', 'price/@currency'),
         ('evaluations', 'evaluation_number/text()'),
     )

     for node in response.xpath('//item[@id]'):
         ad = AdItem()
         ad['source'] = self.name
         ad['category'] = self.category
         ad['subcategory'] = self.subcategory

         for field, query in leading:
             try:
                 ad[field] = node.xpath(query).extract()[0]
             except:
                 ad[field] = blank

         for field, own_query, fallback_query in coordinates:
             try:
                 value = node.xpath(own_query).extract()[0]
                 if len(value) <= 1:
                     value = node.xpath(fallback_query).extract()[0]
                 ad[field] = value
             except:
                 ad[field] = blank

         for field, query in trailing:
             try:
                 ad[field] = node.xpath(query).extract()[0]
             except:
                 ad[field] = blank

         ad['period'] = "jour"
         yield ad
Esempio n. 3
0
    def parse(self, response):
        """Turn every ``<li>`` of the ``results`` list into an ``AdItem``.

        Each field is looked up independently; any failure while reading a
        field leaves it as an empty string.  Latitude and longitude are
        always left empty.
        """
        blank = ''

        def first(node, query):
            # First extracted match of ``query`` under ``node``; raises
            # IndexError when nothing matches.
            return node.xpath(query).extract()[0]

        for row in response.xpath("//ul[@id='results']/li"):
            ad = AdItem()
            ad['source'] = self.name
            ad['category'] = self.category
            ad['subcategory'] = self.subcategory

            for field, query in (
                    ('title',
                     "div/div[2]/div[@class='offer__details']/h3/a/text()"),
                    ('media', 'div/div/a/img[2]/@data-src')):
                try:
                    ad[field] = first(row, query)
                except:
                    ad[field] = blank

            try:
                ad['url'] = self.allowed_domains[0] + first(
                    row, 'div/div/a/@href')
            except:
                ad['url'] = blank

            for field, query in (
                    ('description',
                     "div/div[2]/div[@class='offer__details']/div[@class='offer__description']/text()"),
                    ('location',
                     "div/div[2]/div[@class='offer__details']/div[@class='offer__subtitle']/text()")):
                try:
                    ad[field] = first(row, query)
                except:
                    ad[field] = blank

            ad['latitude'] = blank
            ad['longitude'] = blank

            try:
                raw_price = first(
                    row,
                    "div/div[2]/div[@class='price price--mini js-price-per-night']/div/text()[2]")
                ad['price'] = raw_price.strip('\n').encode('utf-8').strip('€')
                ad['currency'] = "€"
            except:
                ad['price'] = blank
                ad['currency'] = blank

            try:
                ad['period'] = first(
                    row,
                    "div/div[2]/div[@class='price price--mini js-price-per-night']/div[2]/text()")
            except:
                ad['period'] = blank

            yield ad
    def parse(self, response):
        """Yield an ``AdItem`` for every ``<li>`` of the ``users`` list.

        Each field is read on a best-effort basis and falls back to an empty
        string when its lookup fails.  Latitude and longitude are always left
        empty.
        """
        empty = ''
        for sel in response.xpath('//ul[@class="users"]/li'):
            item = AdItem()
            item['source'] = self.name
            item['category'] = self.category
            item['subcategory'] = self.subcategory

            try:
                item['title'] = sel.xpath(
                    'div/div[@class="content"]/h3[@class="orange"]/a/text()'
                ).extract()[0]
            except Exception:
                item['title'] = empty

            try:
                item['media'] = sel.xpath('div/a/img/@src').extract()[0]
            except Exception:
                item['media'] = empty

            try:
                item['url'] = sel.xpath(
                    'div/div[@class="content"]/h3[@class="orange"]/a/@href'
                ).extract()[0]
            except Exception:
                item['url'] = empty

            try:
                item['description'] = sel.xpath(
                    'div/div[@class="content"]/p[@class="description"]/text()'
                ).extract()[0]
            except Exception:
                item['description'] = empty

            try:
                item['location'] = sel.xpath(
                    'div/div[@class="content"]/div[@class="map_information"]/p/text()[2]'
                ).extract()[0]
            except Exception:
                item['location'] = empty

            item['latitude'] = empty
            item['longitude'] = empty

            # Price and period use paths relative to the current <li>, so
            # they must be evaluated on ``sel``.  Evaluated on ``response``
            # they are resolved from the document root and never read this
            # listing's own values.
            try:
                item['price'] = sel.xpath(
                    'div/p[@class="price orange"]/text()').extract()[0].split(
                        '/')[0].encode('utf-8').strip('€')
                item['currency'] = "€"
            except Exception:
                item['price'] = empty
                item['currency'] = empty
            try:
                item['period'] = sel.xpath(
                    'div/div[@class="content"]/p[@class="meta"]/text()[3]'
                ).extract()[0]
            except Exception:
                item['period'] = empty

            yield item
Esempio n. 5
0
	def parse(self, response):
		"""Yield one ``AdItem`` for every ``<div data-id=...>`` of the page.

		Fields are read on a best-effort basis and fall back to an empty
		string (``0`` for ``evaluations``).  The description falls back to
		the listing name; the subcategory is ``"room"`` when the description
		contains "Chambre" and ``"apartment"`` otherwise.  The location is
		taken from the response URL, ``postal_code`` is always ``0`` and the
		period is always ``"nuit"``.
		"""
		empty = ''
		for sel in response.xpath('//div[@data-id]'):
			item = AdItem()
			item['source'] = self.name
			item['category'] = self.category

			try:
				item['title'] = sel.xpath('@data-name').extract()[0]
			except Exception:
				item['title'] = empty

			try:
				item['media'] = sel.xpath('div/a/div/img/@src').extract()[0]
			except Exception:
				item['media'] = empty

			try:
				item['url'] = self.allowed_domains[0] + sel.xpath('@data-url').extract()[0].split('?')[0]
			except Exception:
				item['url'] = empty

			try:
				item['description'] = sel.xpath('div[2]/div/div[@itemprop="description"]/a/text()').extract()[0]
			except Exception:
				# Fall back to the listing name.  The title already holds
				# @data-name (or empty when the attribute is missing), so
				# reusing it cannot raise the way a second lookup could.
				item['description'] = item['title']

			if "Chambre" in item['description']:
				item['subcategory'] = "room"
			else:
				item['subcategory'] = "apartment"

			# NOTE(review): ``pattern`` is not defined in this method;
			# presumably a module-level regex - confirm.
			try:
				item['evaluations'] = 0
				find = re.search(pattern, item['description'])
				if find:
					item['evaluations'] = int(find.group())
			except Exception:
				item['evaluations'] = 0

			# Guarded like every other field: one element without
			# coordinates must not abort the rest of the page.
			try:
				item['latitude'] = sel.xpath('@data-lat').extract()[0]
			except Exception:
				item['latitude'] = empty
			try:
				item['longitude'] = sel.xpath('@data-lng').extract()[0]
			except Exception:
				item['longitude'] = empty

			try:
				# Text after the last "s/" of the URL path, query string
				# removed, percent-decoded.
				item['location'] = urllib.unquote(response.url.split('?')[0].split('s/')[-1])
			except Exception:
				item['location'] = empty
			item['postal_code'] = 0

			try:
				item['price'] = sel.xpath('div/a[2]/div/span/text()').extract()[0]
				item['currency'] = "€"
			except Exception:
				item['price'] = empty
				item['currency'] = empty

			item['period'] = "nuit"

			yield item
Esempio n. 6
0
	def parse(self, response):
		"""Yield an ``AdItem`` for each ``<div id="loginbox">`` of the page.

		The response URL is printed first.  The location is the last path
		segment of that URL up to its first "."; coordinates are looked up
		in ``self.geo_cities`` under that location.  A field whose lookup
		fails is left empty, except ``media``, which falls back to a
		default icon under ``self.allowed_domains[0]``.
		"""
		print(response.url)
		blank = ''

		def first(node, query):
			# First extracted match of ``query`` under ``node``; raises
			# IndexError when nothing matches.
			return node.xpath(query).extract()[0]

		for box in response.xpath("//div[@id='loginbox']"):
			ad = AdItem()
			ad['source'] = self.name
			ad['category'] = self.category
			ad['subcategory'] = self.subcategory

			try:
				ad['title'] = first(box, 'div/div/div/div/p/a/text()')
			except:
				ad['title'] = blank

			try:
				last_segment = response.url.split('/')[-1]
				ad['location'] = last_segment.split('.')[0]
			except:
				ad['location'] = blank
			ad['postal_code'] = 0

			try:
				ad['media'] = first(box, 'div[@class="detail"]/img/@src')
			except:
				ad['media'] = self.allowed_domains[0] + "/images/parking-orange-26x26.png"

			try:
				ad['url'] = self.allowed_domains[0] + first(box, "div/div/div/div/p/a/@href")
			except:
				ad['url'] = blank

			try:
				head = first(box, 'div/div/div/div/span/text()')
				tail = first(box, 'div/div/div/div/span[2]/text()')
				ad['description'] = head + ", " + tail
			except:
				ad['description'] = blank

			for field, key in (('latitude', 'lat'), ('longitude', 'lon')):
				try:
					ad[field] = float(self.geo_cities[ad['location']][key])
				except:
					ad[field] = blank

			try:
				amount = first(box, "div/div/div/div/span[3]/text()").split('/')[0]
				ad['price'] = amount.encode('utf-8').split('€')[0]
				ad['currency'] = "€"
			except:
				ad['price'] = blank
				ad['currency'] = blank

			try:
				ad['period'] = first(box, "div/div/div/div/span[3]/text()").split('/')[1]
			except:
				ad['period'] = blank

			ad['evaluations'] = blank
			yield ad
Esempio n. 7
0
    def parse(self, response):
        """Yield one ``AdItem`` per ``<div class="card">`` of the page.

        Fields are read on a best-effort basis and fall back to an empty
        string (``0`` for ``postal_code``).  Latitude, longitude and
        evaluations are always left empty; the period is always ``"jour"``.
        """
        empty = ''
        for sel in response.xpath('//div[@class="card"]'):
            item = AdItem()
            item['source'] = self.name
            item['category'] = self.category
            item['subcategory'] = self.subcategory

            try:
                item['title'] = sel.xpath(
                    'div[@class="content"]/div[@class="vehicle-info"]/p/text()'
                ).extract()[0]
            except Exception:
                item['title'] = empty
            try:
                item['media'] = sel.xpath(
                    'a/div[@class="image"]/img/@src').extract()[0]
            except Exception:
                # A missing image must blank ``media`` itself; resetting
                # ``title`` here would erase a valid title and leave
                # ``media`` unset.
                item['media'] = empty
            try:
                item['url'] = self.allowed_domains[0] + sel.xpath(
                    'a/@href').extract()[0]
            except Exception:
                item['url'] = empty
            try:
                item['description'] = sel.xpath(
                    'div[@class="content"]/div[@class="vehicle-info"]/p[@class="description"]/text()'
                ).extract()[0].strip("\n ")
            except Exception:
                item['description'] = empty
            try:
                item['location'] = sel.xpath(
                    'div[@class="content"]/div[@class="vehicle-info"]/p[@class="city"]/strong/@title'
                ).extract()[0]
            except Exception:
                item['location'] = empty
            try:
                # The postal code is the number between the first "(" and
                # the first ")" of the city span.
                res = sel.xpath(
                    'div[@class="content"]/div[@class="vehicle-info"]/p[@class="city"]/span/text()'
                ).extract()[0]
                item['postal_code'] = int(res.split(')')[0].split('(')[1])
            except Exception:
                item['postal_code'] = 0
            item['latitude'] = empty
            item['longitude'] = empty
            item['evaluations'] = empty
            try:
                item['price'] = sel.xpath(
                    'a/div[@class="image"]/span[@class="price"]/strong/text()'
                ).extract()[0].encode('utf-8').strip('€')
                item['currency'] = "€"
            except Exception:
                item['price'] = empty
                item['currency'] = empty

            item['period'] = "jour"

            yield item
Esempio n. 8
0
    def parse(self, response):
        """Yield one ``AdItem`` per ``<div class="block">`` of the page.

        A field whose lookup fails is left as an empty string.  Coordinates
        are looked up in ``self.geo`` under the extracted location;
        ``postal_code`` is always empty.
        """
        blank = ''

        def first(node, query):
            # First extracted match of ``query`` under ``node``; raises
            # IndexError when nothing matches.
            return node.xpath(query).extract()[0]

        for box in response.xpath('//div[@class="block"]'):
            ad = AdItem()
            ad['source'] = self.name
            ad['category'] = self.category
            ad['subcategory'] = self.subcategory

            try:
                ad['title'] = first(box, 'div/h2/a/text()').strip("\n ")
            except:
                ad['title'] = blank

            # Both addresses are prefixed with the first allowed domain.
            for field, query in (('media', 'a/img/@src'), ('url', 'a/@href')):
                try:
                    ad[field] = self.allowed_domains[0] + first(box, query)
                except:
                    ad[field] = blank

            # (field, XPath, characters stripped from both ends)
            for field, query, junk in (
                    ('description', 'div/div[@class="boat-info"]/text()', "\n "),
                    ('evaluations', 'div/div[@class="boat-skipper"]/div[@class="nb-commentaires"]/span[@class="nb-com"]/text()', "\n "),
                    ('location', 'div/div/h4/strong/text()', ' -')):
                try:
                    ad[field] = first(box, query).strip(junk)
                except:
                    ad[field] = blank

            for field, key in (('latitude', 'lat'), ('longitude', 'lon')):
                try:
                    ad[field] = self.geo[ad['location']][key]
                except:
                    ad[field] = blank

            try:
                amount = first(box, 'div[@class="hosting-meta"]/div/span/strong/text()')
                ad['price'] = amount.encode('utf-8').strip('€')
                ad['currency'] = '€'
            except:
                ad['price'] = blank
                ad['currency'] = blank

            try:
                ad['period'] = first(box, 'div[3]/span/text()')
            except:
                ad['period'] = blank
            ad['postal_code'] = blank
            yield ad
    def parse(self, response):
        """Yield one ``AdItem`` per ``<div class="box-parking-dispo">``.

        The postal code is obtained by passing the title to ``searchZip``;
        if either step fails the title is empty and the postal code ``0``.
        Coordinates are looked up in ``self.geo`` under the second-to-last
        comma-separated part of the location.  Other fields fall back to an
        empty string.
        """
        blank = ""

        def first(node, query):
            # First extracted match of ``query`` under ``node``; raises
            # IndexError when nothing matches.
            return node.xpath(query).extract()[0]

        for box in response.xpath('//div[@class="box-parking-dispo"]'):
            ad = AdItem()
            ad['source'] = self.name
            ad['category'] = self.category
            ad['subcategory'] = self.subcategory

            try:
                ad['title'] = first(box, 'div/span[@class="title-parking"]/text()')
                ad['postal_code'] = searchZip(ad['title'])
            except:
                ad['title'] = blank
                ad['postal_code'] = 0

            # Both addresses are prefixed with the first allowed domain.
            for field, query in (
                    ('media', 'div/div/div[@class="detail-parking-left"]/div/img/@src'),
                    ('url', 'div/div/div[@class="detail-parking-right"]/div[2]/a/@href')):
                try:
                    ad[field] = self.allowed_domains[0] + first(box, query)
                except:
                    ad[field] = blank

            for field, query in (
                    ('description', 'div/div/div[@class="detail-parking-left"]/div/img/@alt'),
                    ('location', 'div/div/div/div/h1/span/text()')):
                try:
                    ad[field] = first(box, query)
                except:
                    ad[field] = blank

            for field, key in (('latitude', 'lat'), ('longitude', 'lon')):
                try:
                    city = ad['location'].split(',')[-2].strip(' ')
                    ad[field] = self.geo[city][key]
                except:
                    ad[field] = blank

            try:
                amount = first(box, 'div/div/div[@class="detail-parking-right"]/div/span/span/text()')
                ad['price'] = amount.encode('utf-8').strip('€')
                ad['currency'] = "€"
            except:
                ad['price'] = blank
                ad['currency'] = blank

            try:
                ad['period'] = first(box, 'div/div/div[@class="detail-parking-right"]/div/span/text()').strip('/')
            except:
                ad['period'] = blank

            try:
                rating_text = first(box, 'div/div/div[@class="detail-parking-left"]/div/div/span/text()')
                ad['evaluations'] = re.search(self.pattern, rating_text).group()
            except:
                ad['evaluations'] = blank

            yield ad
Esempio n. 10
0
	def parse(self, response):
		"""Yield one ``AdItem`` per ``rentResult ad-list-item`` block.

		Fields are read on a best-effort basis and fall back to an empty
		string.  ``postal_code`` is always ``0`` and ``evaluations`` always
		empty.  Price and period are each the first three cells of a table
		row joined with ", ".
		"""
		empty = ''
		for sel in response.xpath('//div[@class="rentResult ad-list-item"]'):
			item = AdItem()
			item['source'] = self.name
			item['category'] = self.category
			item['subcategory'] = self.subcategory

			try:
				item['title'] = sel.xpath('div[@class="detail"]/img/@alt').extract()[0]
			except Exception:
				item['title'] = empty

			try:
				item['media'] = sel.xpath('div[@class="detail"]/img/@src').extract()[0]
			except Exception:
				item['media'] = empty

			try:
				item['url'] = sel.xpath('div[@class="detail"]/meta/@content').extract()[0]
			except Exception:
				item['url'] = empty

			try:
				item['description'] = sel.xpath('div[@class="detail"]/div/p/span/text()').extract()[0]
			except Exception:
				item['description'] = empty

			try:
				# Relative to the current block.  A path starting with "//"
				# is resolved from the document root, so every item would
				# receive the first listing's location.
				item['location'] = sel.xpath('div[@class="detail"]/div/div[@itemprop="address"]/span[@class="location"]/span/text()').extract()[0]
			except Exception:
				item['location'] = empty
			item['postal_code'] = 0

			# Guarded like every other field: one listing without geo
			# metadata must not abort the rest of the page.
			try:
				item['latitude'] = sel.xpath('div[@class="detail"]/div/div[@itemprop="geo"]/meta[@itemprop="latitude"]/@content').extract()[0]
			except Exception:
				item['latitude'] = empty
			try:
				item['longitude'] = sel.xpath('div[@class="detail"]/div/div[@itemprop="geo"]/meta[@itemprop="longitude"]/@content').extract()[0]
			except Exception:
				item['longitude'] = empty

			try:
				price0 = sel.xpath('table/tr[2]/td[1]/text()').extract()[0].encode('utf-8').strip('€')
				price1 = sel.xpath('table/tr[2]/td[2]/text()').extract()[0].encode('utf-8').strip('€')
				price2 = sel.xpath('table/tr[2]/td[3]/text()').extract()[0].encode('utf-8').strip('€')
				item['price'] = price0 + ", " + price1 + ", " + price2
				item['currency'] = "€"
			except Exception:
				item['price'] = empty
				item['currency'] = empty

			try:
				period0 = sel.xpath('table/tr/td[1]/text()').extract()[0]
				period1 = sel.xpath('table/tr/td[2]/text()').extract()[0]
				period2 = sel.xpath('table/tr/td[3]/text()').extract()[0]
				item['period'] = period0 + ", " + period1 + ", " + period2
			except Exception:
				item['period'] = empty
			item['evaluations'] = empty
			yield item
    def parse(self, response):
        """Yield one ``AdItem`` for every ``<tr>`` of the page.

        Title and location are stripped of spaces and title-cased.
        Coordinates are looked up in ``self.geo`` under the location.  A
        field whose lookup fails is left empty; media, price, currency,
        postal code and evaluations are always empty.
        """
        blank = ''

        def first(node, query):
            # First extracted match of ``query`` under ``node``; raises
            # IndexError when nothing matches.
            return node.xpath(query).extract()[0]

        for row in response.xpath('//tr'):
            ad = AdItem()
            ad['source'] = self.name
            ad['category'] = self.category
            ad['subcategory'] = self.subcategory

            try:
                heading = first(
                    row, 'td[@id="colonne4"]/div[@id="title_ad"]/a/text()')
                ad['title'] = heading.strip(' ').title()
            except:
                ad['title'] = blank

            ad['media'] = blank

            for field, query in (
                    ('url', 'td[@id="colonne4"]/div[@id="title_ad"]/a/@href'),
                    ('description',
                     'td[@id="colonne4"]/div[@id="text_ad"]/a/text()')):
                try:
                    ad[field] = first(row, query)
                except:
                    ad[field] = blank

            try:
                place = first(row, 'td[@id="colonne3"]/text()')
                ad['location'] = place.strip(' ').title()
            except:
                ad['location'] = blank

            for field, key in (('latitude', 'lat'), ('longitude', 'lon')):
                try:
                    ad[field] = self.geo[ad['location']][key]
                except:
                    ad[field] = blank

            ad['price'] = blank
            ad['currency'] = blank

            try:
                ad['period'] = first(row, 'td[@id="colonne5"]/div/text()')
            except:
                ad['period'] = blank

            ad['postal_code'] = blank
            ad['evaluations'] = blank
            yield ad
Esempio n. 12
0
    def parse(self, response):
        """Yield one ``AdItem`` per entry of the JSON body's ``"ads"`` list.

        ``category`` and ``subcategory`` are copied unconditionally; every
        other key is optional and leaves an empty string when it cannot be
        read.  The currency is ``"EUR"`` whenever a price is present.
        ``source`` is not populated here.
        """
        blank = ""
        # Keys copied verbatim from the JSON entry, in assignment order.
        optional_keys = ('title', 'media', 'url', 'description', 'location',
                         'latitude', 'longitude')

        payload = json.loads(response.body_as_unicode())
        for entry in payload["ads"]:
            ad = AdItem()
            ad['category'] = entry['category']
            ad['subcategory'] = entry['subcategory']

            for key in optional_keys:
                try:
                    ad[key] = entry[key]
                except:
                    ad[key] = blank

            try:
                ad['price'] = entry["price"]
                ad['currency'] = "EUR"
            except:
                ad['price'] = blank
                ad['currency'] = blank

            try:
                ad['period'] = entry["period"]
            except:
                ad['period'] = blank

            yield ad
Esempio n. 13
0
    def parse(self, response):
        """Yield one ``AdItem`` per ``itemInside event-box p15`` block.

        A field whose lookup fails is left as an empty string.  Description
        and period are both read from the ``dateHeureEvent`` link text;
        latitude and longitude are always empty.
        """
        blank = ''

        def first(node, query):
            # First extracted match of ``query`` under ``node``; raises
            # IndexError when nothing matches.
            return node.xpath(query).extract()[0]

        for box in response.xpath('//div[@class="itemInside event-box p15"]'):
            ad = AdItem()
            ad['source'] = self.name
            ad['category'] = self.category
            ad['subcategory'] = self.subcategory

            for field, query in (('title', 'a/img/@title'),
                                 ('media', 'a/img/@src')):
                try:
                    ad[field] = first(box, query)
                except:
                    ad[field] = blank

            try:
                ad['url'] = self.allowed_domains[0] + first(
                    box, 'div/div/h2/a/@href')
            except:
                ad['url'] = blank

            for field, query in (
                    ('description', 'div[@class="dateHeureEvent"]/a/text()'),
                    ('location',
                     'div[@class="author"]/div[@class="authorRight"]/a[2]/text()')):
                try:
                    ad[field] = first(box, query)
                except:
                    ad[field] = blank

            ad['latitude'] = blank
            ad['longitude'] = blank

            try:
                raw_price = first(box, "div/div[2]/div/text()")
                ad['price'] = raw_price.strip('\n').encode('utf-8').strip('€')
                ad['currency'] = "€"
            except:
                ad['price'] = blank
                ad['currency'] = blank

            try:
                ad['period'] = first(
                    box, 'div[@class="dateHeureEvent"]/a/text()')
            except:
                ad['period'] = blank

            yield ad
Esempio n. 14
0
    def parse(self, response):
        """Yield one ``AdItem`` per ``<div class="row">`` of the page.

        Location and postal code are parsed from the ``terms=`` part of the
        response URL; that step is not guarded, so a URL without
        ``terms=`` (or without a "+" after it) raises ``IndexError``.
        Coordinates are looked up in ``self.geo`` under the location.  The
        remaining fields fall back to an empty string.
        """
        blank = ""

        def first(node, query):
            # First extracted match of ``query`` under ``node``; raises
            # IndexError when nothing matches.
            return node.xpath(query).extract()[0]

        for row in response.xpath('//div[@class="row"]'):
            ad = AdItem()
            ad['source'] = self.name
            ad['category'] = self.category
            ad['subcategory'] = self.subcategory

            for field, query in (('title', "div/div/div/h4/text()"),
                                 ('media', 'div/div/div/img/@src')):
                try:
                    ad[field] = first(row, query)
                except:
                    ad[field] = blank

            try:
                ad['url'] = self.allowed_domains[0] + first(
                    row, 'div/div/div/a/@href')
            except:
                ad['url'] = blank

            try:
                ad['description'] = first(row, 'div/div/div/div/div/h5/text()')
            except:
                ad['description'] = blank

            # "<location>+(<postal code>)..." follows "terms=" in the URL.
            terms = response.url.split('terms=')[1].split('+')
            ad['location'] = terms[0]
            ad['postal_code'] = terms[1].split(')')[0].strip('(')
            ad['evaluations'] = blank

            for field, key in (('latitude', 'lat'), ('longitude', 'lon')):
                try:
                    ad[field] = float(self.geo[ad['location']][key])
                except:
                    ad[field] = blank

            try:
                parts = first(
                    row, 'div/div/div/div/div[3]/h5/text()').split('/')
                ad['price'] = parts[0].strip(' ').encode('utf-8').strip('€')
                ad['period'] = parts[1]
                ad['currency'] = "€"
            except:
                ad['price'] = blank
                ad['period'] = blank
                ad['currency'] = blank
            yield ad
Esempio n. 15
0
    def parse(self, response):
        """Yield one ``AdItem`` per entry of the JSON body's ``"hits"`` list.

        Every field is optional: a value that cannot be read leaves an empty
        string.  When the monthly price is present the currency is set and
        the period is ``"par mois"``.  ``evaluations`` is always empty.
        """
        blank = ""
        # (field, function reading that field from one hit), in order.
        readers = (
            ('title', lambda hit: hit['title']),
            ('media', lambda hit: hit["pictures"][0]),
            ('url', lambda hit: hit['url']),
            ('description', lambda hit: hit['comment']),
            ('location', lambda hit: hit['address']),
            ('postal_code', lambda hit: int(hit['postal_code'])),
            ('latitude', lambda hit: hit['lat']),
            ('longitude', lambda hit: hit['lng']),
        )

        payload = json.loads(response.body_as_unicode())
        for hit in payload["hits"]:
            ad = AdItem()
            ad['source'] = self.name
            ad['category'] = self.category
            ad['subcategory'] = self.subcategory

            for field, read in readers:
                try:
                    ad[field] = read(hit)
                except:
                    ad[field] = blank

            try:
                ad['price'] = hit["unit_month_price_with_fee"]
                ad['currency'] = "€"
                ad['period'] = "par mois"
            except:
                ad['price'] = blank
                ad['currency'] = blank
                ad['period'] = blank
            ad['evaluations'] = blank
            yield ad
	def parse(self, response):
		"""Yield one ``AdItem`` per ``<li>`` of the ``results`` list.

		A field whose lookup fails is left as an empty string.  The
		location is the last space-separated word of the ``resultUsefull``
		text.  Price and period are both derived from the same ``<i>``
		text, with different characters stripped.  Latitude and longitude
		are always empty.
		"""
		blank = ''

		def first(node, query):
			# First extracted match of ``query`` under ``node``; raises
			# IndexError when nothing matches.
			return node.xpath(query).extract()[0]

		for row in response.xpath('//ul[@id="results"]/li'):
			ad = AdItem()
			ad['source'] = self.name
			ad['category'] = self.category
			ad['subcategory'] = self.subcategory

			try:
				ad['title'] = first(row, 'div[@class="resultContainer"]/div[@class="resultInfos"]/h3[@class="resultUserName"]/text()').strip('\n ')
			except:
				ad['title'] = blank

			try:
				ad['media'] = "https:" + first(row, 'div[@class="resultContainer"]/div[@class="resultImgContainer"]/img/@src')
			except:
				ad['media'] = blank

			try:
				ad['url'] = self.allowed_domains[0] + first(row, 'div[@class="resultContainer"]/a/@href')
			except:
				ad['url'] = blank

			try:
				kind = first(row, 'div[@class="resultContainer"]/div[@class="resultInfos"]/span[@class="resultType"]/text()').strip('\n ')
				details = first(row, 'div[@class="resultContainer"]/div[@class="resultInfos"]/span[@class="resultUsefull"]/text()').strip('\n ')
				ad['description'] = kind + " " + details
			except:
				ad['description'] = blank

			try:
				details = first(row, 'div[@class="resultContainer"]/div[@class="resultInfos"]/span[@class="resultUsefull"]/text()').strip('\n ')
				ad['location'] = details.split(' ')[-1]
			except:
				ad['location'] = blank

			ad['latitude'] = blank
			ad['longitude'] = blank

			try:
				ad['price'] = first(row, 'div[@class="resultContainer"]/div[@class="priceSpan"]/div[@class="innerSpan"]/i/text()').encode('utf-8').strip('\n €')
				ad['currency'] = "€"
			except:
				ad['price'] = blank
				ad['currency'] = blank

			try:
				ad['period'] = first(row, 'div[@class="resultContainer"]/div[@class="priceSpan"]/div[@class="innerSpan"]/i/text()').strip("\n' /")
			except:
				ad['period'] = blank

			yield ad
Esempio n. 17
0
    def parse(self, response):
        """Yield one ``AdItem`` for every ``<article>`` of the page.

        A field whose lookup fails is left as an empty string.  Postal
        code, evaluations, price, currency and period are always empty.
        """
        blank = ''

        def first(node, query):
            # First extracted match of ``query`` under ``node``; raises
            # IndexError when nothing matches.
            return node.xpath(query).extract()[0]

        for article in response.xpath('//article'):
            ad = AdItem()
            ad['source'] = self.name
            ad['category'] = self.category
            ad['subcategory'] = self.subcategory

            for field, query in (
                    ('title', 'a/div[2]/header/h1/text()'),
                    ('media', 'a/aside/figure/img/@src'),
                    ('url', 'a/@href')):
                try:
                    ad[field] = first(article, query)
                except:
                    ad[field] = blank

            try:
                ad['description'] = first(
                    article, 'a/div[2]/p[@class="description"]/text()'
                ).strip('\n')
            except:
                ad['description'] = blank

            for field, query in (
                    ('latitude', 'a/@data-latitude'),
                    ('longitude', 'a/@data-longitude'),
                    ('location', 'a/aside/div[@class="user-city"]/text()')):
                try:
                    ad[field] = first(article, query)
                except:
                    ad[field] = blank

            for field in ('postal_code', 'evaluations', 'price', 'currency',
                          'period'):
                ad[field] = blank

            yield ad
Esempio n. 18
0
    def parse(self, response):
        """Yield one ``AdItem`` per ``<div>`` carrying an ``itemtype`` attribute.

        The category and subcategory come from ``self.categories``, keyed by
        the last path segment of the response URL (query string removed).
        Scraped fields fall back to an empty string when extraction fails,
        except ``postal_code`` which falls back to ``0``.  The period is
        always "jour"; latitude, longitude and evaluations are always empty.
        """
        empty = ""
        # Last path segment of the URL without its query string; it does not
        # depend on the element being parsed, so compute it once.
        category = response.url.split('?')[0].split('/')[-1]

        for sel in response.xpath('//div[@itemtype]'):
            item = AdItem()
            item['source'] = self.name

            item['category'] = self.categories[category]["category"]
            item['subcategory'] = self.categories[category]["subcategory"]

            try:
                item['title'] = sel.xpath(
                    'div/div[@class="nsadtitle"]/text()').extract()[0]
            except Exception:
                item['title'] = empty
            try:
                item['media'] = "https:" + sel.xpath(
                    'div/div/img/@src').extract()[0]
            except Exception:
                item['media'] = empty
            try:
                item['url'] = self.allowed_domains[0] + sel.xpath(
                    'div[@class="nsadprice"]/div/a/@href').extract()[0]
            except Exception:
                item['url'] = empty
            try:
                item['description'] = sel.xpath(
                    'div/div[@class="nsadsub"]/text()').extract()[0]
            except Exception:
                item['description'] = empty
            try:
                location = sel.xpath(
                    'div[2]/div[3]/span[2]/text()').extract()[0]
                item['location'] = location
                # The text before " - " is parsed as the postal code.  This
                # used to go through a non-existent ``.test`` attribute, so
                # the branch always raised and the location was blanked.
                item['postal_code'] = int(location.split(' - ')[0])
            except Exception:
                item['location'] = empty
                item['postal_code'] = 0

            item['latitude'] = empty
            item['longitude'] = empty
            try:
                item['price'] = sel.xpath(
                    'div[@class="nsadprice"]/div[@class="nsofferamount"]/text()'
                ).extract()[0].encode('utf-8').strip('€')
                item['currency'] = "€"
            except Exception:
                item['price'] = empty
                item['currency'] = empty
            item['period'] = "jour"
            item['evaluations'] = empty
            yield item
Esempio n. 19
0
	def parse(self, response):
		"""Yield one ``AdItem`` for every ``<div data-element-id=...>`` node.

		A field whose extraction raises is stored as an empty string;
		latitude and longitude are always empty.
		"""
		def first(node, query):
			# First string selected by ``query`` relative to ``node``.
			return node.xpath(query).extract()[0]

		for node in response.xpath('//div[@data-element-id]'):
			ad = AdItem()
			blank = ''
			ad['source'] = "zilok"
			ad['category'] = self.category
			ad['subcategory'] = self.subcategory

			try:
				ad['title'] = first(node, 'div[2]/h3/a/text()')
			except:
				ad['title'] = blank

			try:
				# Keep the text between the first '(' and the next ')'
				# of the style attribute.
				style = first(node, 'div[1]/@style')
				after_paren = style.split('(')[1]
				ad['media'] = after_paren.split(')')[0]
			except:
				ad['media'] = blank

			try:
				ad['url'] = first(node, 'div[2]/h3/a/@href')
			except:
				ad['url'] = blank

			try:
				pieces = [
					first(node, 'div[2]/div/ul[1]/li[1]/text()'),
					first(node, 'div[2]/div/ul[1]/li[2]/text()'),
					first(node, 'div[2]/div/ul[2]/li/text()'),
				]
				ad['description'] = " ".join(pieces)
			except:
				ad['description'] = blank

			try:
				ad['location'] = first(node, 'div[2]/h4/text()')
			except:
				ad['location'] = blank

			ad['latitude'] = blank
			ad['longitude'] = blank

			try:
				raw_price = first(node, 'div[3]/div/p/text()').strip('\n')
				ad['price'] = raw_price.encode('utf-8').strip('€')
				ad['currency'] = "€"
			except:
				ad['price'] = blank
				ad['currency'] = blank

			try:
				ad['period'] = first(node, 'div[3]/div/p[2]/text()')
			except:
				ad['period'] = blank

			yield ad
    def parse(self, response):
        """Yield one ``AdItem`` per ``<li>`` of the ``ul.items`` listing.

        The location is the last space-separated word of the title before
        its first " (" and is used as the key into ``self.geo`` for the
        coordinates.  The postal code is the first match of
        ``self.pattern`` in the title.  Fields that cannot be extracted are
        stored as an empty string; price, currency and period are always
        empty.
        """
        empty = ""
        for sel in response.xpath('//ul[@class="items"]/li'):
            item = AdItem()
            item['source'] = self.name
            item['category'] = self.category
            item['subcategory'] = self.subcategory

            try:
                title = sel.xpath(
                    'div[@class="description"]/h3/a/text()').extract()[0]
            except Exception:
                title = empty
            item['title'] = title

            try:
                location = title.split(' (')[0].split(' ')[-1]
            except Exception:
                location = empty
            item['location'] = location

            try:
                item['media'] = sel.xpath(
                    'div[@class="thumbnail"]/img/@src').extract()[0]
            except Exception:
                item['media'] = empty
            try:
                item['evaluations'] = sel.xpath(
                    'div[@class="thumbnail"]/div[@class="comment-count"]/text()'
                ).extract()[0]
            except Exception:
                item['evaluations'] = empty

            try:
                item['url'] = sel.xpath(
                    'div[@class="description"]/h3/a/@href').extract()[0]
            except Exception:
                item['url'] = empty
            try:
                item['description'] = sel.xpath(
                    'div[@class="description"]/text()[3]').extract()[0]
            except Exception:
                item['description'] = empty

            try:
                item['latitude'] = self.geo[location]['lat']
            except Exception:
                item['latitude'] = empty
            # The longitude used to be reset to empty right after this
            # lookup, which discarded the value; it is now kept, matching
            # how the latitude is handled.
            try:
                item['longitude'] = self.geo[location]['lon']
            except Exception:
                item['longitude'] = empty

            item['price'] = empty
            item['currency'] = empty
            item['period'] = empty
            try:
                # ``re.search`` returns None when nothing matches, so
                # ``.group()`` raises and the postal code stays empty.
                item['postal_code'] = re.search(self.pattern, title).group()
            except Exception:
                item['postal_code'] = empty
            yield item
Esempio n. 21
0
    def parse(self, response):
        """Yield one ``AdItem`` per organization card of the response.

        The location is the URL-unquoted second-to-last path segment of the
        response URL, and the coordinates are looked up in ``self.geo``
        under that location.  Fields that cannot be extracted, and
        coordinates for a location missing from ``self.geo``, are stored as
        an empty string.  Postal code, price, currency, period and
        evaluations are always empty.
        """
        empty = ''

        # The location is derived from the URL only, so it is identical for
        # every card on the page.
        try:
            location = urllib2.unquote(response.url.split('/')[-2])
        except Exception:
            location = empty

        # Compare by value (not identity) and fall back to blank coordinates
        # for an unknown location instead of letting a KeyError abort the
        # whole page.
        latitude = longitude = empty
        if location != empty:
            try:
                known = self.geo[location]
                latitude, longitude = known['lat'], known['lon']
            except Exception:
                latitude = longitude = empty

        for sel in response.xpath(
                '//ul[@data-view="card"]/li[@itemtype="http://schema.org/Organization"]'
        ):
            item = AdItem()
            item['source'] = self.name
            item['category'] = self.category
            item['subcategory'] = self.subcategory

            try:
                item['title'] = sel.xpath('@data-name').extract()[0]
            except Exception:
                item['title'] = empty

            item['location'] = location

            try:
                # Keep what sits between the last '(' and the following ');'
                # of the style attribute.
                item['media'] = sel.xpath('div/a/@style').extract()[0].split(
                    '(')[-1].split(');')[0]
            except Exception:
                item['media'] = empty

            try:
                item['url'] = sel.xpath('div/a/@href').extract()[0]
            except Exception:
                item['url'] = empty

            try:
                item['description'] = sel.xpath(
                    'div/a[2]/div[2]/p/text()').extract()[0].strip('\t')
            except Exception:
                item['description'] = empty

            item['postal_code'] = empty
            item['latitude'] = latitude
            item['longitude'] = longitude
            item['price'] = empty
            item['currency'] = empty
            item['period'] = empty
            item['evaluations'] = empty
            yield item
	def parse(self, response):
		"""Yield one ``AdItem`` per ``<div class="ligne_simple">`` element.

		Fields that cannot be extracted are stored as an empty string.
		The postal code is whatever ``searchZip`` returns for the second
		``<span>`` of the description link, and the period is the part of
		the description text before its first "-".  Latitude, longitude,
		price, currency and evaluations are always empty.
		"""
		for sel in response.xpath('//div[@class="ligne_simple"]'):
			item = AdItem()
			empty = ''
			item['source'] = self.name
			item['category'] = self.category
			item['subcategory'] = self.subcategory

			try:
				item['title'] = sel.xpath('div[@class="annonce_detail"]/p/a/@title').extract()[0]
			except Exception:
				item['title'] = empty

			try:
				item['media'] = sel.xpath('div[@class="annonce_img"]/a/img/@src').extract()[0]
			except Exception:
				item['media'] = empty

			try:
				item['url'] = sel.xpath('div[@class="annonce_img"]/a/@href').extract()[0]
			except Exception:
				item['url'] = empty

			try:
				item['description'] = sel.xpath('div[@class="annonce_detail"]/span[@class="desc"]/a/text()[2]').extract()[0]
			except Exception:
				item['description'] = empty

			try:
				item['location'] = sel.xpath('div[@class="annonce_detail"]/span[@class="desc"]/a/span/text()').extract()[0]
			except Exception:
				item['location'] = empty

			# This try/except used to be indented with spaces inside a
			# tab-indented body; it now uses tabs like the rest.
			try:
				item['postal_code'] = searchZip(sel.xpath('div[@class="annonce_detail"]/span[@class="desc"]/a/span[2]/text()').extract()[0])
			except Exception:
				item['postal_code'] = empty

			item['latitude'] = empty
			item['longitude'] = empty
			item['price'] = empty
			item['currency'] = empty

			try:
				# Same text node as the description, cut at the first '-'.
				item['period'] = sel.xpath('div[@class="annonce_detail"]/span[@class="desc"]/a/text()[2]').extract()[0].split('-')[0]
			except Exception:
				item['period'] = empty
			item['evaluations'] = empty
			yield item
	def parse(self, response):
		"""Yield one ``AdItem`` per ``<div class="annonces">`` element.

		A field whose extraction raises is stored as an empty string.
		Latitude, longitude and period are always empty.
		"""
		blank = ''
		# (field, XPath relative to the ad, characters to strip or None),
		# listed in the order the fields are filled in.
		scraped = (
			('title', 'div[@class="cadre-in"]/div[@class="coordonnees"]/b/text()', None),
			('media', 'img/@src', None),
			('url', 'div[@class="cadre-in"]/div[@class="coordonnees"]/a/@href', None),
			('description', 'div[@class="cadre-in"]/div[@class="descriptif"]/text()', None),
			('location', 'div[@class="cadre-in"]/div[@class="coordonnees"]/text()[3]', '\n \t'),
		)

		for ad_node in response.xpath('//div[@class="annonces"]'):
			ad = AdItem()
			ad['source'] = self.name
			ad['category'] = self.category
			ad['subcategory'] = self.subcategory

			for field, query, strip_chars in scraped:
				try:
					value = ad_node.xpath(query).extract()[0]
					if strip_chars is not None:
						value = value.strip(strip_chars)
					ad[field] = value
				except:
					ad[field] = blank

			ad['latitude'] = blank
			ad['longitude'] = blank

			try:
				# Keep what follows the last ':' and precedes the first '€'.
				raw = ad_node.xpath('div[@class="cadre-in"]/div[@class="coordonnees"]/text()[8]').extract()[0]
				after_colon = raw.split(':')[-1].strip('\n')
				ad['price'] = after_colon.encode('utf-8').split('€')[0]
				ad['currency'] = "€"
			except:
				ad['price'] = blank
				ad['currency'] = blank

			ad['period'] = blank

			yield ad
Esempio n. 24
0
    def parse(self, response):
        """Yield one ``AdItem`` per ``<div data-car-id=...>`` element.

        A field whose extraction raises is stored as an empty string;
        latitude and longitude are always empty.
        """
        blank = ""
        # (field, XPath relative to the card), in the order they are filled.
        text_fields = (
            ('title', "div[@class='search_card_content car_content']/a[@class='car_title']/@title"),
            ('media', 'div[@class="search_card_aside car_photo"]/img/@src'),
            ('url', 'div[@class="search_card_content car_content"]/a[@class="car_title"]/@href'),
            ('description', 'div[@class="search_card_content car_content"]/div[@class="car_subtitle"]/text()'),
            ('location', 'div[@class="search_card_content car_content"]/div[@class="car_location"]/text()[2]'),
        )

        for card in response.xpath('//div[@data-car-id]'):
            ad = AdItem()
            ad['source'] = self.name
            ad['category'] = self.category
            ad['subcategory'] = self.subcategory

            for field, query in text_fields:
                try:
                    ad[field] = card.xpath(query).extract()[0]
                except:
                    ad[field] = blank

            ad['latitude'] = blank
            ad['longitude'] = blank

            try:
                amount = card.xpath('div[@class="search_card_content car_content"]/span[@class="js_car_price car_price"]/strong/text()').extract()[0]
                ad['price'] = amount.encode('utf-8').strip('€')
                ad['currency'] = "€"
            except:
                ad['price'] = blank
                ad['currency'] = blank

            try:
                ad['period'] = card.xpath('div[@class="search_card_content car_content"]/span[@class="js_car_price car_price"]/text()').extract()[0]
            except:
                ad['period'] = blank

            yield ad
Esempio n. 25
0
    def parse(self, response):
        """Yield one ``AdItem`` per ``<div class='home-list-item'>`` element.

        The location is whatever ``self.France.city_from_title`` returns for
        the title; if either step raises, both title and location are
        stored empty.  Other fields whose extraction raises are stored as an
        empty string.  Latitude and longitude are always empty and the
        period is always "day".
        """
        def first(node, query):
            # First string selected by ``query`` relative to ``node``.
            return node.xpath(query).extract()[0]

        for entry in response.xpath("//div[@class='home-list-item']"):
            ad = AdItem()
            blank = ""
            ad['source'] = self.name
            ad['category'] = self.category
            ad['subcategory'] = self.subcategory

            try:
                ad['title'] = first(entry, "div[2]/h2/a/text()")
                ad['location'] = self.France.city_from_title(ad['title'])
            except:
                ad['title'] = blank
                ad['location'] = blank

            try:
                ad['media'] = first(entry, 'a/img/@src')
            except:
                ad['media'] = blank

            try:
                domain = self.allowed_domains[0]
                ad['url'] = domain + first(entry, 'a/@href')
            except:
                ad['url'] = blank

            try:
                ad['description'] = first(entry, 'div[3]/div[2]/a/text()')
            except:
                ad['description'] = blank

            ad['latitude'] = blank
            ad['longitude'] = blank

            try:
                amount = first(entry, 'div/div/div/text()')
                ad['price'] = amount.encode('utf-8').strip('€')
                ad['currency'] = "€"
            except:
                ad['price'] = blank
                ad['currency'] = blank

            ad['period'] = "day"
            yield ad
Esempio n. 26
0
    def parse(self, response):
        """Yield one ``AdItem`` per ``<div class="row">`` element.

        Unlike an empty string, the placeholder used here is "unknown": it
        is stored for every field whose extraction raises, and always for
        latitude, longitude and period.
        """
        placeholder = "unknown"
        # (field, XPath relative to the row, prefix with the first allowed
        # domain?), listed in the order the fields are filled in.
        scraped = (
            ('title', "div[2]/h2/a/text()", False),
            ('media', 'div/a/img/@src', True),
            ('url', 'div[2]/p/a/@href', True),
            ('description', 'div[2]/p/text()', False),
            ('location', 'div[2]/text()[3]', False),
        )

        for row in response.xpath('//div[@class="row"]'):
            ad = AdItem()
            ad['source'] = self.name
            ad['category'] = self.category
            ad['subcategory'] = self.subcategory

            for field, query, with_domain in scraped:
                try:
                    if with_domain:
                        domain = self.allowed_domains[0]
                        ad[field] = domain + row.xpath(query).extract()[0]
                    else:
                        ad[field] = row.xpath(query).extract()[0]
                except:
                    ad[field] = placeholder

            ad['latitude'] = placeholder
            ad['longitude'] = placeholder

            try:
                amount = row.xpath('div[3]/div/span/text()').extract()[0]
                ad['price'] = amount.encode('utf-8').strip('€')
                ad['currency'] = "€"
            except:
                ad['price'] = placeholder
                ad['currency'] = placeholder

            ad['period'] = placeholder

            yield ad
    def parse(self, response):
        """Yield one ``AdItem`` per ``<li data-id_product=...>`` element.

        A field whose extraction raises is stored as an empty string.
        Latitude, longitude, location and period are always empty; no
        currency is set by this parser.
        """
        blank = ''
        # (field, XPath relative to the product, prefix with the first
        # allowed domain?), listed in the order the fields are filled in.
        scraped = (
            ('title', 'a/p/strong/text()', False),
            ('media', 'a/figure/span/img/@src', False),
            ('url', 'a/@href', True),
            ('description', 'a/figure/span/img/@alt', False),
        )

        for product in response.xpath('//li[@data-id_product]'):
            ad = AdItem()
            ad['source'] = self.name
            ad['category'] = self.category
            ad['subcategory'] = self.subcategory

            for field, query, with_domain in scraped:
                try:
                    if with_domain:
                        domain = self.allowed_domains[0]
                        ad[field] = domain + product.xpath(query).extract()[0]
                    else:
                        ad[field] = product.xpath(query).extract()[0]
                except:
                    ad[field] = blank

            for field in ('latitude', 'longitude', 'location'):
                ad[field] = blank

            try:
                ad['price'] = product.xpath(
                    'a/p/span[2]/span/text()').extract()[0]
            except:
                ad['price'] = blank

            ad['period'] = blank

            yield ad
    def parse(self, response):
        """Yield one ``AdItem`` per row of the ``table.annonces`` listing.

        The price cell is split on "/": the first part becomes the price
        and the second the period.  If any of that raises, price, currency
        and period are all stored empty.  Other fields whose extraction
        raises are stored as an empty string; media, latitude and longitude
        are always empty.
        """
        def first(node, query):
            # First string selected by ``query`` relative to ``node``.
            return node.xpath(query).extract()[0]

        for row in response.xpath('//table[@class="annonces"]/tr'):
            ad = AdItem()
            blank = ""
            ad['source'] = self.name
            ad['category'] = self.category
            ad['subcategory'] = self.subcategory

            try:
                ad['title'] = first(row, 'td/a/@title')
            except:
                ad['title'] = blank

            ad['media'] = blank

            try:
                domain = self.allowed_domains[0]
                ad['url'] = domain + first(row, 'td/a/@href')
            except:
                ad['url'] = blank

            try:
                ad['description'] = first(row, 'td[3]/text()')
            except:
                ad['description'] = blank

            try:
                ad['location'] = first(row, 'td[2]/span/span/text()')
            except:
                ad['location'] = blank

            ad['latitude'] = blank
            ad['longitude'] = blank

            try:
                cell = first(row, 'td[@style="text-align: right;"]/text()')
                parts = cell.split('/')
                ad['price'] = parts[0].encode('utf-8').strip('€')
                ad['period'] = parts[1]
                ad['currency'] = "€"
            except:
                ad['price'] = blank
                ad['currency'] = blank
                ad['period'] = blank

            yield ad
Esempio n. 29
0
    def parse(self, response):
        """Yield one ``AdItem`` per community-event result of the response.

        The description is "De <first eventTime text> a <second eventTime
        text>" and is also stored as the period.  Fields that cannot be
        extracted are stored as an empty string; latitude, longitude, price
        and currency are always empty.
        """
        for sel in response.xpath('//div[@class="community-events-results-item"]'):
            item = AdItem()
            empty = ''
            item['source'] = self.name
            item['category'] = self.category
            item['subcategory'] = self.subcategory

            try:
                item['title'] = sel.xpath('div[@class="community-events-results-right"]/h3/a/strong/text()').extract()[0]
            except Exception:
                item['title'] = empty

            try:
                item['media'] = sel.xpath('div[@class="community-events-results-left"]/a/img/@src').extract()[0]
            except Exception:
                item['media'] = empty

            try:
                item['url'] = self.allowed_domains[0] + sel.xpath('div[@class="community-events-results-left"]/a/@href').extract()[0]
            except Exception:
                item['url'] = empty

            try:
                start = "De " + sel.xpath('div[@class="community-events-results-right"]/div[@class="eventTime"]/text()').extract()[0]
                end = "a " + sel.xpath('div[@class="community-events-results-right"]/div[@class="eventTime"]/text()[2]').extract()[0]

                item['description'] = start + ' ' + end
                item['period'] = item['description']
            except Exception:
                item['description'], item['period'] = empty, empty

            try:
                # Query relative to the current result.  This used to call a
                # bare, undefined ``xpath``; the NameError was swallowed and
                # the location was always empty.
                item['location'] = sel.xpath('div[@class="community-events-results-right"]/div[@class="eventLocation"]/text()').extract()[0]
            except Exception:
                item['location'] = empty

            item['latitude'] = empty
            item['longitude'] = empty
            item['price'] = empty
            item['currency'] = empty
            yield item
Esempio n. 30
0
    def parse(self, response):
        """Yield one ``AdItem`` per ``<li data-element-id=...>`` element.

        Coordinates are looked up in ``self.geo`` under the last path
        segment of the response URL (query string removed) and converted to
        ``float``.  A field whose extraction or lookup raises is stored as
        an empty string; postal code and evaluations are always empty.
        """
        def first(node, query):
            # First string selected by ``query`` relative to ``node``.
            return node.xpath(query).extract()[0]

        for entry in response.xpath('//li[@data-element-id]'):
            ad = AdItem()
            blank = ''
            ad['source'] = self.name
            ad['category'] = self.category
            ad['subcategory'] = self.subcategory

            try:
                ad['title'] = first(entry, 'div[2]/div[1]/h3/a/text()')
            except:
                ad['title'] = blank

            try:
                # Keep the text between the first '(' and the next ')' of
                # the style attribute, without surrounding single quotes.
                style = first(entry, 'div[1]/@style')
                inside = style.split('(')[1].split(')')[0]
                ad['media'] = inside.strip("'")
            except:
                ad['media'] = blank

            try:
                domain = self.allowed_domains[0]
                ad['url'] = domain + first(entry, 'div[2]/div[1]/h3/a/@href')
            except:
                ad['url'] = blank

            try:
                line_one = first(entry, 'div[2]/div/ul[1]/li[1]/text()')
                line_two = first(entry, 'div[2]/div/ul[1]/li[2]/text()')
                # The result keeps a trailing space after the second part.
                ad['description'] = " ".join([line_one, line_two]) + " "
            except:
                ad['description'] = blank

            try:
                ad['location'] = first(entry, 'div[2]/div[1]/h4/text()')
            except:
                ad['location'] = blank

            ad['postal_code'] = blank
            ad['evaluations'] = blank

            # Last path segment of the URL, query string removed.
            path_only = response.url.split('?')[0]
            url_city = path_only.split('/')[-1]

            for axis, key in (('latitude', 'lat'), ('longitude', 'lon')):
                try:
                    ad[axis] = float(self.geo[url_city][key])
                except:
                    ad[axis] = blank

            try:
                raw_price = first(entry, 'div[2]/div[3]/p/text()').strip('\n')
                ad['price'] = raw_price.encode('utf-8').strip('€')
                ad['currency'] = "€"
            except:
                ad['price'] = blank
                ad['currency'] = blank

            try:
                ad['period'] = first(entry, 'div[2]/div[3]/p[2]/text()')
            except:
                ad['period'] = blank

            yield ad