Example #1
0
    def parse(self, response):
        """Scrape one listing page and store it via ``rubedo.insertContent``.

        Every text field (title included) is passed as UTF-8 encoded bytes and
        falls back to ``""`` when the page has no matching node.  ``photo`` is
        passed exactly as extracted (``None`` when absent).  ``lat``/``lon``
        are the midpoint of the map bounding box, or 0 when any corner is
        missing or not numeric.

        Args:
            response: page response exposing ``css()`` and ``xpath()``.
        """

        def first_utf8(selection):
            # ``extract_first()`` yields None when nothing matched; map that
            # to "" instead of relying on a bare ``except`` around ``.encode``.
            value = selection.extract_first()
            return value.encode('utf-8') if value is not None else ""

        def hidden_input(name):
            # Selection of the ``value`` attribute of hidden input ``name``.
            return response.xpath(
                '//input[(@type="hidden") and (@name="%s")]/@value' % name)

        def bbox(corner):
            # Bounding-box attribute on the map div, as a float.
            # NOTE: "boudingbox" is the spelling used in the XPath the
            # original code queried; presumably it mirrors the page markup,
            # so do not "fix" it without checking the site.
            raw = first_utf8(response.xpath(
                '//div[@id="resume__map_new"]/@data-boudingbox-%s' % corner))
            # float("") raises ValueError, which routes a missing attribute
            # to the same fallback as a malformed one.
            return float(raw)

        title = first_utf8(response.css('title::text'))
        subtitle = first_utf8(
            response.xpath('//*[@class="detail-title_subtitle"]/text()'))
        price = first_utf8(response.xpath('//*[@id="price"]/text()'))

        description = first_utf8(hidden_input('description'))
        photo = hidden_input('urlphoto').extract_first()
        ville = first_utf8(hidden_input('ville'))
        codepostal = first_utf8(hidden_input('codepostal'))
        typebien = first_utf8(hidden_input('typebien'))
        surface = first_utf8(hidden_input('surface'))

        try:
            lat = (bbox('northeast-latitude') + bbox('southwest-latitude')) / 2
            lon = (bbox('northeast-longitude') + bbox('southwest-longitude')) / 2
        except (TypeError, ValueError):
            # Incomplete or non-numeric bounding box: store a neutral position.
            lat = 0
            lon = 0

        rubedo.insertContent(title, subtitle, price, description, photo,
                             ville, codepostal, typebien, surface, lat, lon)
Example #2
0
 def parse(self, response):
     """Import one article page into rubedo, keyed by the id in its URL.

     The content id is the first run of digits found in the URL the request
     was originally issued for.  When that URL contains no digits the URL is
     printed and nothing is inserted.

     Args:
         response: page response; ``response.meta['index']`` selects the
             entry of ``self.type``, ``self.taxo`` and ``self.workspaces``
             passed along with the content.
     """
     item_index = response.meta['index']

     # 'redirect_urls' is only present in the request meta when the request
     # was actually redirected; fall back to the request's own URL instead of
     # raising KeyError for pages that were served directly.
     redirect_urls = response.request.meta.get('redirect_urls')
     originalUrl = redirect_urls[0] if redirect_urls else response.request.url

     m = re.search(r'\d+', originalUrl)
     if m is None:
         # No numeric id in the URL: report it and skip the page.
         print(originalUrl)
         return

     contentId = m.group(0)
     title = response.css('h1::text').extract_first()

     # Each relative '*' step below selects child elements of the previous
     # selection, dropping forms first and then the "outils" block.
     # NOTE(review): this means ``texte`` is built from grandchildren of
     # #content - confirm that descending two levels is intended.
     content = response.xpath('//*[@id="content"]')
     content = content.xpath('*[not(self::form or ancestor::form)]')
     content = content.xpath('*[not(@id="outils" or ancestor::div/@id="outils")]')
     texte = "".join(content.extract())

     visuel = response.xpath('//img[contains(@src, "arton")]/@src').extract_first()

     # The title is deliberately passed twice, exactly as the original call did.
     rubedo.insertContent(contentId, title, title, texte, visuel,
                          self.type[item_index], self.taxo[item_index],
                          self.workspaces[item_index])