Example #1
0
 def parse_job_detail(self, response):
     """Build a job item from a job-detail page.

     The page is only turned into an item when both a title and a company
     can be extracted; otherwise the generator finishes without yielding.

     :param response: the downloaded job-detail page.
     :returns: a generator yielding at most one loaded item.
     """
     selector = HtmlXPathSelector(response)

     job_title = selector.select("//div[@id='businessmyaccountjobadpreviewmain']/div/font/text()").extract_unquoted()
     employer = selector.select("//li[@class='propertiesleft' and contains(text(),'Podjetje:')]/following-sibling::li/text()").extract_unquoted()

     # Guard clause: without both mandatory fields nothing is emitted.
     if not (job_title and employer):
         return

     location = selector.select("//li[@class='propertiesleft' and contains(text(),'Regija in kraj dela:')]/following-sibling::li/text()").extract_unquoted()
     # This XPath holds a non-ASCII character, hence the unicode literal
     # that is encoded to UTF-8 before being passed to select().
     category_xpath = u"//li[@class='propertiesleft2' and contains(text(),'Področje dela:')]/following-sibling::li/text()".encode('utf-8')
     job_category = selector.select(category_xpath).extract_unquoted()
     logo_sources = selector.select("//img[@id='mainimage']/@src").extract()

     job = JobItem()
     if logo_sources:
         # Only the first matching image source is used.
         job.load_image(self.get_base_url(response, logo_sources[0]))

     # Keep the text that follows a colon and whitespace in the dates entry.
     posted_on = selector.select("//li[@class='dates']/text()").re(r".*:\s+(.*)")

     loader = JobLoader(job)
     # Fields are fed to the loader in this fixed order.
     field_values = (
         ('title', job_title),
         ('company', employer),
         ('category', job_category),
         ('city', location),
         ('details_url', url_query_cleaner(response.url)),
         ('published_date', posted_on),
         ('id', self.generate_id(response.url)),
         ('content', response.body_as_unicode()),
         ('source', self.name),
         ('source_label', self.label),
     )
     for field_name, value in field_values:
         loader.add_value(field_name, value)

     yield loader.load_item()
Example #2
0
    def parse_job_detail(self, response):
        """Complete a partially filled job item with data from its detail page.

        The item stored under ``response.request.meta['item']`` is merged
        into a fresh loader, then the id, source information, summary and
        raw page content are added.

        :param response: the downloaded job-detail page; its request meta
            must contain an ``'item'`` entry.
        :returns: a generator yielding exactly one loaded item.
        :raises KeyError: if the request meta has no ``'item'`` entry.
        """
        hxs = HtmlXPathSelector(response)

        loader = JobLoader(JobItem())
        loader.add_item(response.request.meta['item'])
        # ('IDEPD',) is a genuine one-element tuple; the bare ('IDEPD') that
        # used to be passed here is just the string 'IDEPD'.
        # NOTE(review): assumes generate_id treats its second argument as a
        # collection of query-parameter names - confirm against its definition.
        loader.add_value('id', self.generate_id(response.url, ('IDEPD',)))
        loader.add_value('source', self.name)
        loader.add_value('source_label', self.label)
        # Summary is the text of the cell right after the one containing
        # 'Opis del in nalog'.
        loader.add_value('summary', hxs.select("//div[@class='cc-gv']//tr/td[contains(text(),'Opis del in nalog')]/following-sibling::td[1]/text()").extract_unquoted())
        loader.add_value('content', response.body_as_unicode())

        yield loader.load_item()
Example #3
0
    def parse_job_detail(self, response):
        """Finish the item carried in the request meta and yield it.

        :param response: the downloaded job-detail page; its request meta
            must contain an ``'item'`` entry.
        :returns: a generator yielding exactly one loaded item.
        """
        job_loader = JobLoader(JobItem())
        carried_item = response.request.meta['item']
        job_loader.add_item(carried_item)

        # Remaining fields come from the response and the spider itself.
        extra_fields = (
            ('id', self.generate_id(response.url)),
            ('source', self.name),
            ('source_label', self.label),
            ('content', response.body_as_unicode()),
        )
        for field_name, value in extra_fields:
            job_loader.add_value(field_name, value)

        yield job_loader.load_item()
Example #4
0
 def parse_job_detail(self, response):
     """Build a job item from a job-detail page.

     An item is produced only when both a title and a company are found
     on the page. The category is taken from the request meta, not from
     the page itself.

     :param response: the downloaded job-detail page; its request meta
         must contain a ``'category'`` entry when title and company exist.
     :returns: a generator yielding at most one loaded item.
     """
     selector = HtmlXPathSelector(response)

     job_title = selector.select("//span[@class='header']/text()").extract_unquoted()
     employer = selector.select("//span[@class='fontblacklarge']/b/text()").extract_unquoted()

     # Guard clause: without both mandatory fields nothing is emitted.
     if not (job_title and employer):
         return

     location = selector.select("//span[@class='fontblacklarge']/b[2]/text()").extract_unquoted()
     job_category = response.request.meta['category']
     posted_on = selector.select("//span[@class='fontblacklarge']/../../following-sibling::tr[2]/td/b/text()").extract_unquoted()

     job = JobItem()
     logo_sources = selector.select("//span[@class='fontblacklarge']/../following-sibling::td[2]/img/@src").extract()
     if logo_sources:
         # Only the first matching image source is used.
         job.load_image(self.get_base_url(response, logo_sources[0]))

     # The same query parameters are passed to both the URL cleaner and
     # the id generator.
     query_params = ('najdi', 'id')

     loader = JobLoader(job)
     # Fields are fed to the loader in this fixed order.
     field_values = (
         ('title', job_title),
         ('company', employer),
         ('category', job_category),
         ('city', location),
         ('details_url', url_query_cleaner(response.url, query_params)),
         ('published_date', posted_on),
         ('id', self.generate_id(response.url, query_params)),
         ('content', response.body_as_unicode()),
         ('source', self.name),
         ('source_label', self.label),
     )
     for field_name, value in field_values:
         loader.add_value(field_name, value)

     yield loader.load_item()