def parse_job_detail(self, response):
    """Build one job item from a job-detail page.

    The title and company are read first; when either XPath comes back
    empty the page is skipped and nothing is yielded.  Otherwise the
    remaining fields are scraped, the image referenced by
    ``img#mainimage`` (if any) is loaded onto the item, and a single
    populated item is yielded.
    """
    selector = HtmlXPathSelector(response)
    job_title = selector.select("//div[@id='businessmyaccountjobadpreviewmain']/div/font/text()").extract_unquoted()
    employer = selector.select("//li[@class='propertiesleft' and contains(text(),'Podjetje:')]/following-sibling::li/text()").extract_unquoted()

    # Guard clause: without both a title and a company there is no item.
    if not (job_title and employer):
        return

    location = selector.select("//li[@class='propertiesleft' and contains(text(),'Regija in kraj dela:')]/following-sibling::li/text()").extract_unquoted()
    # This label contains a non-ASCII character, so the expression is
    # UTF-8 encoded before it is passed to select().
    category_xpath = u"//li[@class='propertiesleft2' and contains(text(),'Področje dela:')]/following-sibling::li/text()".encode('utf-8')
    job_category = selector.select(category_xpath).extract_unquoted()
    logo_urls = selector.select("//img[@id='mainimage']/@src").extract()

    job = JobItem()
    if logo_urls:
        # Only the first matching image is used.
        job.load_image(self.get_base_url(response, logo_urls[0]))

    loader = JobLoader(job)
    field_values = (
        ('title', job_title),
        ('company', employer),
        ('category', job_category),
        ('city', location),
        ('details_url', url_query_cleaner(response.url)),
        # Keep only what follows the "label:" prefix of the dates entry.
        ('published_date', selector.select("//li[@class='dates']/text()").re(r".*:\s+(.*)")),
        ('id', self.generate_id(response.url)),
        ('content', response.body_as_unicode()),
        ('source', self.name),
        ('source_label', self.label),
    )
    for field_name, field_value in field_values:
        loader.add_value(field_name, field_value)

    yield loader.load_item()
def parse_job_detail(self, response):
    """Build one job item from a job-detail page.

    Title and company are required: if either XPath yields nothing the
    callback produces no item.  The category is not scraped from the page
    but read from ``response.request.meta['category']``.  Both the stored
    details URL and the generated id are derived from the response URL
    together with the ``('najdi', 'id')`` parameter names.
    """
    selector = HtmlXPathSelector(response)
    job_title = selector.select("//span[@class='header']/text()").extract_unquoted()
    employer = selector.select("//span[@class='fontblacklarge']/b/text()").extract_unquoted()

    # Guard clause: nothing to emit without both required fields.
    if not (job_title and employer):
        return

    location = selector.select("//span[@class='fontblacklarge']/b[2]/text()").extract_unquoted()
    job_category = response.request.meta['category']
    posted_on = selector.select("//span[@class='fontblacklarge']/../../following-sibling::tr[2]/td/b/text()").extract_unquoted()

    job = JobItem()
    logo_urls = selector.select("//span[@class='fontblacklarge']/../following-sibling::td[2]/img/@src").extract()
    if logo_urls:
        # Only the first matching image is used.
        job.load_image(self.get_base_url(response, logo_urls[0]))

    # The same parameter names are handed to both URL helpers below.
    url_params = ('najdi', 'id')

    loader = JobLoader(job)
    loader.add_value('title', job_title)
    loader.add_value('company', employer)
    loader.add_value('category', job_category)
    loader.add_value('city', location)
    loader.add_value('details_url', url_query_cleaner(response.url, url_params))
    loader.add_value('published_date', posted_on)
    loader.add_value('id', self.generate_id(response.url, url_params))
    loader.add_value('content', response.body_as_unicode())
    loader.add_value('source', self.name)
    loader.add_value('source_label', self.label)

    yield loader.load_item()
def parse_job_detail(self, response):
    """Complete the item carried in the request meta and yield it.

    The item found under ``response.request.meta['item']`` (presumably
    filled in by the callback that scheduled this request -- confirm
    against the caller) is merged into a fresh loader, after which the
    id, source name, source label and raw page content are added.
    """
    loader = JobLoader(JobItem())
    carried_item = response.request.meta['item']
    loader.add_item(carried_item)

    extra_fields = (
        ('id', self.generate_id(response.url)),
        ('source', self.name),
        ('source_label', self.label),
        ('content', response.body_as_unicode()),
    )
    for field_name, field_value in extra_fields:
        loader.add_value(field_name, field_value)

    yield loader.load_item()
def parse_job_detail(self, response):
    """Complete the item carried in the request meta and yield it.

    The item found under ``response.request.meta['item']`` is merged into
    a fresh loader; the id, source name, source label, a summary scraped
    from the detail table and the raw page content are then added.
    """
    hxs = HtmlXPathSelector(response)

    loader = JobLoader(JobItem())
    loader.add_item(response.request.meta['item'])

    # The trailing comma matters: ('IDEPD') is just the string 'IDEPD',
    # whereas a one-element tuple is needed to pass a collection of
    # parameter names (presumably the query parameters that identify the
    # job -- confirm against generate_id).
    loader.add_value('id', self.generate_id(response.url, ('IDEPD',)))
    loader.add_value('source', self.name)
    loader.add_value('source_label', self.label)

    # The summary is the text of the cell right after the one whose text
    # contains 'Opis del in nalog'.
    summary = hxs.select("//div[@class='cc-gv']//tr/td[contains(text(),'Opis del in nalog')]/following-sibling::td[1]/text()").extract_unquoted()
    loader.add_value('summary', summary)
    loader.add_value('content', response.body_as_unicode())

    yield loader.load_item()