def get_job_advert(self, _id, advert):
    """Fill a job advert from the offer block that follows the anchor ``_id``.

    The children of the first ``div#divrecueil`` element are scanned in
    document order. Once an ``<a>`` whose ``name`` attribute equals ``_id``
    (both compared as text) has been seen, the first following
    ``<div id="offre">`` is used to populate the advert.

    :param _id: identifier of the advert; matched against anchor names.
    :param advert: object to fill in, or ``None`` to create a new
        ``BaseJobAdvert(_id)``.
    :returns: the advert; it is returned without the offer fields set when
        no matching anchor/offer pair is found.
    :raises IndexError: if the page has no ``div#divrecueil`` element.
    """
    if advert is None:
        advert = BaseJobAdvert(_id)

    # A missing container still raises IndexError (unchanged behaviour):
    # it is left loud on purpose rather than returning a half-empty advert.
    items = self.document.getroot().xpath("//div[@id='divrecueil']")[0]

    wanted_name = u'%s' % _id
    anchor_seen = False
    for item in items:
        if not anchor_seen:
            if item.tag == 'a':
                # .get() instead of ['name']: anchors without a name
                # attribute are simply not the one we are looking for.
                name = item.attrib.get('name')
                anchor_seen = name is not None and u'%s' % name == wanted_name
            continue

        # .get() instead of ['id']: a div without an id is not the offer.
        if item.tag != 'div' or item.attrib.get('id') != u'offre':
            continue

        # NOTE(review): the numeric argument of select() looks like an
        # expected element count -- confirm against the parser in use.
        spans = self.parser.select(item, 'div/span', 2, method='xpath')
        advert.society_name = u'CCI %s' % spans[0].text_content()
        advert.job_name = u'%s' % spans[1].text_content()

        # Two fieldsets are requested, but this variant only reads the
        # first one; the call is kept as-is so the lookup is unchanged.
        fieldsets = self.parser.select(item, 'div/fieldset', 2, method='xpath')
        paragraphs = self.parser.select(
            fieldsets[0], 'p[@class="normal"]', method='xpath')
        headings = self.parser.select(
            fieldsets[0], 'h2[@class="titreParagraphe"]', method='xpath')

        # The description is only built when exactly five heading/paragraph
        # pairs are present; otherwise it stays empty.
        parts = []
        if len(paragraphs) == 5 and len(headings) == 5:
            for heading, paragraph in zip(headings, paragraphs):
                parts.append("\r\n-- %s --\r\n" % heading.text)
                parts.append("%s\r\n" % paragraph.text_content())
        advert.description = "".join(parts)

        advert.url = self.url + '#' + advert.id

        date = self.parser.select(
            item, 'div/fieldset/p[@class="dateOffre"]', 1, method='xpath')
        advert.publication_date = dateutil.parser.parse(
            date.text_content()).date()
        break

    return advert
def get_job_advert(self, _id, advert):
    """Fill a job advert from the offer block that follows the anchor ``_id``.

    The children of the first ``div#divrecueil`` element are scanned in
    document order. Once an ``<a>`` whose ``name`` attribute equals ``_id``
    (both compared as text) has been seen, the first following
    ``<div id="offre">`` is used to populate the advert. The description is
    assembled from both fieldsets of that offer block.

    :param _id: identifier of the advert; matched against anchor names.
    :param advert: object to fill in, or ``None`` to create a new
        ``BaseJobAdvert(_id)``.
    :returns: the advert; it is returned without the offer fields set when
        no matching anchor/offer pair is found.
    :raises IndexError: if the page has no ``div#divrecueil`` element.
    """
    if advert is None:
        advert = BaseJobAdvert(_id)

    # A missing container still raises IndexError (unchanged behaviour):
    # it is left loud on purpose rather than returning a half-empty advert.
    items = self.document.getroot().xpath("//div[@id='divrecueil']")[0]

    wanted_name = u'%s' % _id
    anchor_seen = False
    for item in items:
        if not anchor_seen:
            if item.tag == 'a':
                # .get() instead of ['name']: anchors without a name
                # attribute are simply not the one we are looking for.
                name = item.attrib.get('name')
                anchor_seen = name is not None and u'%s' % name == wanted_name
            continue

        # .get() instead of ['id']: a div without an id is not the offer.
        if item.tag != 'div' or item.attrib.get('id') != u'offre':
            continue

        # NOTE(review): the numeric argument of select() looks like an
        # expected element count -- confirm against the parser in use.
        spans = self.parser.select(item, 'div/span', 2, method='xpath')
        advert.society_name = u'CCI %s' % spans[0].text_content()
        advert.job_name = u'%s' % spans[1].text_content()

        fieldsets = self.parser.select(item, 'div/fieldset', 2, method='xpath')
        parts = []

        # First fieldset: used only when it holds exactly five
        # heading/paragraph pairs.
        paragraphs = self.parser.select(
            fieldsets[0], 'p[@class="normal"]', method='xpath')
        headings = self.parser.select(
            fieldsets[0], 'h2[@class="titreParagraphe"]', method='xpath')
        if len(paragraphs) == 5 and len(headings) == 5:
            for heading, paragraph in zip(headings, paragraphs):
                parts.append("\r\n-- %s --\r\n" % heading.text)
                parts.append("%s\r\n" % paragraph.text_content())

        # Second fieldset: used only with three paragraphs and two headings.
        # The first heading is followed by the text of the link found in the
        # first paragraph, the second heading by the two other paragraphs.
        extra_paragraphs = self.parser.select(
            fieldsets[1], 'p[@class="normal"]', method='xpath')
        extra_headings = self.parser.select(
            fieldsets[1], 'h2[@class="titreParagraphe"]', method='xpath')
        if len(extra_paragraphs) == 3 and len(extra_headings) == 2:
            parts.append("\r\n-- %s --\r\n" % extra_headings[0].text)
            link = self.parser.select(
                extra_paragraphs[0], 'a', 1, method='xpath')
            parts.append("%s\r\n" % link.text_content())
            parts.append("\r\n-- %s --\r\n" % extra_headings[1].text)
            parts.append("%s\r\n" % extra_paragraphs[1].text_content())
            parts.append("%s\r\n" % extra_paragraphs[2].text_content())

        advert.description = "".join(parts)
        advert.url = self.url + '#' + advert.id

        date = self.parser.select(
            item, 'div/fieldset/p[@class="dateOffre"]', 1, method='xpath')
        advert.publication_date = dateutil.parser.parse(
            date.text_content()).date()
        break

    return advert