def parse(self, response):
    """Build a ``Job`` item from a single job-posting response.

    Static fields are filled with constants, the remaining ones are read
    from the page with XPath/CSS selectors, and the heading/body text nodes
    are handed to ``self.get_description`` (defined outside this block).

    Args:
        response: the downloaded posting page.

    Returns:
        The populated ``Job`` item.
    """
    job = Job()
    job["website"] = self.meta["name"]
    job["url"] = response.url
    job["readvertised"] = "N/A"
    job["year"] = "2020"
    job["positionLevel"] = "N/A"
    job["positions"] = 1
    job["jobTitle"] = response.xpath(
        '//div[@class="flex-full"]/h3[contains(@class, "mt-500")]/text()'
    ).get()

    # Location is a comma-separated string: first part is used as the town,
    # last part as the country. Both fields are left unset when it is absent.
    location = response.xpath(
        '//div[contains(@class, "mb-500")]/text()').get()
    if location is not None:
        location_parts = location.split(',')
        job["country"] = location_parts[-1].strip()
        job["town"] = location_parts[0].strip()

    job["company"] = response.xpath(
        '//div[contains(@class, "mb-500")]/a/text()').get()
    job["salary"] = response.xpath(
        '//div[@class="flex-full"]/p[1]/span[contains(text(), "Salary")]/following::span/text()'
    ).get()

    # The employment type is read from the SECOND text node, so at least two
    # nodes must exist; a single node used to raise IndexError here.
    employment_type = response.xpath(
        '//div[@class="flex-full"]/p[1]/text()').getall()
    if len(employment_type) > 1:
        job["employmentType"] = employment_type[1].strip()
        job["jobType"] = employment_type[1].strip()

    divs = response.css('div.row-flex div.border-grey-sm *::text').getall()
    titles = response.xpath(
        '//h4/text() | //strong /text() | //b/text()').getall()
    self.get_description(titles, divs, job)
    return job
def parse(self, response):
    """Populate and return a ``Job`` item for the posting in ``response``.

    The cleaned page heading is reused for the title, technology and
    industry fields. Header details, tags and the body text are passed to
    ``self.get_header_details``, ``self.get_tagged_details`` and
    ``self.get_description`` (all defined outside this block).
    """
    heading = self.clean_text(response.css('header h1::text').get())
    employer = self.clean_text(response.css('p.company::text').get())

    job = Job()
    # Assigned in a fixed order so the item's field order stays the same.
    for field, value in (
        ("ID", 1),
        ("website", self.meta["name"]),
        ("url", response.url),
        ("jobTitle", heading),
        ("positionLevel", "N/A"),
        ("positions", 1),
        ("readvertised", "N/A"),
        ("year", "2020"),
        ("company", employer),
        ("technology", heading),
        ("industry", heading),
        ("country", "Kenya"),
    ):
        job[field] = value

    header_items = response.css('header .details li *::text').getall()
    self.get_header_details(header_items, job)

    tag_items = response.css('header .tags li span::text').getall()
    self.get_tagged_details(tag_items, job)

    body_text = response.css('.content *::text').getall()
    headings = response.xpath(
        '//h3/text() | //strong /text() | //b/text()').getall()
    self.get_description(headings, body_text, job)
    return job
def parse(self, response):
    """Template parser: fill every scraped field from a placeholder selector.

    All page-derived fields are read with the literal CSS query
    ``'selector::text'``; only ``ID``, ``website``, ``url`` and ``country``
    receive real values. Returns the ``Job`` item.
    """
    # Field names in the order they are written to the item.
    scraped_fields = (
        "jobTitle",
        "jobType",
        "positionLevel",
        "positions",
        "uploadDate",
        "year",
        "deadline",
        "town",
        "contact",
        "readvertised",
        "salary",
        "company",
        "technology",
        "description",
        "employmentType",
        "skills",
        "industry",
        "responsibilities",
        "requirements",
    )

    job = Job()
    job["ID"] = 1
    job["website"] = self.meta["domain"]
    job["url"] = response.url
    for field in scraped_fields:
        job[field] = response.css('selector::text').get()
    job["country"] = "Kenya"
    return job
def parse(self, response):
    """Populate and return a ``Job`` item for the posting in ``response``.

    Title, upload date and town come from the page; the other scalar fields
    are constants. Header links and the description text are passed on to
    ``self.get_header_details`` and ``self.get_description`` (defined
    outside this block).
    """
    posted_on = response.xpath(
        "//span[contains(text(), 'Posted')]/*/text()").get()
    town_fragments = response.css(
        'div.card-body div.pb-3 p::text').getall()
    town = self.clean_text(' '.join(town_fragments))

    job = Job()
    # Assigned in a fixed order so the item's field order stays the same.
    for field, value in (
        ("ID", 1),
        ("website", self.meta["name"]),
        ("url", response.url),
        ("jobTitle", response.css('#postJob h2::text').get()),
        ("positions", 1),
        ("uploadDate", posted_on),
        ("year", "2020"),
        ("deadline", "N/A"),
        ("town", town),
        ("readvertised", "N/A"),
        ("salary", "Login to view salary"),
        ("country", "Kenya"),
    ):
        job[field] = value

    header_links = response.css(
        '#job-description div.card-body div.pb-3 a *::text').getall()
    self.get_header_details(header_links, job)

    body_text = response.css(
        '#job-description div.card-body *::text').getall()
    headings = response.xpath(
        '//h5/text() | //strong /text() | //b/text()').getall()
    self.get_description(headings, body_text, job)
    return job
def parse(self, response):
    """Populate and return a ``Job`` item using only generic page text.

    No field is read from a site-specific selector: the constants are set
    first, then every heading/bold text and every text node under ``body``
    are passed to ``self.get_description`` (defined outside this block).
    """
    job = Job()
    # Assigned in a fixed order so the item's field order stays the same.
    for field, value in (
        ("ID", 1),
        ("website", self.meta["name"]),
        ("url", response.url),
        ("positionLevel", "N/A"),
        ("positions", 1),
        ("readvertised", "N/A"),
        ("country", "Kenya"),
    ):
        job[field] = value

    heading_query = (
        '//h1 /text() | //h2 /text() | //h3 /text() | //h4 /text() | //strong /text() | //b/text()'
    )
    headings = response.xpath(heading_query).getall()
    page_text = response.css('body *::text').getall()
    self.get_description(headings, page_text, job)
    return job
def parse(self, response):
    """Build a ``Job`` item from a single job-posting response.

    The title is the ``h1.h2`` text and the company is the text of the
    ``h4`` sibling that follows it. Heading and container text nodes are
    handed to ``self.get_description`` (defined outside this block).

    Args:
        response: the downloaded posting page.

    Returns:
        The populated ``Job`` item.
    """
    job = Job()
    job["ID"] = 1
    job["website"] = self.meta["name"]
    job["url"] = response.url
    job["jobTitle"] = response.css('h1.h2::text').get()
    # .get() extracts the first matching text; without it the field held the
    # selector list object itself instead of a string.
    job["company"] = response.xpath(
        "//h1[@class='h2']/following-sibling::h4/text()").get()
    job["positions"] = 1
    job["readvertised"] = "N/A"
    job["country"] = "Kenya"

    titles = response.xpath(
        '//h1 /text() | //h4 /text() | //strong /text() | //b/text()'
    ).getall()
    divs = response.css('div.container *::text').getall()
    self.get_description(titles, divs, job)
    return job
def parse(self, response):
    """Build a ``Job`` item from a single job-posting response.

    The posting link text is used as the job title; the company is taken
    as whatever follows the last " at " in that text. Heading and article
    text nodes are handed to ``self.get_description`` (defined outside this
    block).

    Args:
        response: the downloaded posting page.

    Returns:
        The populated ``Job`` item.
    """
    # Selected once: the same text feeds both the title and the company.
    title = response.css('article.post h2 a::text').get()

    job = Job()
    job["ID"] = 1
    job["website"] = self.meta["name"]
    job["url"] = response.url
    job["jobTitle"] = title
    job["jobType"] = "N/A"
    job["positions"] = 1
    job["uploadDate"] = response.css('div#meta_authorl::text').get()
    job["readvertised"] = "N/A"
    # A missing title link used to raise AttributeError on None.split();
    # fall back to the "N/A" marker used for unknown values instead.
    job["company"] = title.split(" at ")[-1] if title is not None else "N/A"
    job["employmentType"] = "N/A"
    job["country"] = "Kenya"

    titles = response.xpath(
        '//h3/text() | //strong /text() | //b/text()').getall()
    divs = response.css('article.post *::text').getall()
    self.get_description(titles, divs, job)
    return job
def parse(self, response):
    """Populate and return a ``Job`` item for the advert in ``response``.

    The cleaned advert title doubles as description and technology. The
    attribute text nodes are passed to ``self.get_details`` (defined outside
    this block).
    """
    advert_title = self.clean_text(
        response.css('.b-advert-title-inner::text').get())
    posted_on = self.clean_text(response.css('time::text').get())
    town = self.clean_text(
        response.css('.b-advert-info-statistics::text').get())

    job = Job()
    # Assigned in a fixed order so the item's field order stays the same.
    for field, value in (
        ("ID", 1),
        ("website", self.meta["name"]),
        ("url", response.url),
        ("jobTitle", advert_title),
        ("positions", 1),
        ("description", advert_title),
        ("technology", advert_title),
        ("uploadDate", posted_on),
        ("town", town),
        ("year", "2020"),
        ("contact", "N/A"),
        ("readvertised", "N/A"),
        ("country", "Kenya"),
    ):
        job[field] = value

    attribute_text = response.css(".b-advert-attributes *::text").getall()
    self.get_details(attribute_text, job)
    return job
def parse(self, response):
    """Populate and return a ``Job`` item for the posting in ``response``.

    The header title is reused for title, position level and technology.
    Header spans and body text are passed to ``self.get_header_details`` and
    ``self.get_description`` (defined outside this block).
    """
    heading = response.css('header h1.title::text').get()
    posted_on = response.css('time.time::attr(datetime)').get()
    # NOTE(review): 'selector::text' looks like a leftover template
    # placeholder rather than a real query for the deadline - confirm.
    deadline = response.css('selector::text').get()

    job = Job()
    # Assigned in a fixed order so the item's field order stays the same.
    for field, value in (
        ("ID", 1),
        ("website", self.meta["name"]),
        ("url", response.url),
        ("jobTitle", heading),
        ("positionLevel", heading),
        ("positions", 1),
        ("uploadDate", posted_on),
        ("deadline", deadline),
        ("readvertised", "N/A"),
        ("salary", "N/A"),
        ("technology", heading),
        ("country", "Kenya"),
    ):
        job[field] = value

    header_spans = response.css('article header span::text').getall()
    self.get_header_details(header_spans, job)

    headings = response.xpath(
        '//h3/text() | //strong /text() | //b/text()').getall()
    body_text = response.css('.content *::text').getall()
    self.get_description(headings, body_text, job)
    return job
def parse(self, response):
    """Build a ``Job`` item from a single job-posting response.

    Most fields come from the ``single-job-meta`` list, where each entry is
    looked up by its label ("Job Type:", "Career Level:", "Deadline:",
    "Location:"). The title is the part of the entry heading before the
    first en dash. Bold text and body text nodes are handed to
    ``self.get_description`` (defined outside this block).

    Args:
        response: the downloaded posting page.

    Returns:
        The populated ``Job`` item. Fields whose source node is missing are
        ``None``; ``year`` falls back to "N/A" when there is no deadline.
    """

    def meta_value(label):
        # Text of the meta-list entry whose title span equals ``label``,
        # or None when the page has no such entry.
        return response.xpath(
            "//div[@class='single-job-meta']/ul/li"
            "[span[@class='job-meta-title']/text() = '{}']"
            "/span[@class='job-meta-content']/text()".format(label)
        ).get()

    heading = response.css('h1.entry-title::text').get()
    # A missing heading used to raise AttributeError on None.split().
    short_title = heading.split("–")[0] if heading is not None else None

    job_type = meta_value('Job Type:')
    deadline = meta_value('Deadline:')
    # The year is the last space-separated token of the deadline text;
    # a missing deadline used to raise AttributeError on None.strip().
    if deadline is not None:
        year = deadline.strip().split(" ")[-1]
    else:
        year = "N/A"

    job = Job()
    job["ID"] = 1
    job["website"] = self.meta["name"]
    job["url"] = response.url
    job["jobTitle"] = short_title
    job["jobType"] = job_type
    job["positionLevel"] = meta_value('Career Level:')
    job["positions"] = 1
    job["uploadDate"] = "N/A"
    job["year"] = year
    job["deadline"] = deadline
    job["town"] = meta_value('Location:')
    job["readvertised"] = "N/A"
    job["company"] = "N/A"
    job["technology"] = short_title
    job["employmentType"] = job_type
    job["industry"] = heading
    job["country"] = "Kenya"

    titles = response.xpath('//strong /text() | //b/text()').getall()
    divs = response.css('div.entry-content *::text').getall()
    self.get_description(titles, divs, job)
    return job
def parse(self, response):
    """Populate and return a ``Job`` item for the posting in ``response``.

    Title, work type and salary are cleaned header texts; the work type
    fills both ``jobType`` and ``employmentType``. Location, company and
    detail text nodes are passed to ``self.get_town``, ``self.get_company``
    and ``self.get_description`` (defined outside this block).
    """
    heading = self.clean_text(
        response.css('.job-header__title::text').get())
    work_type = self.clean_text(
        response.css('.job-header__work-type::text').get())
    pay = self.clean_text(
        response.css('.job-header__salary::text').get())

    job = Job()
    # Assigned in a fixed order so the item's field order stays the same.
    for field, value in (
        ("ID", 1),
        ("website", self.meta["name"]),
        ("url", response.url),
        ("jobTitle", heading),
        ("positions", 1),
        ("jobType", work_type),
        ("employmentType", work_type),
        ("year", "2020"),
        ("readvertised", "N/A"),
        ("salary", pay),
        ("country", "Kenya"),
    ):
        job[field] = value

    location_text = response.css('.job-header__location *::text').getall()
    self.get_town(location_text, job)

    company_text = response.css('h2 *::text').getall()
    self.get_company(company_text, job)

    detail_text = response.css('.job__details *::text').getall()
    self.get_description(detail_text, job)
    return job
def parse(self, response):
    """Populate and return a ``Job`` item for the posting in ``response``.

    The cleaned ``h1`` text fills title, technology and industry. Side-box,
    header and offer-detail text nodes are passed to ``self.get_type``,
    ``self.get_header_details`` and ``self.get_description`` (defined
    outside this block).
    """
    heading = self.clean_text(response.css('h1::text').get())
    employer = self.clean_text(response.css('h2::text').get())
    # NOTE(review): 'selector::text' looks like a leftover template
    # placeholder rather than a real query for skills - confirm.
    skills = response.css('selector::text').get()

    job = Job()
    # Assigned in a fixed order so the item's field order stays the same.
    for field, value in (
        ("ID", 1),
        ("website", self.meta["name"]),
        ("url", response.url),
        ("jobTitle", heading),
        ("technology", heading),
        ("industry", heading),
        ("positionLevel", "N/A"),
        ("year", "2020"),
        ("readvertised", "N/A"),
        ("company", employer),
        ("skills", skills),
        ("responsibilities", "N/A"),
        ("country", "Kenya"),
    ):
        job[field] = value

    side_box_text = response.css('.box_r *::text').getall()
    self.get_type(side_box_text, job)

    header_spans = response.css('header span::text').getall()
    self.get_header_details(header_spans, job)

    offer_text = response.css('.detalle_oferta ul *::text').getall()
    self.get_description(offer_text, job)
    return job
def parse(self, response):
    """Build a ``Job`` item from a single job-posting response.

    Job type, location and job field are looked up by label in the
    ``job-key-info`` list; the year is the text after the last comma of the
    posted date. Bold text and description text nodes are handed to
    ``self.get_description`` (defined outside this block).

    Args:
        response: the downloaded posting page.

    Returns:
        The populated ``Job`` item. Fields whose source node is missing are
        ``None``; ``year`` falls back to "N/A" when there is no posted date.
    """

    def key_info(label):
        # Link text of the key-info entry whose title span equals ``label``,
        # or None when the page has no such entry.
        return response.xpath(
            "//ul[@class='job-key-info']/li"
            "[span[@class='jkey-title']/text() = '{}']"
            "/span[@class='jkey-info']/a/text()".format(label)
        ).get()

    job_type = key_info('Job Type')
    posted_on = response.css('div#posted-date::text').get()
    # A missing posted-date node used to raise AttributeError on
    # None.strip(); fall back to the "N/A" marker instead.
    if posted_on is not None:
        year = posted_on.strip().split(",")[-1]
    else:
        year = "N/A"
    # The same link text is stored as both company and industry.
    industry_link = response.css('li.job-industry a::text').get()

    job = Job()
    job["ID"] = 1
    job["website"] = self.meta["name"]
    job["url"] = response.url
    job["jobTitle"] = response.css('a.subjob-title::text').get()
    job["jobType"] = job_type
    job["positionLevel"] = "N/A"
    job["positions"] = 1
    job["uploadDate"] = posted_on
    job["year"] = year
    job["deadline"] = response.css(
        'div.read-date-sec-li:nth-child(2)::text').get()
    job["town"] = key_info('Location')
    job["readvertised"] = "N/A"
    job["company"] = industry_link
    job["technology"] = key_info('Job Field')
    job["employmentType"] = job_type
    job["industry"] = industry_link
    job["country"] = "Kenya"

    titles = response.xpath('//strong /text() | //b/text()').getall()
    divs = response.css('#job-description-holder *::text').getall()
    self.get_description(titles, divs, job)
    return job
def parse(self, response):
    """Build a ``Job`` item from a single job-posting response.

    Fields are read from the entry heading and from ``itemprop``-annotated
    spans; the year is the text after the last comma of the "updated" date.
    Heading and content text nodes are handed to ``self.get_description``
    (defined outside this block).

    Args:
        response: the downloaded posting page.

    Returns:
        The populated ``Job`` item. Fields whose source node is missing are
        ``None``; ``year`` falls back to "N/A" when there is no date.
    """
    heading = response.xpath('//h1[@class="entry-title"]/text()').get()
    employment_type = response.xpath(
        '//span[@itemprop="employmentType"]/text()').get()
    category = response.xpath(
        '//span[@itemprop="occupationalCategory"]/text()').get()
    updated = response.xpath('//span[@class="updated"]/text()').get()
    # A missing date node used to raise AttributeError on None.split().
    year = updated.split(',')[-1].strip() if updated is not None else "N/A"

    job = Job()
    job["ID"] = 1
    job["website"] = self.meta["name"]
    job["url"] = response.url
    job["jobTitle"] = heading
    job["jobType"] = employment_type
    job["positionLevel"] = heading
    job["positions"] = 1
    job["uploadDate"] = updated
    job["year"] = year
    job["deadline"] = "N/A"
    job["town"] = response.xpath('//span[@itemprop="address"]/text()').get()
    # Key was misspelled "reavertised"; every other parser in this file
    # writes "readvertised".
    job["readvertised"] = "N/A"
    job["company"] = response.css('.wpjb-job-company::text').get()
    job["technology"] = category
    job["employmentType"] = employment_type
    job["industry"] = category
    job["country"] = "Kenya"

    titles = response.xpath(
        '//h3/text() | //strong /text() | //b/text()').getall()
    divs = response.css('.wpjb-job-content *::text').getall()
    self.get_description(titles, divs, job)
    return job
def parse(self, response):
    """Build a ``Job`` item from a single job-posting response.

    Values are located relative to labelled ``<strong>`` elements
    ("Title", "Gross Salary", "Location", "Qualifications",
    "Responsibilities", "How to Apply") and the entry header/footer.

    Args:
        response: the downloaded posting page.

    Returns:
        The populated ``Job`` item. ``year``, ``salary``, ``town`` and
        ``deadline`` fall back to "N/A" when their source text is missing.
    """
    # NOTE(review): the XPath expressions below start with "//". Per the
    # Scrapy selector docs such expressions are evaluated against the whole
    # document even when called on a sub-selector (a relative lookup needs
    # ".//"). They are left unchanged because the queries may have been
    # tuned against that behaviour - confirm before narrowing them.
    job_content = response.xpath(
        '//main[@class="content"]/article[contains(@class, "job_posting")]'
    )
    job_details = job_content.xpath('//div[@class="entry-content"]')
    job_meta = response.xpath('//footer/p[@class="entry-meta"]')

    title = job_content.xpath(
        '//header[@class="entry-header"]/h1[@class="entry-title"]/text()'
    ).get()

    upload_time = job_content.xpath(
        '//header[@class="entry-header"]/p/time/text()').get()
    # The year is the third space-separated token of the upload time. A
    # shorter string used to raise IndexError; it now yields "N/A".
    year = "N/A"
    if upload_time:
        date_tokens = upload_time.split(" ")
        if len(date_tokens) > 2:
            year = date_tokens[2]

    job_type = job_meta.xpath(
        '//span[contains(@class, "wsm-categories")]/a[1]/text()').get()

    # Description: text of the paragraphs preceding the "Title" paragraph.
    description = ''.join(
        job_details.xpath(
            '//p/strong[contains(text(), "Title")]/ancestor::p/preceding-sibling::p/text()'
        ).getall())

    salary = job_details.xpath(
        '//p/strong[contains(text(), "Gross Salary")]/ancestor::p/text()'
    ).get()
    salary = salary.strip() if salary else "N/A"

    town = job_details.xpath(
        '//p/strong[contains(text(), "Location")]/ancestor::p/text()').get()
    town = town.strip() if town is not None else "N/A"

    skills = ''.join(
        job_details.xpath(
            '//p/span/strong[contains(text(), "Qualifications")]/ancestor::p/following-sibling::ul[1]/li/text()'
        ).getall())
    responsibilities = ''.join(
        job_details.xpath(
            '//p/span/strong[contains(text(), "Responsibilities")]/ancestor::p/following-sibling::ul[1]/li/text()'
        ).getall())
    contact = job_details.xpath(
        '//p/span/strong[contains(text(), "How to Apply")]/ancestor::p/span/strong[contains(text(), "@")]/text()'
    ).get()
    company = job_meta.xpath('//span[last()]/a/text()').get()

    # Deadline: the last two text nodes of the "How to Apply" paragraph,
    # joined; "N/A" when fewer than two nodes exist.
    application_details = job_details.xpath(
        '//p/span/strong[contains(text(), "How to Apply")]/ancestor::p/text()'
    ).getall()
    if len(application_details) >= 2:
        deadline = ''.join(application_details[-2:]).strip()
    else:
        deadline = "N/A"

    industry = job_meta.xpath(
        '//p/span[@class="entry-tags"]/a/text()').get()
    country = 'Kenya'
    requirements = 'N/A'
    technology = job_details.xpath('//p[3]/strong/text()').get()

    job = Job()
    job["ID"] = 1
    job["website"] = self.meta["name"]
    job["url"] = response.url
    job["jobTitle"] = title
    job["jobType"] = job_type
    job["positionLevel"] = "N/A"
    job["positions"] = 1
    job["uploadDate"] = upload_time
    job["year"] = year
    job["deadline"] = deadline
    job["town"] = town
    job["contact"] = contact
    job["readvertised"] = "NO"
    job["salary"] = salary
    job["company"] = company
    job["technology"] = technology
    job["description"] = description
    job["employmentType"] = job_type
    job["skills"] = skills
    job["industry"] = industry
    job["responsibilities"] = responsibilities
    job["requirements"] = requirements
    job["country"] = country
    return job
def parse(self, response):
    """Build a ``Job`` item from a single job-posting response.

    The cleaned description text is cut into sections at the markers
    "JOB SUMMARY", "RESPONSIBILITIES", "REQUIRED SKILLS" and
    "REQUIRED EDUCATION".

    Args:
        response: the downloaded posting page.

    Returns:
        The populated ``Job`` item. ``year``, ``town`` and ``country`` fall
        back to "N/A" when the date or location node is missing.
    """
    title = response.xpath('//div[@class="job-title"]/text()').get()
    company = response.xpath('//div[@class="job-company"]/text()').get()
    contact = response.xpath(
        '//div[@class="links"]/a[@class="view-job-link"]/@href').get()

    # The year is the text after the last "." of the date; a missing date
    # used to raise AttributeError on None.split().
    date = response.xpath('//div[@class="job-date"]/text()').get()
    year = date.split('.')[-1].strip() if date is not None else "N/A"

    # Location is comma-separated: first part town, last part country.
    location = response.xpath('//div[@class="job-location"]/text()').get()
    if location is not None:
        location_parts = location.split(',')
        town = location_parts[0].strip()
        country = location_parts[-1].strip()
    else:
        town = country = "N/A"

    description = self.clean_text(''.join(
        response.xpath('//div[@class="job-desc"]/p/text()').getall()))

    def marker_position(marker):
        # str.find returns -1 for a missing marker; as before, a missing
        # marker is treated as position 0 (start of the text).
        return max(description.find(marker), 0)

    summary_pos = marker_position('JOB SUMMARY')
    resp_pos = marker_position('RESPONSIBILITIES')
    skill_pos = marker_position('REQUIRED SKILLS')
    edu_pos = marker_position('REQUIRED EDUCATION')

    # Plain slicing replaces the former XPath substring() calls, which
    # (a) failed whenever the description contained a double quote, with
    # the error hidden by a bare except, and (b) were shifted by one
    # character because XPath positions are 1-based while str.find is
    # 0-based. A slice whose end precedes its start is empty, matching
    # substring() with a non-positive length.
    job_desc = description[summary_pos:resp_pos]
    responsibilities = description[resp_pos:skill_pos]
    skills = description[skill_pos:edu_pos]
    education = description[edu_pos:]

    job = Job()
    job["ID"] = 1
    job["website"] = self.meta["name"]
    job["url"] = response.url
    job["jobTitle"] = title
    job["company"] = company
    job["jobType"] = "Fulltime"
    job["positionLevel"] = "N/A"
    job["uploadDate"] = date
    job["year"] = year
    job["deadline"] = "N/A"
    job["town"] = town
    job["country"] = country
    job["contact"] = contact
    job["readvertised"] = "N"
    job["technology"] = skills
    job["description"] = job_desc
    job["employmentType"] = ''
    job["skills"] = skills
    job["industry"] = ''
    job["responsibilities"] = responsibilities
    job["requirements"] = education
    return job
def parse(self, response):
    """Build a ``Job`` item from a single job-posting response.

    Description, responsibilities and qualifications are located relative
    to elements containing those words; the education requirement is the
    part of the qualifications text from "Degree" up to the next period.

    Args:
        response: the downloaded posting page.

    Returns:
        The populated ``Job`` item. ``requirements`` is "N/A" when no
        "Degree" is mentioned and ``company`` is "N/A" when the page has no
        title.
    """
    desc = response.xpath(
        '//*[contains(text(), "Description")]/following::p[1]/text() | //div[@id="carrerbox"]/descendant-or-self::div[@class="wpb_wrapper"]/p/strong/span[contains(text(), "Job description")]/following::p[1]/text()'
    ).get()
    resp = ''.join(
        response.xpath(
            '//*[contains(text(), "Responsibilities")]/following::ul[1]/li/text() | //div[@class="jobs-subheading"]/span/strong[contains(text(), "Responsibilities")]/ancestor::div/following-sibling::div/ul[1]/li/div/p/text()'
        ).getall())
    # NOTE(review): the second alternative of this query is identical to
    # the one used for responsibilities above - possibly a copy/paste
    # leftover; confirm against the site markup.
    qualifications = ''.join(
        response.xpath(
            '//*[contains(text(), "Responsibilities")]/following::ul[2]/li/text() | //div[@class="jobs-subheading"]/span/strong[contains(text(), "Responsibilities")]/ancestor::div/following-sibling::div/ul[1]/li/div/p/text()'
        ).getall())

    # Search for the closing period AFTER "Degree". Searching from the
    # start of the text produced an empty slice whenever an earlier period
    # existed, and dropped the last character when there was no period.
    degree_at = qualifications.find("Degree")
    if degree_at == -1:
        education = "N/A"
    else:
        period_at = qualifications.find(".", degree_at)
        if period_at == -1:
            education = qualifications[degree_at:]
        else:
            education = qualifications[degree_at:period_at]

    contact = response.xpath(
        '//p/strong[contains(text(), "@")]/text() | //p/span/strong[contains(text(), "How to Apply")]/following::p[1]/span/a/@href'
    ).get()
    title = response.xpath(
        '//h1[contains(@class, "entry-title")]/text()').get()

    # "Position: X" / "Location: Y" paragraphs: keep the text after the
    # last colon.
    position = response.xpath(
        '//div[@class="wpb_wrapper"]/p[contains(text(), "Position")]/text()'
    ).get()
    if position is not None:
        position = position.split(':')[-1].strip()
    location = response.xpath(
        '//div[@class="wpb_wrapper"]/p[contains(text(), "Location")]/text()'
    ).get()
    if location is not None:
        location = location.split(':')[-1].strip()

    industry = response.xpath('//div[@class="row"]/p[1]/text()').get()

    job = Job()
    job["ID"] = 1
    job["website"] = self.meta["name"]
    job["url"] = response.url
    job["jobTitle"] = title
    # Company is whatever follows the last "Job" in the title; a missing
    # title used to raise AttributeError on None.split().
    job["company"] = title.split('Job')[-1] if title is not None else "N/A"
    job["jobType"] = "Fulltime"
    job["positionLevel"] = position
    job["uploadDate"] = "N/A"
    job["year"] = "2020"
    job["deadline"] = "N/A"
    job["town"] = location
    job["country"] = "Kenya"
    job["contact"] = contact
    job["readvertised"] = "N/A"
    job["technology"] = resp
    job["description"] = desc
    job["employmentType"] = "Fulltime"
    job["skills"] = qualifications
    job["industry"] = industry
    job["responsibilities"] = resp
    job["requirements"] = education
    return job