コード例 #1
0
ファイル: glassdoor.py プロジェクト: JerryNyoike/jobscrape
    def parse(self, response):
        """Build a ``Job`` item from a single job-detail page.

        Fixed placeholder values are used for the fields this page does not
        expose; title, location, company, salary and employment type are read
        with XPath, and the free-text sections are delegated to
        ``self.get_description``.

        Args:
            response: Page response providing ``url``, ``xpath`` and ``css``.

        Returns:
            The populated ``Job`` item.
        """
        job = Job()
        job["website"] = self.meta["name"]
        job["url"] = response.url
        job["readvertised"] = "N/A"
        job["year"] = "2020"
        job["positionLevel"] = "N/A"
        job["positions"] = 1
        job["jobTitle"] = response.xpath(
            '//div[@class="flex-full"]/h3[contains(@class, "mt-500")]/text()'
        ).get()

        location = response.xpath(
            '//div[contains(@class, "mb-500")]/text()').get()
        if location is not None:
            # First comma-separated part is the town, last one the country.
            parts = [part.strip() for part in location.split(',')]
            job["country"] = parts[-1]
            job["town"] = parts[0]

        job["company"] = response.xpath(
            '//div[contains(@class, "mb-500")]/a/text()').get()
        job["salary"] = response.xpath(
            '//div[@class="flex-full"]/p[1]/span[contains(text(), "Salary")]/following::span/text()'
        ).get()

        employment_type = response.xpath(
            '//div[@class="flex-full"]/p[1]/text()').getall()
        # The value is read from the SECOND text node, so two nodes must
        # exist; a single-node result would otherwise raise IndexError.
        if len(employment_type) > 1:
            label = employment_type[1].strip()
            job["employmentType"] = label
            job["jobType"] = label

        divs = response.css('div.row-flex div.border-grey-sm *::text').getall()
        titles = response.xpath(
            '//h4/text() | //strong /text() | //b/text()').getall()
        self.get_description(titles, divs, job)

        return job
コード例 #2
0
    def parse(self, response):
        """Populate a ``Job`` from a page whose headline sits in ``header h1``.

        The cleaned headline is stored as title, technology and industry.
        Header details, header tags and the body text are handed to
        ``get_header_details``, ``get_tagged_details`` and ``get_description``.

        Args:
            response: Page response providing ``url``, ``css`` and ``xpath``.

        Returns:
            The populated ``Job`` item.
        """
        item = Job()
        item["ID"] = 1
        item["website"] = self.meta["name"]
        item["url"] = response.url

        # One headline lookup feeds three fields.
        headline = self.clean_text(response.css('header h1::text').get())
        item["jobTitle"] = headline
        item["positionLevel"] = "N/A"
        item["positions"] = 1
        item["readvertised"] = "N/A"
        item["year"] = "2020"
        item["company"] = self.clean_text(response.css('p.company::text').get())
        item["technology"] = headline
        item["industry"] = headline
        item["country"] = "Kenya"

        header_texts = response.css('header .details li *::text').getall()
        self.get_header_details(header_texts, item)
        tag_texts = response.css('header .tags li span::text').getall()
        self.get_tagged_details(tag_texts, item)

        body_texts = response.css('.content *::text').getall()
        heading_texts = response.xpath(
            '//h3/text() | //strong /text() | //b/text()').getall()
        self.get_description(heading_texts, body_texts, item)
        return item
コード例 #3
0
 def parse(self, response):
     """Fill a ``Job`` using the same ``'selector::text'`` query for each field.

     NOTE(review): ``'selector::text'`` looks like a placeholder meant to be
     replaced with a real CSS selector per field -- confirm before relying
     on this parser.

     Args:
         response: Page response providing ``url`` and ``css``.

     Returns:
         The populated ``Job`` item.
     """
     item = Job()
     item["ID"] = 1
     item["website"] = self.meta["domain"]
     item["url"] = response.url

     # Fields scraped from the page, in the order they are stored.
     scraped_fields = (
         "jobTitle",
         "jobType",
         "positionLevel",
         "positions",
         "uploadDate",
         "year",
         "deadline",
         "town",
         "contact",
         "readvertised",
         "salary",
         "company",
         "technology",
         "description",
         "employmentType",
         "skills",
         "industry",
         "responsibilities",
         "requirements",
     )
     for field_name in scraped_fields:
         item[field_name] = response.css('selector::text').get()

     item["country"] = "Kenya"
     return item
コード例 #4
0
ファイル: emploi.py プロジェクト: JerryNyoike/jobscrape
    def parse(self, response):
        """Populate a ``Job`` from a ``#postJob`` / ``#job-description`` page.

        Title, posting date and town come from the page; deadline, salary,
        year and country are fixed values. Header links and the description
        card are passed on to ``get_header_details`` and ``get_description``.

        Args:
            response: Page response providing ``url``, ``css`` and ``xpath``.

        Returns:
            The populated ``Job`` item.
        """
        item = Job()

        posted_on = response.xpath(
            "//span[contains(text(), 'Posted')]/*/text()").get()
        town_fragments = response.css(
            'div.card-body div.pb-3 p::text').getall()

        item["ID"] = 1
        item["website"] = self.meta["name"]
        item["url"] = response.url
        item["jobTitle"] = response.css('#postJob h2::text').get()
        item["positions"] = 1
        item["uploadDate"] = posted_on
        item["year"] = "2020"
        item["deadline"] = "N/A"
        # Town text is spread over several <p> nodes; join before cleaning.
        item["town"] = self.clean_text(' '.join(town_fragments))
        item["readvertised"] = "N/A"
        item["salary"] = "Login to view salary"
        item["country"] = "Kenya"

        header_links = response.css(
            '#job-description div.card-body div.pb-3 a *::text').getall()
        self.get_header_details(header_links, item)

        body_texts = response.css(
            '#job-description div.card-body *::text').getall()
        heading_texts = response.xpath(
            '//h5/text() | //strong /text() | //b/text()').getall()
        self.get_description(heading_texts, body_texts, item)
        return item
コード例 #5
0
    def parse(self, response):
        """Populate a ``Job`` from a page with no site-specific selectors.

        Only fixed fields are set directly. All text under ``body`` and every
        ``h1``-``h4`` / ``strong`` / ``b`` text node are passed to
        ``get_description``.

        Args:
            response: Page response providing ``url``, ``css`` and ``xpath``.

        Returns:
            The populated ``Job`` item.
        """
        item = Job()
        item["ID"] = 1
        item["website"] = self.meta["name"]
        item["url"] = response.url
        item["positionLevel"] = "N/A"
        item["positions"] = 1
        item["readvertised"] = "N/A"
        item["country"] = "Kenya"

        page_texts = response.css('body *::text').getall()
        heading_texts = response.xpath(
            '//h1 /text() | //h2 /text() | //h3 /text() | //h4 /text() | //strong /text() | //b/text()'
        ).getall()
        self.get_description(heading_texts, page_texts, item)
        return item
コード例 #6
0
    def parse(self, response):
        """Populate a ``Job`` from a page headed by ``h1.h2`` and a sibling ``h4``.

        Args:
            response: Page response providing ``url``, ``css`` and ``xpath``.

        Returns:
            The populated ``Job`` item.
        """
        job = Job()
        job["ID"] = 1
        job["website"] = self.meta["name"]
        job["url"] = response.url
        job["jobTitle"] = response.css('h1.h2::text').get()
        # .get() extracts the first matching text (or None); without it the
        # field held the selection object instead of a string.
        job["company"] = response.xpath(
            "//h1[@class='h2']/following-sibling::h4/text()").get()
        job["positions"] = 1
        job["readvertised"] = "N/A"
        job["country"] = "Kenya"

        titles = response.xpath(
            '//h1 /text() | //h4 /text() | //strong /text() | //b/text()'
        ).getall()
        divs = response.css('div.container *::text').getall()
        self.get_description(titles, divs, job)
        return job
コード例 #7
0
	def parse(self, response):
		"""Populate a ``Job`` from an ``article.post`` page.

		The company is taken as the text after the last " at " in the post
		heading.

		Args:
			response: Page response providing ``url``, ``css`` and ``xpath``.

		Returns:
			The populated ``Job`` item.
		"""
		job = Job()
		job["ID"] = 1
		job["website"] = self.meta["name"]
		job["url"] = response.url
		# Evaluate the heading once; it feeds both the title and the company.
		heading = response.css('article.post h2 a::text').get()
		job["jobTitle"] = heading
		job["jobType"] = "N/A"
		job["positions"] = 1
		job["uploadDate"] = response.css('div#meta_authorl::text').get()
		job["readvertised"] = "N/A"
		# A page without the heading previously raised AttributeError here.
		job["company"] = heading.split(" at ")[-1] if heading is not None else "N/A"
		job["employmentType"] = "N/A"
		job["country"] = "Kenya"

		titles = response.xpath('//h3/text() | //strong /text() | //b/text()').getall()
		divs = response.css('article.post *::text').getall()
		self.get_description(titles, divs, job)
		return job
コード例 #8
0
	def parse(self, response):
		"""Populate a ``Job`` from an advert page (``.b-advert-*`` markup).

		The cleaned advert title is reused as description and technology;
		the attribute list is passed to ``get_details``.

		Args:
			response: Page response providing ``url`` and ``css``.

		Returns:
			The populated ``Job`` item.
		"""
		item = Job()
		item["ID"] = 1
		item["website"] = self.meta["name"]
		item["url"] = response.url

		headline = self.clean_text(response.css('.b-advert-title-inner::text').get())
		item["jobTitle"] = headline
		item["positions"] = 1
		item["description"] = headline
		item["technology"] = headline

		posted_raw = response.css('time::text').get()
		item["uploadDate"] = self.clean_text(posted_raw)
		town_raw = response.css('.b-advert-info-statistics::text').get()
		item["town"] = self.clean_text(town_raw)

		item["year"] = "2020"
		item["contact"] = "N/A"
		item["readvertised"] = "N/A"
		item["country"] = "Kenya"

		attribute_texts = response.css(".b-advert-attributes *::text").getall()
		self.get_details(attribute_texts, item)
		return item
コード例 #9
0
	def parse(self, response):
		"""Populate a ``Job`` from a page titled by ``header h1.title``.

		The title text is reused for position level and technology. Header
		spans go to ``get_header_details``, body text to ``get_description``.

		Args:
			response: Page response providing ``url``, ``css`` and ``xpath``.

		Returns:
			The populated ``Job`` item.
		"""
		item = Job()
		item["ID"] = 1
		item["website"] = self.meta["name"]
		item["url"] = response.url

		headline = response.css('header h1.title::text').get()
		item["jobTitle"] = headline
		item["positionLevel"] = headline
		item["positions"] = 1
		item["uploadDate"] = response.css('time.time::attr(datetime)').get()
		# NOTE(review): 'selector::text' looks like an unfilled placeholder
		# selector -- confirm the intended deadline selector.
		item["deadline"] = response.css('selector::text').get()
		item["readvertised"] = "N/A"
		item["salary"] = "N/A"
		item["technology"] = headline
		item["country"] = "Kenya"

		header_spans = response.css('article header span::text').getall()
		self.get_header_details(header_spans, item)

		heading_texts = response.xpath('//h3/text() | //strong /text() | //b/text()').getall()
		body_texts = response.css('.content *::text').getall()
		self.get_description(heading_texts, body_texts, item)
		return item
コード例 #10
0
    def parse(self, response):
        """Populate a ``Job`` from a page with a ``single-job-meta`` list.

        Job type, career level, deadline and location are read from the meta
        list entries with those titles. The title is the part of
        ``h1.entry-title`` before the first en dash. Missing nodes yield
        ``None`` (or ``"N/A"`` for the derived year) instead of raising.

        Args:
            response: Page response providing ``url``, ``css`` and ``xpath``.

        Returns:
            The populated ``Job`` item.
        """

        def meta_value(label):
            """Return the text of the job-meta entry whose title is *label*."""
            return response.xpath(
                "//div[@class='single-job-meta']/ul/li"
                "[span[@class='job-meta-title']/text() = '{}']"
                "/span[@class='job-meta-content']/text()".format(label)
            ).get()

        heading = response.css('h1.entry-title::text').get()
        # Guard against a missing heading, which used to raise AttributeError.
        short_title = heading.split("–")[0] if heading is not None else None
        job_type = meta_value('Job Type:')
        deadline = meta_value('Deadline:')

        job = Job()
        job["ID"] = 1
        job["website"] = self.meta["name"]
        job["url"] = response.url
        job["jobTitle"] = short_title
        job["jobType"] = job_type
        job["positionLevel"] = meta_value('Career Level:')
        job["positions"] = 1
        job["uploadDate"] = "N/A"
        # The year is the last space-separated token of the deadline text.
        if deadline is not None:
            job["year"] = deadline.strip().split(" ")[-1]
        else:
            job["year"] = "N/A"
        job["deadline"] = deadline
        job["town"] = meta_value('Location:')
        job["readvertised"] = "N/A"
        job["company"] = "N/A"
        job["technology"] = short_title
        job["employmentType"] = job_type
        job["industry"] = heading
        job["country"] = "Kenya"

        titles = response.xpath('//strong /text() | //b/text()').getall()
        divs = response.css('div.entry-content *::text').getall()
        self.get_description(titles, divs, job)
        return job
コード例 #11
0
 def parse(self, response):
     """Populate a ``Job`` from a page using ``.job-header__*`` markup.

     The cleaned work type is stored as both job type and employment type.
     Location, ``h2`` and ``.job__details`` text are handed to ``get_town``,
     ``get_company`` and ``get_description``.

     Args:
         response: Page response providing ``url`` and ``css``.

     Returns:
         The populated ``Job`` item.
     """
     item = Job()
     item["ID"] = 1
     item["website"] = self.meta["name"]
     item["url"] = response.url

     title_raw = response.css('.job-header__title::text').get()
     item["jobTitle"] = self.clean_text(title_raw)
     item["positions"] = 1

     work_type_raw = response.css('.job-header__work-type::text').get()
     work_type = self.clean_text(work_type_raw)
     item["jobType"] = work_type
     item["employmentType"] = work_type

     item["year"] = "2020"
     item["readvertised"] = "N/A"

     salary_raw = response.css('.job-header__salary::text').get()
     item["salary"] = self.clean_text(salary_raw)
     item["country"] = "Kenya"

     location_texts = response.css('.job-header__location *::text').getall()
     self.get_town(location_texts, item)
     company_texts = response.css('h2 *::text').getall()
     self.get_company(company_texts, item)
     detail_texts = response.css('.job__details *::text').getall()
     self.get_description(detail_texts, item)
     return item
コード例 #12
0
ファイル: bestjobs.py プロジェクト: JerryNyoike/jobscrape
    def parse(self, response):
        """Populate a ``Job`` from a page titled by ``h1`` with company in ``h2``.

        The cleaned ``h1`` text doubles as technology and industry. The
        sidebar, header spans and offer list are passed to ``get_type``,
        ``get_header_details`` and ``get_description``.

        Args:
            response: Page response providing ``url`` and ``css``.

        Returns:
            The populated ``Job`` item.
        """
        item = Job()
        item["ID"] = 1
        item["website"] = self.meta["name"]
        item["url"] = response.url

        headline = self.clean_text(response.css('h1::text').get())
        for field_name in ("jobTitle", "technology", "industry"):
            item[field_name] = headline

        item["positionLevel"] = "N/A"
        item["year"] = "2020"
        item["readvertised"] = "N/A"
        company_raw = response.css('h2::text').get()
        item["company"] = self.clean_text(company_raw)
        # NOTE(review): 'selector::text' looks like an unfilled placeholder
        # selector -- confirm the intended skills selector.
        item["skills"] = response.css('selector::text').get()
        item["responsibilities"] = "N/A"
        item["country"] = "Kenya"

        sidebar_texts = response.css('.box_r *::text').getall()
        self.get_type(sidebar_texts, item)
        header_spans = response.css('header span::text').getall()
        self.get_header_details(header_spans, item)
        offer_texts = response.css('.detalle_oferta ul *::text').getall()
        self.get_description(offer_texts, item)
        return item
コード例 #13
0
	def parse(self, response):
		"""Populate a ``Job`` from a page with a ``job-key-info`` list.

		Job type, location and job field are read from the key-info entries
		with those titles. The year is the text after the last comma of the
		posted date, or ``"N/A"`` when the date node is missing.

		Args:
			response: Page response providing ``url``, ``css`` and ``xpath``.

		Returns:
			The populated ``Job`` item.
		"""

		def key_info(label):
			"""Return the link text of the key-info entry titled *label*."""
			return response.xpath(
				"//ul[@class='job-key-info']/li"
				"[span[@class='jkey-title']/text() = '{}']"
				"/span[@class='jkey-info']/a/text()".format(label)
			).get()

		job_type = key_info('Job Type')
		posted = response.css('div#posted-date::text').get()
		industry = response.css('li.job-industry a::text').get()

		job = Job()
		job["ID"] = 1
		job["website"] = self.meta["name"]
		job["url"] = response.url
		job["jobTitle"] = response.css('a.subjob-title::text').get()
		job["jobType"] = job_type
		job["positionLevel"] = "N/A"
		job["positions"] = 1
		job["uploadDate"] = posted
		# A missing posted-date node used to raise AttributeError here.
		job["year"] = posted.strip().split(",")[-1] if posted is not None else "N/A"
		job["deadline"] = response.css('div.read-date-sec-li:nth-child(2)::text').get()
		job["town"] = key_info('Location')
		job["readvertised"] = "N/A"
		job["company"] = industry
		job["technology"] = key_info('Job Field')
		job["employmentType"] = job_type
		job["industry"] = industry
		job["country"] = "Kenya"

		titles = response.xpath('//strong /text() | //b/text()').getall()
		divs = response.css('#job-description-holder *::text').getall()
		self.get_description(titles, divs, job)
		return job
コード例 #14
0
ファイル: newjobs.py プロジェクト: JerryNyoike/jobscrape
	def parse(self, response):
		"""Populate a ``Job`` from a page annotated with ``itemprop`` spans.

		The year is the text after the last comma of the ``updated`` span, or
		``"N/A"`` when that span is missing.

		Args:
			response: Page response providing ``url``, ``css`` and ``xpath``.

		Returns:
			The populated ``Job`` item.
		"""
		heading = response.xpath('//h1[@class="entry-title"]/text()').get()
		job_type = response.xpath('//span[@itemprop="employmentType"]/text()').get()
		updated = response.xpath('//span[@class="updated"]/text()').get()
		category = response.xpath('//span[@itemprop="occupationalCategory"]/text()').get()

		job = Job()
		job["ID"] = 1
		job["website"] = self.meta["name"]
		job["url"] = response.url
		job["jobTitle"] = heading
		job["jobType"] = job_type
		job["positionLevel"] = heading
		job["positions"] = 1
		job["uploadDate"] = updated
		# A missing date node used to raise AttributeError here.
		job["year"] = updated.split(',')[-1].strip() if updated is not None else "N/A"
		job["deadline"] = "N/A"
		job["town"] = response.xpath('//span[@itemprop="address"]/text()').get()
		# Key was misspelled "reavertised"; the sibling parsers all store
		# this value under "readvertised".
		job["readvertised"] = "N/A"
		job["company"] = response.css('.wpjb-job-company::text').get()
		job["technology"] = category
		job["employmentType"] = job_type
		job["industry"] = category
		job["country"] = "Kenya"

		titles = response.xpath('//h3/text() | //strong /text() | //b/text()').getall()
		divs = response.css('.wpjb-job-content *::text').getall()
		self.get_description(titles, divs, job)
		return job
コード例 #15
0
    def parse(self, response):
        """Populate a ``Job`` from a ``job_posting`` article page.

        Structured values (salary, location, qualifications, responsibilities,
        application details) are located through the ``<strong>`` labels that
        precede them inside the entry content. Values that cannot be found
        fall back to ``"N/A"`` where the original code defined a fallback.

        Args:
            response: Page response providing ``url`` and ``xpath``.

        Returns:
            The populated ``Job`` item.
        """
        job = Job()

        jobContent = response.xpath(
            '//main[@class="content"]/article[contains(@class, "job_posting")]'
        )
        # NOTE(review): every expression below starts with "//"; per the
        # Scrapy selector docs such paths are evaluated against the whole
        # document, not the sub-selection they are called on. Left as-is to
        # keep the current matches -- confirm whether relative paths were
        # intended.
        jobDetails = jobContent.xpath('//div[@class="entry-content"]')
        jobMeta = response.xpath('//footer/p[@class="entry-meta"]')
        title = jobContent.xpath(
            '//header[@class="entry-header"]/h1[@class="entry-title"]/text()'
        ).get()
        uploadTime = jobContent.xpath(
            '//header[@class="entry-header"]/p/time/text()').get()

        # The year is the third space-separated token of the upload time;
        # shorter or missing dates fall back to "N/A" instead of raising.
        year = "N/A"
        if uploadTime:
            date_parts = uploadTime.split(" ")
            if len(date_parts) > 2:
                year = date_parts[2]

        jobType = jobMeta.xpath(
            '//span[contains(@class, "wsm-categories")]/a[1]/text()').get()

        description = ''.join(
            jobDetails.xpath(
                '//p/strong[contains(text(), "Title")]/ancestor::p/preceding-sibling::p/text()'
            ).getall())

        salary = jobDetails.xpath(
            '//p/strong[contains(text(), "Gross Salary")]/ancestor::p/text()'
        ).get()
        salary = salary.strip() if salary else "N/A"

        town = jobDetails.xpath(
            '//p/strong[contains(text(), "Location")]/ancestor::p/text()').get(
            )
        town = town.strip() if town is not None else "N/A"

        skills = ''.join(
            jobDetails.xpath(
                '//p/span/strong[contains(text(), "Qualifications")]/ancestor::p/following-sibling::ul[1]/li/text()'
            ).getall())
        responsibilities = ''.join(
            jobDetails.xpath(
                '//p/span/strong[contains(text(), "Responsibilities")]/ancestor::p/following-sibling::ul[1]/li/text()'
            ).getall())
        contact = jobDetails.xpath(
            '//p/span/strong[contains(text(), "How to Apply")]/ancestor::p/span/strong[contains(text(), "@")]/text()'
        ).get()
        company = jobMeta.xpath('//span[last()]/a/text()').get()

        applicationDetails = jobDetails.xpath(
            '//p/span/strong[contains(text(), "How to Apply")]/ancestor::p/text()'
        ).getall()
        # The deadline is the last two text nodes of the "How to Apply"
        # paragraph; fewer than two nodes means no deadline.
        if len(applicationDetails) >= 2:
            deadline = ''.join(applicationDetails[-2:]).strip()
        else:
            deadline = "N/A"

        industry = jobMeta.xpath(
            '//p/span[@class="entry-tags"]/a/text()').get()
        country = 'Kenya'
        requirements = 'N/A'
        technology = jobDetails.xpath('//p[3]/strong/text()').get()

        job["ID"] = 1
        job["website"] = self.meta["name"]
        job["url"] = response.url
        job["jobTitle"] = title
        job["jobType"] = jobType
        job["positionLevel"] = "N/A"
        job["positions"] = 1
        job["uploadDate"] = uploadTime
        job["year"] = year
        job["deadline"] = deadline
        job["town"] = town
        job["contact"] = contact
        job["readvertised"] = "NO"
        job["salary"] = salary
        job["company"] = company
        job["technology"] = technology
        job["description"] = description
        job["employmentType"] = jobType
        job["skills"] = skills
        job["industry"] = industry
        job["responsibilities"] = responsibilities
        job["requirements"] = requirements
        job["country"] = country

        return job
コード例 #16
0
ファイル: jik.py プロジェクト: JerryNyoike/jobscrape
    def parse(self, response):
        """Populate a ``Job`` from a page using ``job-*`` div classes.

        The cleaned description is split into summary, responsibilities,
        skills and education by the positions of the headings "JOB SUMMARY",
        "RESPONSIBILITIES", "REQUIRED SKILLS" and "REQUIRED EDUCATION". A
        heading that is not found is treated as position 0, as before.

        The sections are cut with plain string slicing. The previous version
        pasted the description into an XPath ``substring("...")`` call, which
        failed (silently, via bare ``except``) whenever the text contained a
        double quote and was off by one because XPath positions start at 1.

        Args:
            response: Page response providing ``url`` and ``xpath``.

        Returns:
            The populated ``Job`` item.
        """
        job = Job()

        title = response.xpath('//div[@class="job-title"]/text()').get()
        date = response.xpath('//div[@class="job-date"]/text()').get()
        # The year is the text after the last "." of the date.
        year = date.split('.')[-1].strip() if date is not None else "N/A"
        company = response.xpath('//div[@class="job-company"]/text()').get()

        location = response.xpath('//div[@class="job-location"]/text()').get()
        if location is not None:
            places = location.split(',')
            town = places[0].strip()
            country = places[-1].strip()
        else:
            # A missing location node used to raise AttributeError.
            town = country = "N/A"

        description = self.clean_text(''.join(
            response.xpath('//div[@class="job-desc"]/p/text()').getall()))

        def heading_offset(heading):
            """Return the offset of *heading* in the description, or 0."""
            offset = description.find(heading)
            return offset if offset != -1 else 0

        def section(start, end):
            """Return ``description[start:end]``; '' for an empty range."""
            return description[start:end] if end > start else ''

        sumPos = heading_offset('JOB SUMMARY')
        respPos = heading_offset('RESPONSIBILITIES')
        skillPos = heading_offset('REQUIRED SKILLS')
        eduPos = heading_offset('REQUIRED EDUCATION')

        jobDesc = section(sumPos, respPos)
        responsibilities = section(respPos, skillPos)
        skills = section(skillPos, eduPos)
        # Education runs from its heading to the end of the description.
        education = description[eduPos:]

        contact = response.xpath(
            '//div[@class="links"]/a[@class="view-job-link"]/@href').get()

        job["ID"] = 1
        job["website"] = self.meta["name"]
        job["url"] = response.url
        job["jobTitle"] = title
        job["company"] = company
        job["jobType"] = "Fulltime"
        job["positionLevel"] = "N/A"
        job["uploadDate"] = date
        job["year"] = year
        job["deadline"] = "N/A"
        job["town"] = town
        job["country"] = country
        job["contact"] = contact
        job["readvertised"] = "N"
        job["technology"] = skills
        job["description"] = jobDesc
        job["employmentType"] = ''
        job["skills"] = skills
        job["industry"] = ''
        job["responsibilities"] = responsibilities
        job["requirements"] = education

        return job
コード例 #17
0
    def parse(self, response):
        """Populate a ``Job`` from a page laid out with ``wpb_wrapper`` blocks.

        Description, responsibilities and qualifications are located relative
        to the "Description" / "Responsibilities" labels. The education
        requirement is the part of the qualifications text from "Degree" up
        to the next period.

        Args:
            response: Page response providing ``url`` and ``xpath``.

        Returns:
            The populated ``Job`` item.
        """
        job = Job()

        desc = response.xpath(
            '//*[contains(text(), "Description")]/following::p[1]/text() | //div[@id="carrerbox"]/descendant-or-self::div[@class="wpb_wrapper"]/p/strong/span[contains(text(), "Job description")]/following::p[1]/text()'
        ).get()
        resp = ''.join(
            response.xpath(
                '//*[contains(text(), "Responsibilities")]/following::ul[1]/li/text() | //div[@class="jobs-subheading"]/span/strong[contains(text(), "Responsibilities")]/ancestor::div/following-sibling::div/ul[1]/li/div/p/text()'
            ).getall())
        qualifications = ''.join(
            response.xpath(
                '//*[contains(text(), "Responsibilities")]/following::ul[2]/li/text() | //div[@class="jobs-subheading"]/span/strong[contains(text(), "Responsibilities")]/ancestor::div/following-sibling::div/ul[1]/li/div/p/text()'
            ).getall())

        degree_at = qualifications.find("Degree")
        if degree_at == -1:
            education = "N/A"
        else:
            # Look for the period AFTER "Degree": searching from the start
            # gave an empty slice when an earlier sentence ended first, and a
            # missing period (-1) chopped off the last character.
            sentence_end = qualifications.find(".", degree_at)
            if sentence_end == -1:
                education = qualifications[degree_at:]
            else:
                education = qualifications[degree_at:sentence_end]

        contact = response.xpath(
            '//p/strong[contains(text(), "@")]/text() | //p/span/strong[contains(text(), "How to Apply")]/following::p[1]/span/a/@href'
        ).get()
        title = response.xpath(
            '//h1[contains(@class, "entry-title")]/text()').get()
        position = response.xpath(
            '//div[@class="wpb_wrapper"]/p[contains(text(), "Position")]/text()'
        ).get()
        if position is not None:
            # Keep only the value after the last ":".
            position = position.split(':')[-1].strip()
        location = response.xpath(
            '//div[@class="wpb_wrapper"]/p[contains(text(), "Location")]/text()'
        ).get()
        if location is not None:
            location = location.split(':')[-1].strip()
        industry = response.xpath('//div[@class="row"]/p[1]/text()').get()

        job["ID"] = 1
        job["website"] = self.meta["name"]
        job["url"] = response.url
        job["jobTitle"] = title
        # The company is the text after the last "Job" in the title; a
        # missing title used to raise AttributeError here.
        job["company"] = title.split('Job')[-1] if title is not None else "N/A"
        job["jobType"] = "Fulltime"
        job["positionLevel"] = position
        job["uploadDate"] = "N/A"
        job["year"] = "2020"
        job["deadline"] = "N/A"
        job["town"] = location
        job["country"] = "Kenya"
        job["contact"] = contact
        job["readvertised"] = "N/A"
        job["technology"] = resp
        job["description"] = desc
        job["employmentType"] = "Fulltime"
        job["skills"] = qualifications
        job["industry"] = industry
        job["responsibilities"] = resp
        job["requirements"] = education

        return job