Exemple #1
0
 def parse(self, response):
     """Collect the job links on a listing page and schedule each one.

     For every node matched by ``self.job_selector`` a ``JobItem`` is
     pre-filled with the absolute url, the site name, the title, the
     ``post-tag`` link texts and (when it can be parsed) the posting
     date. The item travels to ``self.parse_job`` in the request meta
     under the ``"item"`` key.

     Args:
         response: The listing-page response to scan.

     Yields:
         One ``Request`` per job node that contains a job link.
     """
     s = Selector(response)
     for job in s.xpath(self.job_selector):
         # Every XPath below starts with ".//" so it is evaluated relative
         # to this job node. A leading "//" searches the whole document,
         # which would give every job the first match on the page.
         joblink = job.xpath('.//a[@class="job-link"]/@href').extract_first()
         if not joblink:
             continue
         item = JobItem()
         item["url"] = urljoin(self.root, joblink)
         item["site"] = "StackOverflow"
         item["title"] = job.xpath(".//h2/a/@title").extract_first()
         item["text"] = job.xpath('.//a[@class="post-tag"]/text()').extract()
         try:
             posted = job.xpath(
                 './/p[@class="-posted-date"]/text()').extract_first()
             parsed = utilities.stackoverflowtime(posted).isoformat()
             item["date_posted"] = parsed
         except Exception as e:
             # Best effort: a missing or unparseable date is logged and the
             # job is still followed without "date_posted".
             self.logger.error(e)
         yield Request(item["url"],
                       callback=self.parse_job,
                       meta={"item": item})
Exemple #2
0
 def parse(self, response):
     """Collect the job links on a listing page and schedule each one.

     For every node matched by the CSS selector ``self.job_selector`` a
     ``JobItem`` is pre-filled with the absolute url, the title, the
     summary text and (when it can be parsed) the posting date. The item
     travels to ``self.parse_job`` in the request meta under the
     ``"item"`` key.

     Args:
         response: The listing-page response to scan.

     Yields:
         One ``Request`` per job node that contains a job link.
     """
     s = Selector(response)
     for job in s.css(self.job_selector):
         joblink = job.xpath("h2/a/@href").extract_first()
         if not joblink:
             continue
         item = JobItem()
         item["url"] = urljoin(self.root, joblink)
         item["title"] = job.xpath("h2/a/@title").extract_first()
         item["text"] = job.xpath(
             'table//span[@class="summary"]/text()').extract()
         try:
             # Look the date up inside this job node; querying the page
             # selector with "//" returns the first date on the page for
             # every job.
             posted = job.xpath(
                 './/span[@class="date"]/text()').extract_first()
             if posted == "30+ days ago":
                 # str.replace returns a new string, so the result has to
                 # be rebound for the "+" to actually be dropped.
                 posted = posted.replace("+", "")
             parsed = utilities.naturaltime(posted).isoformat()
             item["date_posted"] = parsed
         except Exception as e:
             # Best effort: a missing or unparseable date is logged and the
             # job is still followed without "date_posted".
             self.logger.error(e)
         yield Request(item["url"],
                       callback=self.parse_job,
                       meta={"item": item})
Exemple #3
0
 def parse(self, response):
     """Turn a JSON search response into JobItems.

     The response body is decoded as JSON and every entry of
     ``data["hits"]["hits"]`` becomes one ``JobItem`` built from the
     entry's ``"_source"`` mapping. The HTML description is converted to
     plain text with html2text.

     Args:
         response: Response whose ``text`` is the JSON document.

     Yields:
         One ``JobItem`` per hit.
     """
     data = json.loads(response.text)
     converter = html2text.HTML2Text()
     for job in data["hits"]["hits"]:
         source = job["_source"]
         item = JobItem()
         item["url"] = urljoin("https://www.workingnomads.co/jobs/",
                               source["slug"])
         item["title"] = source["title"]
         item["site"] = "WorkingNomads"
         description = converter.handle(source["description"])
         # The tags used to be read from the freshly created item, which
         # never has a "tags" entry, so nothing was ever appended.
         # NOTE(review): assumes the payload carries them as
         # _source["tags"] (list of strings or one string) - confirm
         # against the API.
         tags = source.get("tags") or []
         if isinstance(tags, str):
             tags = [tags]
         # With no tags the text is exactly the converted description.
         item["text"] = [" ".join([description] + list(tags))]
         try:
             posted = converter.handle(source["pub_date"])
             # Keep what precedes the "+" (presumably a UTC offset) and
             # drop the surrounding whitespace html2text leaves behind.
             item["date_posted"] = posted.split("+")[0].strip()
         except Exception as e:
             # Best effort: the item is still yielded without a date.
             self.logger.error(e)
         yield item
Exemple #4
0
 def parse_job(self, response):
     """Build a JobItem from a single LandingJobs posting page.

     The item text is every text node under the ``ld-job-details``
     section followed by every text node under the
     ``ld-job-offer-section`` section.
     """
     page = Selector(response)
     details = page.xpath(
         '//section[@class="ld-job-details"]//text()').extract()
     offer = page.xpath(
         '//section[@class="ld-job-offer-section"]//text()').extract()

     posting = JobItem()
     posting["url"] = response.url
     posting["site"] = "LandingJobs"
     posting["title"] = page.css("h1::text").extract_first()
     posting["text"] = details + offer
     yield posting
Exemple #5
0
 def parse_job(self, response):
     """Build a JobItem from a single FlexJobs posting page.

     The item text is the paragraph text of ``#job-description`` followed
     by the text of every table cell and header on the page. The same
     list is handed to ``parse_time`` to obtain the posting date; a
     failure there is logged and the item is yielded without a date.
     """
     page = Selector(response)
     lines = page.css("#job-description p::text").extract()
     lines += page.css("td::text, th::text").extract()

     posting = JobItem()
     posting["url"] = response.url
     posting["site"] = "FlexJobs"
     posting["title"] = page.css("h1::text").extract_first()
     posting["text"] = lines
     try:
         posting["date_posted"] = parse_time(lines)
     except Exception as err:
         self.logger.error(err)
     yield posting
Exemple #6
0
 def parse_job(self, response):
     """Build a JobItem from a single VirtualVocations posting page.

     The posting date is taken from the ninth text node matched under
     ``div.col-sm-6 > p``; if that node is missing or cannot be parsed the
     error is logged and the item is yielded without a date.
     """
     page = Selector(response)

     posting = JobItem()
     posting["url"] = response.url
     posting["site"] = "VirtualVocations"
     posting["title"] = page.css("h1::text").extract_first()
     posting["text"] = page.xpath(
         '//div[@id="job_details"]//text()').extract()
     try:
         paragraphs = page.xpath('//div[@class="col-sm-6"]/p/text()')
         raw_date = paragraphs[8].extract()
         posting["date_posted"] = parse_date(raw_date).isoformat()
     except Exception as err:
         self.logger.error(err)
     yield posting
Exemple #7
0
 def parse_job(self, response):
     """Build a JobItem from a single CareerBuilder posting page.

     The stored url is the response url with any query string removed.
     The text is gathered from the job facts, the tags and the
     description, in that order. A date that is missing or cannot be
     parsed is logged and the item is yielded without it.
     """
     page = Selector(response)

     lines = page.css(".job-facts::text").extract()
     lines += page.css(".item").css(".tag::text").extract()
     lines += page.css(".description::text").extract()

     posting = JobItem()
     posting["url"] = response.url.split("?")[0]
     posting["site"] = "CareerBuilder"
     posting["title"] = page.css(".card").css("h1::text").extract_first()
     posting["text"] = lines
     try:
         raw = page.xpath('//h3[@id="job-begin-date"]/text()').extract_first()
         age = raw.replace("Posted ", "")
         posting["date_posted"] = utilities.naturaltime(age).isoformat()
     except Exception as err:
         self.logger.error(err)
     yield posting
Exemple #8
0
    def parse_job(self, response):
        """Build a JobItem from a single RemoteWorking posting page.

        The text is every text node under the element marked
        ``itemprop="description"``. The date comes from the first text
        node of ``li.date-posted`` with the "Posted " prefix removed; a
        failure there is logged and the item is yielded without a date.
        """
        page = Selector(response)

        posting = JobItem()
        posting["url"] = response.url
        posting["site"] = "RemoteWorking"
        posting["title"] = page.css("h1::text").extract_first()
        posting["text"] = page.xpath(
            '//div[@itemprop="description"]//text()').extract()
        try:
            raw = page.xpath(
                '//li[@class="date-posted"]//text()').extract_first()
            age = raw.replace("Posted ", "")
            posting["date_posted"] = utilities.naturaltime(age).isoformat()
        except Exception as err:
            self.logger.error(err)
        yield posting
Exemple #9
0
 def parse_job(self, response):
     """Parse a Remote.co job page into a JobItem.

     Fills in the url, site, title, company name and the text nodes of
     the ``job_description`` div. The posting date is read from the first
     text node under ``<time>`` with the "Posted " prefix removed.

     Args:
         response: The job-page response to parse.

     Yields:
         A single ``JobItem``; ``date_posted`` is absent when the date is
         missing or cannot be parsed.
     """
     s = Selector(response)
     item = JobItem()
     item["url"] = response.url
     item["site"] = "Remote.co"
     item["title"] = s.css("h1::text").extract_first()
     item["company"] = s.xpath('//strong[@itemprop="name"]/text()').extract_first()
     # Two selector queries whose results were never used have been removed.
     item["text"] = s.xpath('//div[@class="job_description"]//text()').extract()
     try:
         posted = s.xpath("//time//text()").extract_first()
         item["date_posted"] = utilities.naturaltime(
             posted.replace("Posted ", "")
         ).isoformat()
     except Exception as e:
         # Best effort: the failure is logged and the item is still yielded.
         self.logger.error(e)
     yield item