def parse(self, response):
    """Yield one follow-up request per job found on a listing page.

    Every node matched by ``self.job_selector`` is turned into a partially
    filled ``JobItem`` (url, site, title, tag texts and, when it can be
    parsed, the posting date). The item is attached to a ``Request`` for the
    job's own page via ``meta["item"]``; ``self.parse_job`` is the callback.

    Nodes that contain no ``a.job-link`` anchor are skipped.
    """
    s = Selector(response)
    for job in s.xpath(self.job_selector):
        # Every query below starts with ".//" so it is evaluated relative to
        # the current job node. An XPath starting with "//" is absolute even
        # when called on a sub-selector, so it would return the first match
        # of the whole page for every job.
        joblink = job.xpath('.//a[@class="job-link"]/@href').extract_first()
        if not joblink:
            continue

        item = JobItem()
        item["url"] = urljoin(self.root, joblink)
        item["site"] = "StackOverflow"
        item["title"] = job.xpath(".//h2/a/@title").extract_first()
        item["text"] = job.xpath('.//a[@class="post-tag"]/text()').extract()

        try:
            posted = job.xpath(
                './/p[@class="-posted-date"]/text()').extract_first()
            parsed = utilities.stackoverflowtime(posted).isoformat()
            item["date_posted"] = parsed
        except Exception as e:
            # Best effort: a missing or unparseable date must not prevent
            # the job from being followed, so it is only logged.
            self.logger.error(e)

        yield Request(item["url"], callback=self.parse_job,
                      meta={"item": item})
def parse(self, response):
    """Yield one follow-up request per job found on a listing page.

    Each node matched by the CSS selector ``self.job_selector`` becomes a
    partially filled ``JobItem`` (url, title, summary texts and, when it can
    be parsed, the posting date). The item is attached to a ``Request`` for
    the job's own page via ``meta["item"]``; ``self.parse_job`` is the
    callback.

    Nodes without an ``h2/a`` link are skipped.
    """
    s = Selector(response)
    for job in s.css(self.job_selector):
        joblink = job.xpath("h2/a/@href").extract_first()
        if not joblink:
            continue

        item = JobItem()
        item["url"] = urljoin(self.root, joblink)
        item["title"] = job.xpath("h2/a/@title").extract_first()
        item["text"] = job.xpath(
            'table//span[@class="summary"]/text()').extract()

        try:
            # Look the date up inside the current job node; querying the
            # page-level selector with "//" returns the first date on the
            # page for every job.
            posted = job.xpath(
                './/span[@class="date"]/text()').extract_first()
            if posted == "30+ days ago":
                # str.replace returns a new string, so the result has to be
                # assigned for the "+" to actually be removed.
                posted = posted.replace("+", "")
            parsed = utilities.naturaltime(posted).isoformat()
            item["date_posted"] = parsed
        except Exception as e:
            # Best effort: a missing or unparseable date is only logged.
            self.logger.error(e)

        yield Request(item["url"], callback=self.parse_job,
                      meta={"item": item})
def parse(self, response):
    """Yield a ``JobItem`` for every hit in a WorkingNomads JSON response.

    The response body is decoded as JSON and the entries under
    ``data["hits"]["hits"]`` are iterated. For each hit the item gets its
    url (built from the slug), title, site name, a one-element ``text`` list
    (description converted with html2text, followed by the tags) and, when
    available, ``date_posted``.
    """
    data = json.loads(response.text)
    converter = html2text.HTML2Text()
    for job in data["hits"]["hits"]:
        source = job["_source"]

        item = JobItem()
        item["url"] = urljoin("https://www.workingnomads.co/jobs/",
                              source["slug"])
        item["title"] = source["title"]
        item["site"] = "WorkingNomads"

        description = converter.handle(source["description"])
        # Tags are read from the payload: the item was created just above and
        # never receives a "tags" value, so item.get("tags", []) was always
        # empty.
        # NOTE(review): assumes _source["tags"] is a list of strings (or a
        # single string) -- confirm against the API payload.
        tags = source.get("tags") or []
        if isinstance(tags, str):
            tags = [tags]
        # With no tags this yields exactly the description, as before.
        item["text"] = [" ".join([description] + [str(tag) for tag in tags])]

        try:
            posted = converter.handle(source["pub_date"])
            # Keep the part before any "+" (presumably a UTC offset) and
            # drop surrounding whitespace left by the conversion.
            item["date_posted"] = posted.split("+")[0].strip()
        except Exception as e:
            # Best effort: a missing or malformed date is only logged.
            self.logger.error(e)

        yield item
def parse_job(self, response):
    """Build a LandingJobs ``JobItem`` from a job page and yield it.

    The item's text is every text node under the ``ld-job-details`` section
    followed by every text node under the ``ld-job-offer-section`` section.
    """
    page = Selector(response)

    listing = JobItem()
    listing["url"] = response.url
    listing["site"] = "LandingJobs"
    listing["title"] = page.css("h1::text").extract_first()

    details = page.xpath(
        '//section[@class="ld-job-details"]//text()').extract()
    offer = page.xpath(
        '//section[@class="ld-job-offer-section"]//text()').extract()
    listing["text"] = details + offer

    yield listing
def parse_job(self, response):
    """Build a FlexJobs ``JobItem`` from a job page and yield it.

    The item's text is the paragraph text inside ``#job-description``
    followed by the text of all ``td``/``th`` cells. ``parse_time`` is then
    given that text list to obtain ``date_posted``; if it raises, the error
    is logged and the item is yielded without a date.
    """
    page = Selector(response)

    listing = JobItem()
    listing["url"] = response.url
    listing["site"] = "FlexJobs"
    listing["title"] = page.css("h1::text").extract_first()

    paragraphs = page.css("#job-description p::text").extract()
    cells = page.css("td::text, th::text").extract()
    listing["text"] = paragraphs + cells

    try:
        listing["date_posted"] = parse_time(listing["text"])
    except Exception as err:
        self.logger.error(err)

    yield listing
def parse_job(self, response):
    """Build a VirtualVocations ``JobItem`` from a job page and yield it.

    The posting date is taken from the ninth text node (index 8) found under
    ``div.col-sm-6 > p``. If that node is missing or cannot be parsed, the
    error is logged and the item is yielded without ``date_posted``.
    """
    page = Selector(response)

    listing = JobItem()
    listing["url"] = response.url
    listing["site"] = "VirtualVocations"
    listing["title"] = page.css("h1::text").extract_first()
    listing["text"] = page.xpath('//div[@id="job_details"]//text()').extract()

    try:
        paragraph_texts = page.xpath('//div[@class="col-sm-6"]/p/text()')
        raw_date = paragraph_texts[8].extract()
        listing["date_posted"] = parse_date(raw_date).isoformat()
    except Exception as err:
        self.logger.error(err)

    yield listing
def parse_job(self, response):
    """Build a CareerBuilder ``JobItem`` from a job page and yield it.

    The stored url is the response url with any query string removed. The
    text is collected from ``.job-facts``, the ``.tag`` elements under
    ``.item`` and ``.description``, in that order. The date comes from the
    ``h3#job-begin-date`` text with the ``"Posted "`` prefix removed; any
    failure while reading or converting it is logged and the item is yielded
    without ``date_posted``.
    """
    page = Selector(response)

    listing = JobItem()
    listing["url"] = response.url.split("?")[0]
    listing["site"] = "CareerBuilder"
    listing["title"] = page.css(".card").css("h1::text").extract_first()

    collected = page.css(".job-facts::text").extract()
    collected += page.css(".item").css(".tag::text").extract()
    collected += page.css(".description::text").extract()
    listing["text"] = collected

    try:
        raw_date = page.xpath(
            '//h3[@id="job-begin-date"]/text()').extract_first()
        relative = raw_date.replace("Posted ", "")
        listing["date_posted"] = utilities.naturaltime(relative).isoformat()
    except Exception as err:
        self.logger.error(err)

    yield listing
def parse_job(self, response):
    """Build a RemoteWorking ``JobItem`` from a job page and yield it.

    The text is every text node under the element marked
    ``itemprop="description"``. The date comes from the first text node in
    ``li.date-posted`` with the ``"Posted "`` prefix removed; any failure
    while reading or converting it is logged and the item is yielded without
    ``date_posted``.
    """
    page = Selector(response)

    listing = JobItem()
    listing["url"] = response.url
    listing["site"] = "RemoteWorking"
    listing["title"] = page.css("h1::text").extract_first()
    listing["text"] = page.xpath(
        '//div[@itemprop="description"]//text()').extract()

    try:
        raw_date = page.xpath(
            '//li[@class="date-posted"]//text()').extract_first()
        relative = raw_date.replace("Posted ", "")
        listing["date_posted"] = utilities.naturaltime(relative).isoformat()
    except Exception as err:
        self.logger.error(err)

    yield listing
def parse_job(self, response):
    """Build a Remote.co ``JobItem`` from a job page and yield it.

    Fills url, site, title, company (text of ``strong[itemprop="name"]``)
    and text (all text nodes under ``div.job_description``). The date comes
    from the first text node under ``<time>`` with the ``"Posted "`` prefix
    removed; any failure while reading or converting it is logged and the
    item is yielded without ``date_posted``.
    """
    s = Selector(response)

    item = JobItem()
    item["url"] = response.url
    item["site"] = "Remote.co"
    item["title"] = s.css("h1::text").extract_first()
    item["company"] = s.xpath(
        '//strong[@itemprop="name"]/text()').extract_first()
    # The earlier `.job-description` / `p[1]` queries were removed: their
    # results were never used, so they had no effect on the item.
    item["text"] = s.xpath(
        '//div[@class="job_description"]//text()').extract()

    try:
        posted = s.xpath("//time//text()").extract_first()
        item["date_posted"] = utilities.naturaltime(
            posted.replace("Posted ", "")
        ).isoformat()
    except Exception as e:
        # Best effort: a missing or unparseable date is only logged.
        self.logger.error(e)

    yield item